Database

Dict-based database

Bases: BaseDatabase

Source code in simstring\database\dict.py
class DictDatabase(BaseDatabase):
    """In-memory string database indexed by feature-set size and feature.

    Every added string is kept in ``strings`` (insertion order, duplicates
    kept) and indexed twice: by the size of its feature set, and by
    (feature-set size, feature). The whole object can be persisted with
    pickle through ``save``/``load`` and ``dumps``/``loads``.
    """

    def __init__(self, feature_extractor):
        """Create an empty database.

        Args:
            feature_extractor: Object whose ``features(string)`` method
                returns the sized, iterable collection of features used to
                index each string.
        """
        self.feature_extractor = feature_extractor
        self.strings: List[str] = []
        self.feature_set_size_to_string_map: Dict[int, Set[str]] = defaultdict(set) # 3.10 and up only
        # NOTE(review): ``defaultdict_set`` is defined elsewhere in this module;
        # presumably a named factory returning ``defaultdict(set)`` so the
        # nested mapping stays picklable -- confirm.
        self.feature_set_size_and_feature_to_string_map: dict = defaultdict(defaultdict_set)

    def add(self, string: str) -> None:
        """Extract the features of ``string`` and add it to every index.

        Args:
            string: The string to store.
        """
        features = self.feature_extractor.features(string)
        size = len(features)

        self.strings.append(string)
        self.feature_set_size_to_string_map[size].add(string)

        for feature in features:
            self.feature_set_size_and_feature_to_string_map[size][feature].add(string)

    def all(self) -> List[str]:
        """Return the internal list of stored strings (not a copy)."""
        return self.strings

    def lookup_strings_by_feature_set_size_and_feature(self, size: int, feature: str) -> Set[str]:
        """Return the strings that have ``size`` features and contain ``feature``.

        Args:
            size: Feature-set size to match.
            feature: Feature that must be present.

        Returns:
            The indexed set of matching strings, or a new empty set when
            nothing matches.
        """
        # Use non-inserting lookups: indexing the nested defaultdicts directly
        # would create an empty entry for every miss, so read-only queries
        # would grow the index (and every pickle made from it).
        by_feature = self.feature_set_size_and_feature_to_string_map.get(size)
        if by_feature is None:
            return set()
        matches = by_feature.get(feature)
        return set() if matches is None else matches

    def min_feature_size(self) -> int:
        """Return the smallest feature-set size seen so far.

        Raises:
            ValueError: If no string has been added yet.
        """
        return min(self.feature_set_size_to_string_map)

    def max_feature_size(self) -> int:
        """Return the largest feature-set size seen so far.

        Raises:
            ValueError: If no string has been added yet.
        """
        return max(self.feature_set_size_to_string_map)

    def save(self, filename: str) -> None:
        """Save the database to a file as defined by filename.

        Args:
            filename: Filename to save the db at. Should include file extension.

        Returns:
            None
        """
        with open(filename, "wb") as f:
            pickle.dump(self, f)

    @staticmethod
    def load(filename: str) -> "DictDatabase":
        """Load db from a file

        Warning:
            The file is unpickled; unpickling can execute arbitrary code, so
            only load files from a trusted source.

        Args:
            filename (str): Name of the file to load

        Returns:
            DictDatabase: the db
        """
        with open(filename, "rb") as f:
            return pickle.load(f)

    def dumps(self) -> bytes:
        """Generate pickle byte stream

        Returns:
            bytes: The pickled representation of this database.
        """
        return pickle.dumps(self)

    @staticmethod
    def loads(binary_data: bytes) -> "DictDatabase":
        """Load a pickle byte stream representing a database

        Initially only unpickles the data

        Warning:
            Unpickling can execute arbitrary code, so only pass data from a
            trusted source.

        Args:
            binary_data (bytes): Pickled data to unpickle

        Returns:
            DictDatabase: the unpickled object
        """
        return pickle.loads(binary_data)

dumps()

Generate pickle byte stream

Returns:

Name Type Description
bytes bytes

The pickled byte stream representing this database.

Source code in simstring\database\dict.py
def dumps(self) -> bytes:
    """Serialize this object into a pickle byte stream.

    Returns:
        bytes: The pickled representation of the object.
    """
    serialized = pickle.dumps(self)
    return serialized

load(filename) staticmethod

Load db from a file

Parameters:

Name Type Description Default
filename str

Name of the file to load

required

Returns:

Name Type Description
DictDatabase DictDatabase

the db

Source code in simstring\database\dict.py
@staticmethod
def load(filename:str) -> "DictDatabase":
    """Read a pickled database back from disk.

    The file is unpickled as-is, so it should come from a trusted source.

    Args:
        filename (str): Path of the pickle file to read.

    Returns:
        DictDatabase: The object stored in the file.
    """
    with open(filename, "rb") as source:
        return pickle.load(source)

loads(binary_data) staticmethod

Load a binary string representing a database

Initially only unpickles the data

Parameters:

Name Type Description Default
binary_data bytes

Pickled byte stream to unpickle

required

Returns:

Type Description
DictDatabase

The unpickled database object

Source code in simstring\database\dict.py
@staticmethod
def loads(binary_data: bytes) -> "DictDatabase":
    """Rebuild a database from a pickle byte stream.

    Currently this only unpickles the data, so the bytes should come from a
    trusted source.

    Args:
        binary_data (bytes): Pickled data to restore.

    Returns:
        DictDatabase: The unpickled object.
    """
    restored = pickle.loads(binary_data)
    return restored

save(filename)

Save the database to a file as defined by filename.

Parameters:

Name Type Description Default
filename str

Filename to save the db at. Should include the file extension.

required

Returns:

Type Description

None

Source code in simstring\database\dict.py
def save(self, filename:str):
    """Pickle this object and write it to ``filename``.

    Args:
        filename: Path of the file to write, including its extension.

    Returns:
        None
    """
    with open(filename, "wb") as sink:
        pickle.dump(self, sink)

PyMongo-based database

Bases: BaseDatabase

Source code in simstring\database\mongo.py
class MongoDatabase(BaseDatabase):
    """MongoDB-backed string database.

    Each added string is stored in the ``strings`` collection as one document
    with the keys ``string``, ``features`` and ``size``.
    """

    # NOTE: the default ``host`` is evaluated once, when the class is defined,
    # from the MONGO_HOST environment variable (falling back to 'localhost').
    def __init__(self, feature_extractor, host=os.environ.get("MONGO_HOST", "localhost"), port=27017, database='simstring'):
        """Connect to MongoDB and make sure the lookup indexes exist.

        Args:
            feature_extractor: Object whose ``features(string)`` method returns
                the sized collection of features stored with each string.
            host: MongoDB host passed to ``MongoClient``.
            port: MongoDB port passed to ``MongoClient``.
            database: Name of the MongoDB database to use.
        """
        self.feature_extractor = feature_extractor

        client = MongoClient(host, port)
        db = client[database]
        self.collection = db.strings
        self.ensure_index()

    def add(self, string):
        """Insert ``string`` with its features and feature-set size."""
        features = self.feature_extractor.features(string)
        self.collection.insert_one({"string": string, "features": features, "size": len(features)})

    def all(self):
        """Return a list with the ``string`` value of every stored document."""
        return [document['string'] for document in self.all_documents()]

    def all_documents(self):
        """Return every document in the collection as a list."""
        return list(self.collection.find())

    def ensure_index(self):
        """Create the indexes on ``size`` and ``features`` used by lookups."""
        self.collection.create_index('size')
        self.collection.create_index('features')

    def lookup_strings_by_feature_set_size_and_feature(self, size, feature):
        """Return the set of strings whose documents match ``size`` and ``feature``.

        Args:
            size: Value the document's ``size`` field must equal.
            feature: Value matched against the document's ``features`` field.

        Returns:
            set: The ``string`` values of the matching documents.
        """
        matching = self.collection.find({"size": size, "features": feature})
        return {document['string'] for document in matching}

    def reset_collection(self):
        """Delete every document in the collection and re-create the indexes."""
        # Collection.remove() is deprecated since PyMongo 3 and removed in
        # PyMongo 4; delete_many({}) is the supported way to delete all documents.
        self.collection.delete_many({})
        self.ensure_index()