Database
Dict-based database
Bases: BaseDatabase
Source code in simstring\database\dict.py
class DictDatabase(BaseDatabase):
    """In-memory database that indexes strings by feature-set size and feature.

    Every added string is run through ``feature_extractor.features``; the
    string is then stored in plain Python containers keyed by the number of
    extracted features and by each individual feature. The whole object can
    be persisted with pickle via ``save``/``load`` and ``dumps``/``loads``.
    """

    def __init__(self, feature_extractor):
        """Create an empty database.

        Args:
            feature_extractor: Object exposing ``features(string)``, which
                returns the sized, iterable collection of features used to
                index a string.
        """
        self.feature_extractor = feature_extractor
        # Every added string, in insertion order (duplicates are kept).
        self.strings: List[str] = []
        # feature-set size -> strings whose feature set has that size.
        # NOTE(review): the original line carried the remark "3.10 and up
        # only" -- confirm what it refers to.
        self.feature_set_size_to_string_map: Dict[int, Set[str]] = defaultdict(set)
        # feature-set size -> feature -> strings containing that feature.
        # `defaultdict_set` is a factory defined elsewhere in this module.
        self.feature_set_size_and_feature_to_string_map: dict = defaultdict(defaultdict_set)

    def add(self, string: str):
        """Extract the features of ``string`` and index it.

        Args:
            string: The string to add to the database.
        """
        features = self.feature_extractor.features(string)
        size = len(features)
        self.strings.append(string)
        self.feature_set_size_to_string_map[size].add(string)
        for feature in features:
            self.feature_set_size_and_feature_to_string_map[size][feature].add(string)

    def all(self) -> List[str]:
        """Return the list of all added strings.

        Returns:
            The internal list itself (not a copy), in insertion order.
        """
        return self.strings

    def lookup_strings_by_feature_set_size_and_feature(self, size: int, feature: str) -> Set[str]:
        """Return the strings with ``size`` features that contain ``feature``.

        Args:
            size: Feature-set size to look under.
            feature: Feature the strings must contain.

        Returns:
            The matching set of strings; an empty set when nothing matches.
        """
        # Use non-inserting lookups: indexing the nested defaultdicts directly
        # would create an empty entry for every missed (size, feature) pair,
        # so read-only queries would grow the index and alter pickled state.
        strings_by_feature = self.feature_set_size_and_feature_to_string_map.get(size)
        if strings_by_feature is None:
            return set()
        matches = strings_by_feature.get(feature)
        return matches if matches is not None else set()

    def min_feature_size(self) -> int:
        """Return the smallest feature-set size present in the database.

        Raises:
            ValueError: If no string has been added yet.
        """
        return min(self.feature_set_size_to_string_map)

    def max_feature_size(self) -> int:
        """Return the largest feature-set size present in the database.

        Raises:
            ValueError: If no string has been added yet.
        """
        return max(self.feature_set_size_to_string_map)

    def save(self, filename: str):
        """Save the database to a file as defined by filename.

        Args:
            filename: Filename to save the db at. Should include the file
                extension.

        Returns:
            None
        """
        with open(filename, "wb") as f:
            pickle.dump(self, f)

    @staticmethod
    def load(filename: str) -> "DictDatabase":
        """Load a database from a pickle file.

        Args:
            filename: Name of the file to load.

        Returns:
            The unpickled database.
        """
        # SECURITY: unpickling can execute arbitrary code. Only load files
        # that come from a trusted source.
        with open(filename, "rb") as f:
            db = pickle.load(f)
        return db

    def dumps(self) -> bytes:
        """Serialize the database to a pickle byte stream.

        Returns:
            The pickled database as bytes.
        """
        return pickle.dumps(self)

    @staticmethod
    def loads(binary_data: bytes) -> "DictDatabase":
        """Load a database from a pickle byte stream.

        Currently this only unpickles the data.

        Args:
            binary_data: Pickled bytes, e.g. as produced by ``dumps``.

        Returns:
            The unpickled database.
        """
        # SECURITY: unpickling can execute arbitrary code. Only pass bytes
        # that come from a trusted source.
        return pickle.loads(binary_data)
dumps()
Generate pickle byte stream
Returns:
Type | Description |
---|---|
bytes
|
The database serialized as a pickle byte stream |
Source code in simstring\database\dict.py
def dumps(self) -> bytes:
    """Serialize this object to a pickle byte stream.

    Returns:
        The pickled representation of this object as bytes.
    """
    payload = pickle.dumps(self)
    return payload
load(filename)
staticmethod
Load db from a file
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filename |
str
|
Name of the file to load |
required |
Returns:
Name | Type | Description |
---|---|---|
DictDatabase |
DictDatabase
|
the db |
Source code in simstring\database\dict.py
@staticmethod
def load(filename:str) -> "DictDatabase":
"""Load db from a file
Args:
filename (str): Name of the file to load
Returns:
DictDatabase: the db
"""
with open(filename, "rb") as f:
db = pickle.load(f)
return db
loads(binary_data)
staticmethod
Load a binary string representing a database
Initially only unpickles the data
Parameters:
Name | Type | Description | Default |
---|---|---|---|
binary_data |
bytes
|
Pickled bytes to unpickle |
required |
Returns:
Type | Description |
---|---|
DictDatabase
|
Model object |
Source code in simstring\database\dict.py
@staticmethod
def loads(binary_data: bytes) -> "DictDatabase":
"""Load a binary string representing a database
Initially only unpickles the data
Args:
binary_data (str): String of data to unpickle
Returns:
Model object
"""
return pickle.loads(binary_data)
save(filename)
Save the database to a file as defined by filename.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filename |
str
|
Filename to save the db at. Should include the file extension. |
required |
Returns:
Type | Description |
---|---|
None |
Source code in simstring\database\dict.py
def save(self, filename: str):
    """Write this object to ``filename`` as a pickle file.

    Args:
        filename: Destination path, including the file extension.

    Returns:
        None
    """
    # Binary mode is required: pickle produces bytes, not text.
    with open(filename, "wb") as handle:
        pickle.dump(self, handle)
PyMongo-based database
Bases: BaseDatabase
Source code in simstring\database\mongo.py
class MongoDatabase(BaseDatabase):
    """PyMongo-backed database that stores one document per added string.

    Each document has the keys ``"string"``, ``"features"`` and ``"size"``
    (the number of extracted features) and lives in the ``strings``
    collection of the selected MongoDB database.
    """

    def __init__(
        self,
        feature_extractor,
        # Evaluated once, when the class is defined: later changes to the
        # MONGO_HOST environment variable do not affect the default.
        host=os.environ.get("MONGO_HOST", "localhost"),
        port=27017,
        database='simstring',
    ):
        """Connect to MongoDB and make sure the lookup indexes exist.

        Args:
            feature_extractor: Object exposing ``features(string)``.
            host: MongoDB host. Defaults to the ``MONGO_HOST`` environment
                variable if it was set at import time, else ``'localhost'``.
            port: MongoDB port.
            database: Name of the MongoDB database to use.
        """
        self.feature_extractor = feature_extractor
        client = MongoClient(host, port)
        self.collection = client[database].strings
        self.ensure_index()

    def add(self, string):
        """Extract the features of ``string`` and insert it as a document."""
        features = self.feature_extractor.features(string)
        self.collection.insert_one({"string": string, "features": features, "size": len(features)})

    def all(self):
        """Return a list with the ``"string"`` value of every stored document."""
        return [document['string'] for document in self.all_documents()]

    def all_documents(self):
        """Return every document in the collection as a list."""
        return list(self.collection.find())

    def ensure_index(self):
        """Create indexes on the ``size`` and ``features`` fields."""
        self.collection.create_index('size')
        self.collection.create_index('features')

    def lookup_strings_by_feature_set_size_and_feature(self, size, feature):
        """Return the set of strings matching both ``size`` and ``feature``.

        Args:
            size: Value the document's ``size`` field must equal.
            feature: Value matched against the document's ``features`` field
                (presumably array containment -- confirm against how the
                extractor's features are stored).

        Returns:
            A set with the ``"string"`` value of every matching document.
        """
        cursor = self.collection.find({"size": size, "features": feature})
        return {document['string'] for document in cursor}

    def reset_collection(self):
        """Delete every document in the collection and re-ensure the indexes."""
        # Collection.remove() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; delete_many({}) is the supported way to delete all
        # documents.
        self.collection.delete_many({})
        self.ensure_index()