|
from createVectorDB import createVectorDB
|
|
from splitBioModels import splitBioModels
|
|
from createDocuments import createDocuments
|
|
from generateResponse import generateResponse
|
|
from selectBioModels import search_biomodels
|
|
from selectBioModels import copy_matching_files
|
|
|
|
# --- Pipeline configuration (machine-specific absolute paths) ---------------

# Default value of main()'s `directory` parameter.
DATA_PATH = r"C:\Users\navan\Downloads\BioModelsRAG\BioModelsRAG\2data"

# Default value of main()'s `chroma_data_path` parameter.
CHROMA_DATA_PATH = r"C:\Users\navan\Downloads\BioModelsRAG\CHROMA_EMBEDDINGS_PATH"

# NOTE(review): main() declares its own `directory` parameter, which shadows
# this module-level name inside the function; nothing else in the visible
# code reads this value. Confirm whether it is still needed.
directory = r"C:\Users\navan\Downloads\BioModelsRAG\BioModelsRAG\data"

# Passed to search_biomodels() and copy_matching_files() by main().
output_file = r"C:\Users\navan\Downloads\BioModelsRAG\biomodels_output.csv"

# Passed to copy_matching_files() by main() as its third argument.
final_models_folder = r"C:\Users\navan\Downloads\BioModelsRAG\final_models"

# NOTE: this prompt runs when the module is loaded (including on import),
# not when main() is called. The answer is split on whitespace into a list.
user_keywords = input("Keyword you would like to search for: ").split()
|
|
|
|
|
|
def main(
    report: bool = True,
    directory: str = DATA_PATH,
    chroma_data_path: str = CHROMA_DATA_PATH,
    *,
    query: str = "What protein interacts with DesensitizedAch2?",
    collection_name: str = "123456789101112131415",
):
    """Run the BioModels pipeline end to end and return the generated answer.

    Steps, in order:

    1. ``search_biomodels(directory, user_keywords, output_file)``
    2. ``copy_matching_files(output_file, directory, final_models_folder)``
    3. ``splitBioModels`` on ``directory``, collecting items into a list
    4. ``createVectorDB`` to obtain a collection
    5. ``createDocuments`` to load the collected items into that collection
    6. ``generateResponse`` for ``query`` against the collection

    The helpers are imported from sibling modules; what each one does
    internally is not visible from this file.

    Args:
        report: When true, print a progress line after the collection is
            created and after the documents are added.
        directory: Directory handed to the search, copy and split steps.
        chroma_data_path: Forwarded to ``createVectorDB`` as
            ``chroma_data_path``.
        query: Question forwarded to ``generateResponse`` as ``query_text``.
        collection_name: Name forwarded to ``createVectorDB``.

    Returns:
        Whatever ``generateResponse`` returns for ``query``.
    """
    # Presumably filled in place by splitBioModels and then consumed by
    # createDocuments (neither call's return value is used) -- TODO confirm.
    final_items = []

    # `user_keywords`, `output_file` and `final_models_folder` are
    # module-level settings.
    search_biomodels(directory, user_keywords, output_file)
    copy_matching_files(output_file, directory, final_models_folder)

    # Use the caller-supplied directory. This call previously hard-coded
    # DATA_PATH, so a non-default `directory` was honoured by the two steps
    # above but ignored here.
    splitBioModels(directory=directory, final_items=final_items)

    collection = createVectorDB(
        collection_name=collection_name,
        chroma_data_path=chroma_data_path,
        embed_model="all-MiniLM-L6-v2",
        metadata={"hnsw:space": "cosine"},
    )
    if report:
        print("Collection created:", collection)

    createDocuments(final_items=final_items, collection=collection)
    if report:
        print("Documents added to collection.")

    return generateResponse(query_text=query, collection=collection)
|
|
|
|
# Script entry point: run the pipeline with its defaults and show the answer.
if __name__ == "__main__":
    print(main())
|
|
|
|
|