TheBobBob committed on
Commit
c47b49b
·
verified ·
1 Parent(s): 18b72e8

Delete selectBioModels.py

Browse files
Files changed (1) hide show
  1. selectBioModels.py +0 -81
selectBioModels.py DELETED
@@ -1,81 +0,0 @@
1
- import os
2
- import re
3
- import pandas as pd
4
- import shutil
5
-
6
- # Function to search BioModels and create the CSV file
7
def search_biomodels(directory, keywords, output_file):
    """Scan the files in *directory* for keyword matches and save them as CSV.

    A file is a hit when it contains the text ``<first keyword> is "<name>"``
    (matched case-insensitively) and the quoted name contains at least one
    of the keywords. For every hit, each identifier found after
    ``biomodels.db/`` in that file becomes one CSV row paired with the name.

    Args:
        directory: Folder whose direct entries are scanned (not recursive).
        keywords: Sequence of search terms. The first one anchors the
            ``is "<name>"`` lookup; all of them are tested against the name.
            An empty sequence yields a CSV with headers only.
        output_file: Path of the CSV file to write. It has the columns
            ``Biomodel Number`` and ``Biomodel Name``.

    Raises:
        OSError: If *directory* cannot be listed or *output_file* cannot be
            written. Unreadable individual files are reported and skipped.
    """
    biomodel_numbers_list = []
    matching_biomodels = []

    # Sorted so the row order of the CSV is reproducible between runs.
    entries = sorted(os.listdir(directory))

    if keywords:
        # Build the loop-invariant matchers once instead of once per file.
        name_pattern = re.compile(
            rf'{re.escape(keywords[0])} is "([^"]+)"', re.IGNORECASE
        )
        number_pattern = re.compile(r'biomodels\.db/(\w+)')
        lowered_keywords = [keyword.lower() for keyword in keywords]
    else:
        # Without keywords nothing can match; skip the scan rather than
        # failing on keywords[0] for every single file.
        entries = []

    for file in entries:
        file_path = os.path.join(directory, file)

        # Sub-directories cannot be opened as text files; ignore them quietly.
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                file_content = f.read()
        except OSError as e:
            print(f"Error processing file {file_path}: {e}")
            continue

        name_match = name_pattern.search(file_content)
        if name_match is None:
            continue

        biomodel_name = name_match.group(1)
        lowered_name = biomodel_name.lower()
        if not any(keyword in lowered_name for keyword in lowered_keywords):
            continue

        # One row per identifier, each paired with the file's model name.
        biomodel_numbers = number_pattern.findall(file_content)
        biomodel_numbers_list.extend(biomodel_numbers)
        matching_biomodels.extend([biomodel_name] * len(biomodel_numbers))

    # Both lists grow in lockstep, so they can be used as columns directly.
    df = pd.DataFrame({
        'Biomodel Number': biomodel_numbers_list,
        'Biomodel Name': matching_biomodels,
    })

    df.to_csv(output_file, index=False)
    print(f"Data saved to {output_file}")
48
-
49
- # Function to copy matching files to final_models directory
50
def copy_matching_files(csv_file, data_folder, final_models_folder):
    """Copy files under *data_folder* that mention a model listed in the CSV.

    A file qualifies when its text contains any ``Biomodel Name`` from the
    CSV (compared case-insensitively) or any ``Biomodel Number`` (compared
    verbatim). Each qualifying file is copied once into
    *final_models_folder*, which is created if it does not exist.

    Args:
        csv_file: CSV with ``Biomodel Number`` and ``Biomodel Name`` columns.
        data_folder: Root folder that is searched recursively.
        final_models_folder: Destination folder for the copies. Files with
            the same base name overwrite each other there.

    Raises:
        OSError: If the destination cannot be created or the CSV cannot be
            read. Individual files that cannot be read or copied are
            reported and skipped.
    """
    os.makedirs(final_models_folder, exist_ok=True)

    # Read every cell as text: type inference would turn numeric IDs into
    # ints and blank cells into NaN, and both break the substring tests.
    df = pd.read_csv(csv_file, dtype=str, keep_default_na=False)

    # Distinct (lower-cased name, number) pairs, prepared once for all files.
    search_terms = {
        (name.lower(), number)
        for name, number in zip(df['Biomodel Name'], df['Biomodel Number'])
    }

    destination = os.path.normcase(os.path.abspath(final_models_folder))

    for root, dirs, files in os.walk(data_folder):
        # Never descend into the destination itself; otherwise earlier
        # copies would be re-scanned and copied onto themselves.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d)))
            != destination
        ]

        for file in files:
            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()

                lowered_content = content.lower()
                # Empty terms are skipped: "" is a substring of everything.
                is_match = any(
                    (name and name in lowered_content)
                    or (number and number in content)
                    for name, number in search_terms
                )
                if not is_match:
                    continue

                # Copy once per file, however many CSV rows it matches.
                shutil.copy(file_path, final_models_folder)
            except OSError as e:
                print(f"Error processing file {file_path}: {e}")
                continue

            print(f"Copied: {file} to final_models")

    print(f"All matching biomodel files have been copied to {final_models_folder}")
72
-
73
- # Main execution
74
# Default locations used when this file is run as a script. They stay at
# module level so they remain importable by name.
directory = r'C:\Users\navan\Downloads\BioModelsRAG\BioModelsRAG\data'
output_file = r'C:\Users\navan\Downloads\BioModelsRAG\biomodels_output.csv'
final_models_folder = r'C:\Users\navan\Downloads\BioModelsRAG\final_models'

# Guarded so that importing this module does not prompt for input or touch
# the file system; the pipeline only runs on direct execution.
if __name__ == "__main__":
    # Whitespace-separated search terms typed by the user.
    user_keywords = input("Keyword you would like to search for: ").split()

    # Search and copy files
    search_biomodels(directory, user_keywords, output_file)
    copy_matching_files(output_file, directory, final_models_folder)