TheBobBob committed on
Commit
95b6366
·
verified ·
1 Parent(s): cf460dc

Delete splitBioModels.py

Browse files
Files changed (1) hide show
  1. splitBioModels.py +0 -50
splitBioModels.py DELETED
@@ -1,50 +0,0 @@
1
- from langchain_text_splitters import CharacterTextSplitter
2
- import os
3
- from typing import List, Optional
4
-
5
def splitBioModels(directory: str, final_items: Optional[List[str]] = None) -> List:
    """Read every file in a directory and split its text with a `` // `` separator.

    Each regular file directly inside ``directory`` is read as UTF-8 text and
    passed to a ``CharacterTextSplitter`` whose separator is the literal
    string ``" // "``. Every resulting document is tagged with the name of the
    file it came from and appended to the result list.

    Args:
        directory (str): Path to the folder containing the files. It is
            resolved to an absolute path before use.
        final_items (Optional[List[str]]): Items to put at the start of the
            result. The list is copied, never mutated. If None, the result
            starts empty.

    Returns:
        List: A new list holding the copied ``final_items`` followed by the
        document objects produced by the splitter (not plain strings). If
        ``directory`` does not exist, a message is printed and only the copied
        ``final_items`` are returned.
    """
    # Work on a copy so the caller's list is never modified.
    collected = [] if final_items is None else list(final_items)

    directory_path = os.path.abspath(directory)
    if not os.path.isdir(directory_path):
        print(f"Directory not found: {directory_path}")
        return collected

    # NOTE(review): CharacterTextSplitter merges adjacent pieces back together
    # up to chunk_size, so with a limit this large a file may well come back
    # as a single chunk -- confirm that this is the intended behaviour.
    text_splitter2 = CharacterTextSplitter(
        separator=" // ",
        chunk_size=1000000000,
        chunk_overlap=20,
        length_function=len,
        is_separator_regex=False,
    )

    # os.listdir() returns entries in arbitrary order; sort them so the order
    # of the returned chunks is reproducible between runs and platforms.
    for file in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, file)
        if not os.path.isfile(file_path):
            # Sub-directories and other non-files are not model files.
            continue
        # Deliberately best-effort: one bad file is reported and skipped so
        # that the remaining files are still processed.
        try:
            # Explicit encoding keeps decoding independent of the OS locale.
            with open(file_path, 'r', encoding="utf-8") as f:
                file_content = f.read()
            items = text_splitter2.create_documents([file_content])
            for item in items:
                # NOTE(review): metadata is set to the bare file name (a str).
                # LangChain documents normally carry a dict here; kept as-is
                # because callers presumably read it as a string -- verify
                # before changing.
                item.metadata = file
            collected.extend(items)
        except Exception as e:
            print(f"Error reading file {file_path}: {e}")

    return collected
47
-
48
-
49
-
50
-