TheBobBob
/

BioModelsRAG

Model card Files and versions Community

TheBobBob committed on Sep 3, 2024

Commit

95b6366

·

verified ·

1 Parent(s): cf460dc

Delete splitBioModels.py

Files changed (1) hide show

splitBioModels.py +0 -50

splitBioModels.py DELETED Viewed

@@ -1,50 +0,0 @@
-from langchain_text_splitters import CharacterTextSplitter
-import os
-from typing import List, Optional
-def splitBioModels(directory: str, final_items: Optional[List[str]] = None) -> List[str]:
-    """Split every file in a directory into chunks on the ``"  // "`` separator.
-
-    Each regular file directly inside ``directory`` is read as UTF-8 text and
-    handed to a ``CharacterTextSplitter`` configured with the literal
-    separator ``"  // "``. The resulting chunks are tagged with the name of
-    the file they came from and appended to the result list. Files are
-    processed in sorted name order so the output order is reproducible.
-
-    Args:
-        directory (str): Path to the folder containing the files. It is
-            resolved to an absolute path before use.
-        final_items (Optional[List[str]]): Items to place at the start of the
-            result. The list is copied, so the caller's list is never
-            mutated. If None, the result starts empty.
-
-    Returns:
-        List[str]: The initial items followed by the chunks produced for each
-        file. If ``directory`` is not an existing directory, a message is
-        printed and only the initial items are returned.
-
-    Note:
-        Any error while reading or splitting a file is printed and that file
-        is skipped; the function does not raise for individual files.
-        NOTE(review): the chunks are whatever ``create_documents`` returns
-        (not plain ``str`` as the annotation says), and ``metadata`` is set to
-        the bare file name string; both are kept as-is because callers may
-        depend on them -- confirm before changing.
-    """
-    text_splitter2 = CharacterTextSplitter(
-        separator="  // ",
-        # Effectively "no size limit": pieces are only cut at the separator.
-        chunk_size=1000000000,
-        chunk_overlap=20,
-        length_function=len,
-        is_separator_regex=False,
-    )
-    # Work on a copy so the caller's list is left untouched.
-    final_items = [] if final_items is None else list(final_items)
-    directory_path = os.path.abspath(directory)
-    if not os.path.isdir(directory_path):
-        print(f"Directory not found: {directory_path}")
-        return final_items
-    # os.listdir returns entries in arbitrary order; sort for stable output.
-    for file in sorted(os.listdir(directory_path)):
-        file_path = os.path.join(directory_path, file)
-        # Sub-directories cannot be opened as text files; skip them quietly
-        # instead of reporting a spurious read error.
-        if os.path.isdir(file_path):
-            continue
-        try:
-            # Explicit encoding avoids platform-dependent decoding.
-            with open(file_path, 'r', encoding="utf-8") as f:
-                file_content = f.read()
-            items = text_splitter2.create_documents([file_content])
-            last_part = os.path.basename(file_path)
-            for item in items:
-                item.metadata = last_part
-            final_items.extend(items)
-        except Exception as e:
-            # Best-effort: report the problem and continue with the next file.
-            print(f"Error reading file {file_path}: {e}")
-    return final_items