bupa1018 committed on
Commit
f7701d7
·
1 Parent(s): d092474

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -39
app.py CHANGED
@@ -50,45 +50,6 @@ api = HfApi()
50
 
51
 
52
 
53
- def split_python_code_into_chunks(texts, file_paths):
54
- chunks = []
55
- for text, file_path in zip(texts, file_paths):
56
- """
57
- Custom-made Python code splitter: the algorithm iterates through the child nodes of the AST (max child depth = 2),
58
- aims to keep the full body of each method together with its signature (+ can handle decorators) in one chunk, and adds method-specific metadata
59
- e.g. visibility: public, _internal
60
- type: "class", "methods", "command"(CLI commands)
61
- source:
62
-
63
-
64
- with the intent to use a filter when retrieving potentially useful snippets.
65
-
66
-
67
-
68
- """
69
- document_chunks = chunk_python_code_with_metadata(text, file_path)
70
- chunks.extend(document_chunks)
71
- return chunks
72
-
73
-
74
- # Split text into chunks
75
- def split_into_chunks(texts, references, chunk_size, chunk_overlap):
76
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
77
- chunks = []
78
-
79
- for text, reference in zip(texts, references):
80
- chunks.extend([
81
- Document(
82
- page_content=chunk,
83
- metadata={
84
- "source": reference,
85
- "usage": "doc"
86
- }
87
- )
88
- for chunk in text_splitter.split_text(text)
89
- ])
90
- return chunks
91
-
92
 
93
  # Setup Vectorstore
94
  def embed_documents_into_vectorstore(chunks, model_name, persist_directory):
 
50
 
51
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # Setup Vectorstore
55
  def embed_documents_into_vectorstore(chunks, model_name, persist_directory):