Commit
·
b035b13
1
Parent(s):
e7ba74a
Update app.py
Browse files
app.py
CHANGED
@@ -49,7 +49,7 @@ def get_csv_file(docs):
|
|
49 |
def get_json_file(docs):
    """Stub loader for JSON documents — not yet implemented.

    Accepts an iterable of uploaded files and currently does nothing,
    returning ``None``; kept so callers can wire it up before the real
    JSON-parsing logic lands.
    """
    pass
|
51 |
|
52 |
-
|
53 |
# λ¬Έμλ€μ μ²λ¦¬νμ¬ ν
μ€νΈ μ²ν¬λ‘ λλλ ν¨μμ
λλ€.
|
54 |
def get_text_chunks(documents):
|
55 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -60,7 +60,25 @@ def get_text_chunks(documents):
|
|
60 |
|
61 |
documents = text_splitter.split_documents(documents) # λ¬Έμλ€μ μ²ν¬λ‘ λλλλ€.
|
62 |
return documents # λλ μ²ν¬λ₯Ό λ°νν©λλ€.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
# ν
μ€νΈ μ²ν¬λ€λ‘λΆν° λ²‘ν° μ€ν μ΄λ₯Ό μμ±νλ ν¨μμ
λλ€.
|
66 |
def get_vectorstore(text_chunks):
|
|
|
49 |
def get_json_file(docs):
|
50 |
pass
|
51 |
|
52 |
+
'''
|
53 |
# λ¬Έμλ€μ μ²λ¦¬νμ¬ ν
μ€νΈ μ²ν¬λ‘ λλλ ν¨μμ
λλ€.
|
54 |
def get_text_chunks(documents):
|
55 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
60 |
|
61 |
documents = text_splitter.split_documents(documents) # λ¬Έμλ€μ μ²ν¬λ‘ λλλλ€.
|
62 |
return documents # λλ μ²ν¬λ₯Ό λ°νν©λλ€.
|
63 |
+
'''
|
64 |
+
def get_text_chunks(documents):
    """Split uploaded files into overlapping text chunks.

    Args:
        documents: iterable of uploaded file objects (e.g. Streamlit
            ``UploadedFile``); each must expose ``getvalue()`` returning
            the raw file bytes. — assumes UTF-8 content; TODO confirm.

    Returns:
        A list of LangChain ``Document`` chunks produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,      # maximum characters per chunk
        chunk_overlap=200,    # characters shared between adjacent chunks
        length_function=len,  # measure chunk length in characters
    )

    # Decode each uploaded file to text. Decoding failures are logged and
    # the file is skipped (best-effort, preserving the original behavior).
    text_list = []
    for doc in documents:
        try:
            text_list.append(doc.getvalue().decode("utf-8"))
        except Exception as e:
            print(f"An error occurred while processing a document: {e}")

    # BUG FIX: the original called split_documents(text_list), but
    # split_documents() expects Document objects with a .page_content
    # attribute, not raw strings. create_documents() wraps each string
    # in a Document and then splits it, which is the correct API here.
    return text_splitter.create_documents(text_list)
|
82 |
|
83 |
# ν
μ€νΈ μ²ν¬λ€λ‘λΆν° λ²‘ν° μ€ν μ΄λ₯Ό μμ±νλ ν¨μμ
λλ€.
|
84 |
def get_vectorstore(text_chunks):
|