Spaces:

DDingcheol
/

DagAIChatbotStreamlitSpace

Sleeping

DDingcheol committed on Nov 17, 2023

Commit

b035b13

1 Parent(s): e7ba74a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -49,7 +49,7 @@ def get_csv_file(docs):
 def get_json_file(docs):
     # Stub: JSON handling is not implemented; the body is empty, so the call returns None.
     pass
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 def get_text_chunks(documents):
     text_splitter = RecursiveCharacterTextSplitter(
@@ -60,7 +60,25 @@ def get_text_chunks(documents):
     documents = text_splitter.split_documents(documents)  # 문서들을 청크로 나눕니다.
     return documents  # 나눈 청크를 반환합니다.
 # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.
 def get_vectorstore(text_chunks):

 def get_json_file(docs):
     # Stub: JSON handling is not implemented; the body is empty, so the call returns None.
     pass
+'''
 # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 def get_text_chunks(documents):
     text_splitter = RecursiveCharacterTextSplitter(
     documents = text_splitter.split_documents(documents)  # 문서들을 청크로 나눕니다.
     return documents  # 나눈 청크를 반환합니다.
+'''
+def get_text_chunks(documents):
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,  # 청크의 크기를 지정합니다.
+        chunk_overlap=200,  # 청크 사이의 중복을 지정합니다.
+        length_function=len  # 텍스트의 길이를 측정하는 함수를 지정합니다.
+    )
+    text_list = []  # 각 문서의 텍스트를 담을 리스트를 생성합니다.
+    for doc in documents:
+        try:
+            text = doc.getvalue().decode("utf-8")  # 파일 내용을 utf-8 형식으로 디코딩하여 텍스트로 변환합니다.
+            text_list.append(text)
+        except Exception as e:
+            print(f"An error occurred while processing a document: {e}")
+    # 문서들을 청크로 나눕니다.
+    documents = text_splitter.split_documents(text_list)
+    return documents  # 나눈 청크를 반환합니다.
 # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.
 def get_vectorstore(text_chunks):