DDingcheol committed on
Commit
d296422
·
1 Parent(s): a26e592

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -46,7 +46,7 @@ def get_csv_file(docs):
46
  def get_json_file(docs):
47
  pass
48
 
49
-
50
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
51
  def get_text_chunks(documents):
52
  text_splitter = RecursiveCharacterTextSplitter(
@@ -57,7 +57,17 @@ def get_text_chunks(documents):
57
 
58
  documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
59
  return documents # 나눈 청크를 반환합니다.
 
 
 
 
 
 
 
60
 
 
 
 
61
 
62
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.
63
  def get_vectorstore(text_chunks):
 
46
  def get_json_file(docs):
47
  pass
48
 
49
+ '''
50
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
51
  def get_text_chunks(documents):
52
  text_splitter = RecursiveCharacterTextSplitter(
 
57
 
58
  documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
59
  return documents # 나눈 청크를 반환합니다.
60
+ '''
61
+ def get_text_chunks(documents):
62
+ text_splitter = RecursiveCharacterTextSplitter(
63
+ chunk_size=1000, # 청크의 크기를 지정합니다.
64
+ chunk_overlap=200, # 청크 사이의 중복을 지정합니다.
65
+ length_function=len # 텍스트의 길이를 측정하는 함수를 지정합니다.
66
+ )
67
 
68
+ documents = [doc for doc in documents if doc.strip()] # 빈 텍스트를 제거합니다.
69
+ text_chunks = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
70
+ return text_chunks # 나눈 청크를 반환합니다.
71
 
72
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.
73
  def get_vectorstore(text_chunks):