DDingcheol committed on
Commit
8af059d
·
1 Parent(s): d296422

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -11
app.py CHANGED
@@ -46,7 +46,7 @@ def get_csv_file(docs):
46
  def get_json_file(docs):
47
  pass
48
 
49
- '''
50
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
51
  def get_text_chunks(documents):
52
  text_splitter = RecursiveCharacterTextSplitter(
@@ -57,17 +57,7 @@ def get_text_chunks(documents):
57
 
58
  documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
59
  return documents # 나눈 청크를 반환합니다.
60
- '''
61
- def get_text_chunks(documents):
62
- text_splitter = RecursiveCharacterTextSplitter(
63
- chunk_size=1000, # 청크의 크기를 지정합니다.
64
- chunk_overlap=200, # 청크 사이의 중복을 지정합니다.
65
- length_function=len # 텍스트의 길이를 측정하는 함수를 지정합니다.
66
- )
67
 
68
- documents = [doc for doc in documents if doc.strip()] # 빈 텍스트를 제거합니다.
69
- text_chunks = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
70
- return text_chunks # 나눈 청크를 반환합니다.
71
 
72
  # ν…μŠ€νŠΈ μ²­ν¬λ“€λ‘œλΆ€ν„° 벑터 μŠ€ν† μ–΄λ₯Ό μƒμ„±ν•˜λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
73
  def get_vectorstore(text_chunks):
 
46
  def get_json_file(docs):
47
  pass
48
 
49
+
50
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
51
  def get_text_chunks(documents):
52
  text_splitter = RecursiveCharacterTextSplitter(
 
57
 
58
  documents = text_splitter.split_documents(documents) # 문서들을 청크로 나눕니다.
59
  return documents # 나눈 청크를 반환합니다.
 
 
 
 
 
 
 
60
 
 
 
 
61
 
62
  # ν…μŠ€νŠΈ μ²­ν¬λ“€λ‘œλΆ€ν„° 벑터 μŠ€ν† μ–΄λ₯Ό μƒμ„±ν•˜λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
63
  def get_vectorstore(text_chunks):