nijoow committed on
Commit
ac9b36c
·
1 Parent(s): 8cfe6ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -20
app.py CHANGED
@@ -14,16 +14,7 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
14
  import tempfile # μž„μ‹œ νŒŒμΌμ„ μƒμ„±ν•˜κΈ° μœ„ν•œ λΌμ΄λΈŒλŸ¬λ¦¬μž…λ‹ˆλ‹€.
15
  import os
16
 
17
- for file in docs:
18
- print('file -type : ',file.type)
19
- if file.type == 'text/plain' :
20
- doc_list.extend(get_text_file(file))
21
- elif file.type in ['application/octet-stream','application/pdf'] :
22
- doc_list.extend(get_pdf_text(file))
23
- elif file.type == 'text/csv' :
24
- doc_list.extend(get_csv_file(file))
25
- elif file.type == 'application/json' :
26
- doc_list.extend(get_json_file(file))
27
 
28
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
29
  def get_pdf_text(pdf_docs):
@@ -38,17 +29,23 @@ def get_pdf_text(pdf_docs):
38
  # 과제
39
  # 아래 텍스트 추출 함수를 작성
40
 
41
- def get_text_file(docs):
42
- text_list = []
43
- for doc in docs:
44
- text_list.append(get_text_from_text_file(doc))
45
- return text_list
 
 
 
46
 
47
- def get_csv_file(docs):
48
- text_list = []
49
- for doc in docs:
50
- text_list.append(get_text_from_csv_file(doc))
51
- return text_list
 
 
 
52
 
53
  def get_json_file(docs):
54
  text_list = []
 
14
  import tempfile # μž„μ‹œ νŒŒμΌμ„ μƒμ„±ν•˜κΈ° μœ„ν•œ λΌμ΄λΈŒλŸ¬λ¦¬μž…λ‹ˆλ‹€.
15
  import os
16
 
17
+
 
 
 
 
 
 
 
 
 
18
 
19
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
20
  def get_pdf_text(pdf_docs):
 
29
  # 과제
30
  # 아래 텍스트 추출 함수를 작성
31
 
32
def get_text_file(text_docs):
    """Load an uploaded plain-text file into documents via ``TextLoader``.

    The upload is first written to a temporary file because the loader is
    constructed from a filesystem path rather than from in-memory bytes.

    Args:
        text_docs: Uploaded file object exposing ``name`` and ``getvalue()``
            (presumably a Streamlit ``UploadedFile`` -- confirm against the
            caller).

    Returns:
        Whatever ``TextLoader.load()`` returns for the temporary copy.
    """
    # The context manager removes the temporary directory as soon as loading
    # has finished, instead of relying on garbage collection.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps the temporary copy inside temp_dir even if the
        # uploaded name carries directory components.
        temp_filepath = os.path.join(temp_dir, os.path.basename(text_docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(text_docs.getvalue())
        # Fixed: the class is TextLoader (not TEXTLoader), the path variable
        # is temp_filepath, and load() must be called on this loader.
        text_loader = TextLoader(temp_filepath)
        return text_loader.load()
40
 
41
def get_csv_file(csv_docs):
    """Load an uploaded CSV file into documents via ``CSVLoader``.

    The upload is first written to a temporary file because the loader is
    constructed from a filesystem path rather than from in-memory bytes.

    Args:
        csv_docs: Uploaded file object exposing ``name`` and ``getvalue()``
            (presumably a Streamlit ``UploadedFile`` -- confirm against the
            caller).

    Returns:
        Whatever ``CSVLoader.load()`` returns for the temporary copy.
    """
    # The context manager removes the temporary directory as soon as loading
    # has finished, instead of relying on garbage collection.
    with tempfile.TemporaryDirectory() as temp_dir:
        # basename() keeps the temporary copy inside temp_dir even if the
        # uploaded name carries directory components.
        temp_filepath = os.path.join(temp_dir, os.path.basename(csv_docs.name))
        with open(temp_filepath, "wb") as f:
            f.write(csv_docs.getvalue())
        # Fixed: the path variable is temp_filepath, load() must be called on
        # csv_loader, and the loaded data is what gets returned (the original
        # returned an undefined name).
        csv_loader = CSVLoader(temp_filepath)
        return csv_loader.load()
49
 
50
  def get_json_file(docs):
51
  text_list = []