tony346 committed on
Commit
e5975dd
·
verified ·
1 Parent(s): 897ec15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -7
app.py CHANGED
@@ -14,7 +14,6 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
16
 
17
-
18
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
19
  def get_pdf_text(pdf_docs):
20
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
@@ -25,20 +24,39 @@ def get_pdf_text(pdf_docs):
25
  pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
26
  return pdf_doc # 추출한 텍스트를 반환합니다.
27
 
28
- # 과제
29
- # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(docs):
32
- pass
 
 
 
 
 
 
33
 
34
 
35
  def get_csv_file(docs):
36
- pass
 
 
 
 
 
 
37
 
38
  def get_json_file(docs):
39
- pass
 
 
 
 
 
 
 
 
 
 
40
 
41
-
42
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
43
  def get_text_chunks(documents):
44
  text_splitter = RecursiveCharacterTextSplitter(
 
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
16
 
 
17
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
18
  def get_pdf_text(pdf_docs):
19
  temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
 
24
  pdf_doc = pdf_loader.load() # 텍스트를 추출합니다.
25
  return pdf_doc # 추출한 텍스트를 반환합니다.
26
 
 
 
27
 
28
  def get_text_file(docs):
29
+ temp_dir = tempfile.TemporaryDirectory()
30
+ temp_filepath = os.path.join(temp_dir.name, docs.name)
31
+ with open(temp_filepath, "wb") as f:
32
+ f.write(docs.getvalue())
33
+ text_loader = TextLoader(temp_filepath)
34
+ text_doc = text_loader.load()
35
+ return text_doc
36
 
37
 
38
  def get_csv_file(docs):
39
+ temp_dir = tempfile.TemporaryDirectory()
40
+ temp_filepath = os.path.join(temp_dir.name, docs.name)
41
+ with open(temp_filepath, "wb") as f:
42
+ f.write(docs.getvalue())
43
+ csv_loader = CSVLoader(temp_filepath)
44
+ csv_doc = csv_loader.load()
45
+ return csv_doc
46
 
47
  def get_json_file(docs):
48
+ temp_dir = tempfile.TemporaryDirectory()
49
+ temp_filepath = os.path.join(temp_dir.name, docs.name)
50
+ with open(temp_filepath, "wb") as f:
51
+ f.write(docs.getvalue())
52
+ json_loader = JSONLoader(temp_filepath,
53
+ jq_schema='.scans[].relationships',
54
+ text_content=False)
55
+
56
+ json_doc = json_loader.load()
57
+ # print('json_doc = ',json_doc)
58
+ return json_doc
59
 
 
60
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
61
  def get_text_chunks(documents):
62
  text_splitter = RecursiveCharacterTextSplitter(