doa12 committed on
Commit
f3324c3
·
1 Parent(s): 7fdf14a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -16
app.py CHANGED
@@ -29,26 +29,33 @@ def get_pdf_text(pdf_docs):
29
  # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(txt_docs):
32
- temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
33
- temp_filepath = os.path.join(temp_dir.name, txt_docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
34
- with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
35
- f.write(txt_docs.getvalue()) # PDF ๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
36
- txt_loader = TextLoader(temp_filepath) # PyPDFLoader๋ฅผ ์‚ฌ์šฉํ•ด PDF๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
37
- txt_doc = txt_loader.load() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
38
- return txt_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
39
 
40
  def get_csv_file(csv_docs):
41
- temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
42
- temp_filepath = os.path.join(temp_dir.name, csv_docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
43
- with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
44
- f.write(csv_docs.getvalue()) # PDF ๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
45
- csv_loader = CSVLoader(temp_filepath) # PyPDFLoader๋ฅผ ์‚ฌ์šฉํ•ด PDF๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
46
- csv_doc = csv_loader.load() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
47
- return csv_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
48
 
49
  def get_json_file(json_docs):
50
- pass
51
-
 
 
 
 
 
 
 
52
 
53
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
54
  def get_text_chunks(documents):
 
29
  # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(txt_docs):
32
+ temp_dir = tempfile.TemporaryDirectory()
33
+ temp_filepath = os.path.join(temp_dir.name, txt_docs.name)
34
+ with open(temp_filepath, "wb") as f:
35
+ f.write(txt_docs.getvalue())
36
+ txt_loader = TextLoader(temp_filepath)
37
+ txt_doc = txt_loader.load()
38
+ return txt_doc
39
 
40
  def get_csv_file(csv_docs):
41
+ temp_dir = tempfile.TemporaryDirectory()
42
+ temp_filepath = os.path.join(temp_dir.name, csv_docs.name)
43
+ with open(temp_filepath, "wb") as f:
44
+ f.write(csv_docs.getvalue())
45
+ csv_loader = CSVLoader(temp_filepath)
46
+ csv_doc = csv_loader.load()
47
+ return csv_doc
48
 
49
  def get_json_file(json_docs):
50
+ temp_dir = tempfile.TemporaryDirectory()
51
+ temp_filepath = os.path.join(temp_dir.name, json_docs.name)
52
+ with open(temp_filepath, "wb") as f:
53
+ f.write(json_docs.getvalue())
54
+ json_loader = JSONLoader(temp_filepath,
55
+ jq_schema,
56
+ text_content=False)
57
+ json_doc = json_loader.load()
58
+ return json_doc
59
 
60
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
61
  def get_text_chunks(documents):