Moonjunho committed on
Commit
42e48fb
·
1 Parent(s): 188da05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -22
app.py CHANGED
@@ -25,33 +25,30 @@ def get_pdf_text(pdf_docs):
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
  def get_text_file(docs):
28
- text = file.getvalue().decode("utf-8") # 텍스트 파일을 문자열로 디코딩합니다.
29
- return [text] # 텍스트를 리스트에 담아 반환합니다.
30
 
31
  def get_csv_file(docs):
32
- import pandas as pd
33
-
34
- # CSV ํŒŒ์ผ์„ Pandas DataFrame์œผ๋กœ ์ฝ์Šต๋‹ˆ๋‹ค.
35
- df = pd.read_csv(file)
36
-
37
- # 여기에서 원하는 열을 선택하거나 전체 파일을 읽어올 수 있습니다.
38
- # 예를 들어, 'text_column' 열에서 텍스트를 추출하는 방법은 다음과 같습니다.
39
- texts = df['text_column'].tolist()
40
-
41
- return texts # 텍스트 리스트를 반환합니다.
42
 
43
  def get_json_file(docs):
44
- import json
45
-
46
- # JSON 파일을 디코딩하여 데이터를 추출합니다.
47
- data = json.load(file)
48
-
49
- # 여기에서 적절한 방법으로 JSON 데이터에서 텍스트를 추출합니다.
50
- # 예를 들어, 'text' 키에 해당하는 값을 추출하는 방법은 다음과 같습니다.
51
- texts = [item['text'] for item in data]
52
-
53
- return texts
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
57
  def get_text_chunks(documents):
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
  def get_text_file(docs):
28
+ pass
 
29
 
30
  def get_csv_file(docs):
31
+ pass
 
 
 
 
 
 
 
 
 
32
 
33
  def get_json_file(docs):
34
+ temp_dir = tempfile.TemporaryDirectory()
35
+ temp_filepath = os.path.join(temp_dir.name, pdf_docs.name)
36
+ with open(temp_filepath, "wb") as f:
37
+ f.write(pdf_docs.getvalue())
 
 
 
 
 
 
38
 
39
+
40
+ loader = JSONLoader(
41
+ file_path='./example_data/facebook_chat.json',
42
+ jq_schema='.messages[].content',
43
+ text_content=False)
44
+
45
+ data = loader.load()
46
+ return data
47
+
48
+
49
+
50
+
51
+
52
 
53
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
54
  def get_text_chunks(documents):