[email protected] committed on
Commit
333bd63
·
1 Parent(s): a7f4e5b

edit codes

Browse files
Files changed (1) hide show
  1. app.py +19 -25
app.py CHANGED
@@ -31,22 +31,22 @@ def get_pdf_text(pdf_docs):
31
  # 아래 텍스트 추출 함수를 작성
32
 
33
  def get_text_file(docs):
34
- temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
35
- temp_filepath = os.path.join(temp_dir.name, docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
36
- with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
37
  f.write(docs.getvalue()) # TXT ๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
38
- with open(temp_filepath, "r") as txt_file: # TXT ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
39
  txt_doc = txt_file.read() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
40
  return txt_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
41
 
42
  def get_csv_file(docs):
43
- temp_dir = tempfile.TemporaryDirectory()
44
- temp_filepath = os.path.join(temp_dir.name, docs.name)
45
- with open(temp_filepath, "wb") as f:
46
  f.write(docs.getvalue())
47
 
48
  csv_data = []
49
- with open(temp_filepath, "r") as csv_file:
50
  csv_reader = csv.reader(csv_file)
51
  for row in csv_reader:
52
  csv_data.append(row)
@@ -54,14 +54,16 @@ def get_csv_file(docs):
54
  return csv_data
55
 
56
  def get_json_file(docs):
57
- temp_dir = tempfile.TemporaryDirectory()
58
- temp_filepath = os.path.join(temp_dir.name, docs.name)
59
- with open(temp_filepath, "wb") as f:
60
  f.write(docs.getvalue())
61
-
62
- with open(temp_filepath, "r") as json_file:
63
- json_data = json.load(json_file)
64
-
 
 
65
  return json_data
66
 
67
 
@@ -72,17 +74,9 @@ def get_text_chunks(documents):
72
  chunk_overlap=200, # ์ฒญํฌ ์‚ฌ์ด์˜ ์ค‘๋ณต์„ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
73
  length_function=len # ํ…์ŠคํŠธ์˜ ๊ธธ์ด๋ฅผ ์ธก์ •ํ•˜๋Š” ํ•จ์ˆ˜๋ฅผ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
74
  )
75
- texts = []
76
- for doc in documents:
77
- if isinstance(doc, str):
78
- # doc์ด ๋ฌธ์ž์—ด์ธ ๊ฒฝ์šฐ ์ง์ ‘ texts์— ์ถ”๊ฐ€
79
- texts.append(doc)
80
- else:
81
- # doc์ด 'page_content' ์†์„ฑ์„ ๊ฐ–์ถ˜ ๊ฐ์ฒด์ธ ๊ฒฝ์šฐ
82
- texts.append(doc.page_content)
83
 
84
- chunks = text_splitter.split_documents(texts)
85
- return chunks
86
 
87
 
88
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.
 
31
  # 아래 텍스트 추출 함수를 작성
32
 
33
  def get_text_file(docs):
34
+ temp_dir2 = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
35
+ temp_filepath2 = os.path.join(temp_dir2.name, docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
36
+ with open(temp_filepath2, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
37
  f.write(docs.getvalue()) # TXT ๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
38
+ with open(temp_filepath2, "r") as txt_file: # TXT ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
39
  txt_doc = txt_file.read() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
40
  return txt_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
41
 
42
  def get_csv_file(docs):
43
+ temp_dir3 = tempfile.TemporaryDirectory()
44
+ temp_filepath3 = os.path.join(temp_dir3.name, docs.name)
45
+ with open(temp_filepath3, "wb") as f:
46
  f.write(docs.getvalue())
47
 
48
  csv_data = []
49
+ with open(temp_filepath3, "r") as csv_file:
50
  csv_reader = csv.reader(csv_file)
51
  for row in csv_reader:
52
  csv_data.append(row)
 
54
  return csv_data
55
 
56
  def get_json_file(docs):
57
+ temp_dir4 = tempfile.TemporaryDirectory()
58
+ temp_filepath4 = os.path.join(temp_dir4.name, docs.name)
59
+ with open(temp_filepath4, "wb") as f:
60
  f.write(docs.getvalue())
61
+ json_loader = JSONLoader(
62
+ file_path=temp_filepath4,
63
+ jq_schema='.messages[].content',
64
+ text_content=False
65
+ )
66
+ json_data = json_loader.load()
67
  return json_data
68
 
69
 
 
74
  chunk_overlap=200, # ์ฒญํฌ ์‚ฌ์ด์˜ ์ค‘๋ณต์„ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
75
  length_function=len # ํ…์ŠคํŠธ์˜ ๊ธธ์ด๋ฅผ ์ธก์ •ํ•˜๋Š” ํ•จ์ˆ˜๋ฅผ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
76
  )
 
 
 
 
 
 
 
 
77
 
78
+ documents = text_splitter.split_documents(documents) # ๋ฌธ์„œ๋“ค์„ ์ฒญํฌ๋กœ ๋‚˜๋ˆ•๋‹ˆ๋‹ค
79
+ return documents # ๋‚˜๋ˆˆ ์ฒญํฌ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
80
 
81
 
82
  # 텍스트 청크들로부터 벡터 스토어를 생성하는 함수입니다.