chojo12 committed on
Commit
daae5fc
·
1 Parent(s): 9934126

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -7
app.py CHANGED
@@ -24,15 +24,45 @@ def get_pdf_text(pdf_docs):
24
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
- def get_text_file(docs):
28
- pass
29
-
30
- def get_csv_file(docs):
31
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- def get_json_file(docs):
34
- pass
35
 
 
 
 
 
 
 
 
 
36
 
37
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
38
  def get_text_chunks(documents):
 
24
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
def get_text_file(txt_docs):
    """Return the text content of a plain-text document.

    The previous implementation wrote the input to a file inside a
    ``tempfile.TemporaryDirectory`` that was never cleaned up and then read
    it straight back. The only observable effect of that round-trip was
    universal-newline normalisation, which is reproduced here in memory.

    Args:
        txt_docs: The document as a ``str``, as UTF-8 encoded ``bytes``, or
            as a file-like object exposing ``getvalue()`` or ``read()``
            (for example an in-memory upload buffer).

    Returns:
        The document text with ``\\r\\n`` and ``\\r`` line endings
        normalised to ``\\n``.

    Raises:
        TypeError: If the input cannot be interpreted as text.
        UnicodeDecodeError: If a bytes payload is not valid UTF-8.
    """
    # File-like objects carry their payload behind getvalue()/read().
    if hasattr(txt_docs, "getvalue"):
        txt_docs = txt_docs.getvalue()
    elif hasattr(txt_docs, "read"):
        txt_docs = txt_docs.read()

    if isinstance(txt_docs, (bytes, bytearray)):
        txt_docs = bytes(txt_docs).decode("utf-8")

    if not isinstance(txt_docs, str):
        # Same exception type the old f.write() call raised for non-str input.
        raise TypeError(
            f"get_text_file expected text, got {type(txt_docs).__name__}"
        )

    # Reading the temp file back in text mode used to translate line endings;
    # keep that so existing callers see the same text.
    return txt_docs.replace("\r\n", "\n").replace("\r", "\n")
38
+
39
+
40
+
41
+
42
def get_csv_file(csv_docs):
    """Extract the contents of a CSV document as plain text.

    Each parsed row is re-joined with commas and rows are joined with
    newlines, so quoting is removed from the output.

    The previous implementation leaked a ``tempfile.TemporaryDirectory`` and
    wrote ``csv_docs.getvalue()`` to a text-mode file, which raises
    ``TypeError`` when the buffer holds bytes. The data is now parsed
    entirely in memory.

    Args:
        csv_docs: A file-like object exposing ``getvalue()`` or ``read()``,
            or the raw CSV payload as ``str`` or ``bytes``. A bytes payload
            is decoded as UTF-8 and a leading byte-order mark is dropped.

    Returns:
        The CSV content as a single string, one line per row.

    Raises:
        TypeError: If the input cannot be interpreted as text.
        UnicodeDecodeError: If a bytes payload is not valid UTF-8.
    """
    import io  # local import: the original block did not rely on ``io``

    if hasattr(csv_docs, "getvalue"):
        raw = csv_docs.getvalue()
    elif hasattr(csv_docs, "read"):
        raw = csv_docs.read()
    else:
        raw = csv_docs

    if isinstance(raw, (bytes, bytearray)):
        raw = bytes(raw).decode("utf-8-sig")
    elif not isinstance(raw, str):
        raise TypeError(
            f"get_csv_file expected CSV text, got {type(raw).__name__}"
        )

    # The old code re-read the temp file with universal newlines; normalise
    # here so quoted multi-line fields come out the same way.
    raw = raw.replace("\r\n", "\n").replace("\r", "\n")

    rows = csv.reader(io.StringIO(raw))
    return "\n".join(",".join(row) for row in rows)
56
 
 
 
57
 
58
def get_json_file(json_docs, key_to_extract):
    """Extract the value stored under one top-level key of a JSON document.

    Args:
        json_docs: The JSON payload as ``str`` or ``bytes``, or a file-like
            object exposing ``getvalue()`` or ``read()``.
        key_to_extract: Name of the top-level key whose value is returned.

    Returns:
        The value stored under ``key_to_extract``. An empty string is
        returned when the key is missing or when the top-level JSON value is
        not an object. ``None`` is returned when the payload cannot be
        decoded as JSON.
    """
    # File-like objects carry their payload behind getvalue()/read().
    if hasattr(json_docs, "getvalue"):
        json_docs = json_docs.getvalue()
    elif hasattr(json_docs, "read"):
        json_docs = json_docs.read()

    try:
        json_data = json.loads(json_docs)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Undecodable bytes are treated like malformed JSON.
        print(f"Error decoding JSON: {e}")
        return None

    # Lists, numbers and strings have no .get(); treat them like a missing
    # key instead of raising AttributeError.
    if not isinstance(json_data, dict):
        return ''

    return json_data.get(key_to_extract, '')
66
 
67
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
68
  def get_text_chunks(documents):