Spaces:
Sleeping
Sleeping
Commit
·
e2b23b2
1
Parent(s):
feff33b
Update app.py
Browse files
app.py
CHANGED
@@ -25,15 +25,30 @@ def get_pdf_text(pdf_docs):
|
|
25 |
|
26 |
# 과제
|
27 |
# 아래 텍스트 추출 함수를 작성
|
28 |
-
def get_text_file(
|
29 |
try:
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
|
37 |
|
38 |
def get_csv_file(docs):
    """Stub that ignores ``docs`` and returns ``None``.

    No extraction logic is implemented in this function yet.
    """
    return None
|
|
|
25 |
|
26 |
# 과제
|
27 |
# 아래 텍스트 추출 함수를 작성
|
28 |
+
def get_text_file(docs):
    """Return the combined UTF-8 text of every file-like object in ``docs``.

    Args:
        docs: Iterable of binary file-like objects whose ``read()`` returns
            ``bytes``. ``None`` or an empty iterable yields ``""``.

    Returns:
        str: The decoded contents of all files, concatenated in iteration
        order with no separator inserted between them.

    Raises:
        UnicodeDecodeError: If a file's contents are not valid UTF-8.
    """
    # The earlier draft opened a ``try:`` with no ``except``/``finally``
    # (a SyntaxError) and copied everything through a temporary file only to
    # read it straight back. Decoding in memory produces the same string and
    # leaves no temp file or directory behind if a read or decode fails.
    return "".join(file.read().decode("utf-8") for file in docs or ())
|
51 |
|
|
|
52 |
|
53 |
def get_csv_file(docs):
    """Stub that ignores ``docs`` and returns ``None``.

    No extraction logic is implemented in this function yet.
    """
    return None
|