Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import
|
2 |
import os
|
3 |
import json
|
4 |
import gradio as gr
|
@@ -16,6 +16,9 @@ from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
|
16 |
|
17 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
18 |
|
|
|
|
|
|
|
19 |
def load_and_split_document(file):
|
20 |
"""Loads and splits the document into pages."""
|
21 |
loader = PyPDFLoader(file.name)
|
@@ -89,14 +92,12 @@ def extract_db_to_excel():
|
|
89 |
data = [{"page_content": doc.page_content, "metadata": json.dumps(doc.metadata)} for doc in documents]
|
90 |
df = pd.DataFrame(data)
|
91 |
|
92 |
-
#
|
93 |
-
|
94 |
-
|
95 |
-
df.to_excel(
|
96 |
-
output.seek(0)
|
97 |
|
98 |
-
|
99 |
-
return (output.getvalue(), "database_output.xlsx")
|
100 |
|
101 |
# Modify the Gradio interface
|
102 |
with gr.Blocks() as demo:
|
|
|
1 |
+
import tempfile
|
2 |
import os
|
3 |
import json
|
4 |
import gradio as gr
|
|
|
16 |
|
17 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
18 |
|
19 |
+
# At the beginning of your script
|
20 |
+
os.environ['TMPDIR'] = '/tmp'
|
21 |
+
|
22 |
def load_and_split_document(file):
|
23 |
"""Loads and splits the document into pages."""
|
24 |
loader = PyPDFLoader(file.name)
|
|
|
92 |
data = [{"page_content": doc.page_content, "metadata": json.dumps(doc.metadata)} for doc in documents]
|
93 |
df = pd.DataFrame(data)
|
94 |
|
95 |
+
# Create a temporary file
|
96 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
|
97 |
+
excel_path = tmp.name
|
98 |
+
df.to_excel(excel_path, index=False)
|
|
|
99 |
|
100 |
+
return excel_path
|
|
|
101 |
|
102 |
# Modify the Gradio interface
|
103 |
with gr.Blocks() as demo:
|