Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -61,7 +61,11 @@ def extract_text_from_docx(docx_path):
|
|
61 |
return text
|
62 |
|
63 |
|
64 |
-
def process_and_query(text, file=None):
|
|
|
|
|
|
|
|
|
65 |
# Check if a file is uploaded
|
66 |
if file:
|
67 |
# Get the uploaded file content
|
@@ -69,40 +73,24 @@ def process_and_query(text, file=None):
|
|
69 |
if file.filename.endswith('.pdf'):
|
70 |
with open("temp.pdf", "wb") as f:
|
71 |
f.write(content)
|
72 |
-
|
73 |
elif file.filename.endswith('.docx'):
|
74 |
with open("temp.docx", "wb") as f:
|
75 |
f.write(content)
|
76 |
-
|
77 |
else:
|
78 |
return {"error": "Unsupported file format"}
|
79 |
|
80 |
-
#
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
query_embedding = embedding_model.encode([text])
|
87 |
-
D, I = index.search(np.array(query_embedding), k=5)
|
88 |
-
|
89 |
-
top_documents = []
|
90 |
-
for idx in I[0]:
|
91 |
-
if idx != -1: # Ensure that a valid index is found
|
92 |
-
top_documents.append(f"Document {idx}")
|
93 |
-
|
94 |
-
# Generate response using LLM (optional, commented out)
|
95 |
-
# You can replace this with your desired LLM interaction logic
|
96 |
-
# response = llm.run(inputs=text, max_length=100, temperature=0.7)["generated_text"]
|
97 |
-
|
98 |
-
return {"top_documents": top_documents, "response": None} # Response from LLM (optional)
|
99 |
|
|
|
|
|
100 |
|
101 |
-
# Define the Gradio interface
|
102 |
-
interface = gr.Interface(
|
103 |
-
fn=process_and_query,
|
104 |
-
inputs={"text": gr.Textbox(label="Enter text or upload a file"),
|
105 |
-
"file": gr.FileUpload(label="Upload File (PDF or Word
|
106 |
|
107 |
|
108 |
|
|
|
61 |
return text
|
62 |
|
63 |
|
64 |
+
def process_and_query(state, text, file=None):
|
65 |
+
# Initialize state on first run
|
66 |
+
if state is None:
|
67 |
+
state = {"processed_text": None, "conversation": []}
|
68 |
+
|
69 |
# Check if a file is uploaded
|
70 |
if file:
|
71 |
# Get the uploaded file content
|
|
|
73 |
if file.filename.endswith('.pdf'):
|
74 |
with open("temp.pdf", "wb") as f:
|
75 |
f.write(content)
|
76 |
+
state["processed_text"] = extract_text_from_pdf("temp.pdf")
|
77 |
elif file.filename.endswith('.docx'):
|
78 |
with open("temp.docx", "wb") as f:
|
79 |
f.write(content)
|
80 |
+
state["processed_text"] = extract_text_from_docx("temp.docx")
|
81 |
else:
|
82 |
return {"error": "Unsupported file format"}
|
83 |
|
84 |
+
# Handle user question
|
85 |
+
if state["processed_text"] and text:
|
86 |
+
# Process the question and potentially use LLM for answering (optional)
|
87 |
+
question_embedding = embedding_model.encode([text])
|
88 |
+
# ... (logic to search the index and potentially use LLM for answering)
|
89 |
+
answer = "Answer retrieved from the document based on your question." # Placeholder answer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
+
# Update conversation history
|
92 |
+
state["conversation"].append({"question": text,
|
93 |
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
|
96 |
|