Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,11 +8,11 @@ from transformers import pipeline
|
|
8 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
9 |
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
|
10 |
|
11 |
-
def extract_text(
|
12 |
-
if
|
13 |
-
return "\n".join([page.extract_text() or "" for page in PdfReader(
|
14 |
-
elif
|
15 |
-
return "\n".join([p.text for p in docx.Document(
|
16 |
return ""
|
17 |
|
18 |
def chunk_text(text, chunk_size=500):
|
@@ -28,11 +28,11 @@ def chunk_text(text, chunk_size=500):
|
|
28 |
chunks.append(buffer.strip())
|
29 |
return chunks
|
30 |
|
31 |
-
def ask_question(
|
32 |
-
if not
|
33 |
return "Please upload a file.", history
|
34 |
|
35 |
-
text = extract_text(
|
36 |
chunks = chunk_text(text)
|
37 |
emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
|
38 |
emb_question = embedder.encode(question, convert_to_tensor=True)
|
@@ -55,7 +55,7 @@ with gr.Blocks() as demo:
|
|
55 |
file_input = gr.File(
|
56 |
label="Choose a PDF or Word file",
|
57 |
file_types=[".pdf", ".docx"],
|
58 |
-
type="
|
59 |
)
|
60 |
|
61 |
chatbot = gr.Chatbot(label="💬 Chat with Document")
|
|
|
8 |
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
9 |
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
|
10 |
|
11 |
+
def extract_text(file_path):
|
12 |
+
if file_path.endswith(".pdf"):
|
13 |
+
return "\n".join([page.extract_text() or "" for page in PdfReader(file_path).pages])
|
14 |
+
elif file_path.endswith(".docx"):
|
15 |
+
return "\n".join([p.text for p in docx.Document(file_path).paragraphs])
|
16 |
return ""
|
17 |
|
18 |
def chunk_text(text, chunk_size=500):
|
|
|
28 |
chunks.append(buffer.strip())
|
29 |
return chunks
|
30 |
|
31 |
+
def ask_question(file_path, question, history):
|
32 |
+
if not file_path:
|
33 |
return "Please upload a file.", history
|
34 |
|
35 |
+
text = extract_text(file_path)
|
36 |
chunks = chunk_text(text)
|
37 |
emb_chunks = embedder.encode(chunks, convert_to_tensor=True)
|
38 |
emb_question = embedder.encode(question, convert_to_tensor=True)
|
|
|
55 |
file_input = gr.File(
|
56 |
label="Choose a PDF or Word file",
|
57 |
file_types=[".pdf", ".docx"],
|
58 |
+
type="filepath" # ✅ Fixed for Hugging Face
|
59 |
)
|
60 |
|
61 |
chatbot = gr.Chatbot(label="💬 Chat with Document")
|