Spaces:

DrishtiSharma
/

docqa-with-deepseek-r1

Build error

DrishtiSharma committed on Feb 14

Commit

2c2a658

verified ·

1 Parent(s): 94c2133

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -54,7 +54,34 @@ def extract_metadata_llm(pdf_path):
     with pdfplumber.open(pdf_path) as pdf:
         first_page_text = pdf.pages[0].extract_text() if pdf.pages else "No text found."
-    # Run LLM Metadata Extraction
     metadata_chain = LLMChain(llm=llm_judge, prompt=metadata_prompt, output_key="metadata")
     metadata_response = metadata_chain.invoke({"text": first_page_text})

     with pdfplumber.open(pdf_path) as pdf:
         first_page_text = pdf.pages[0].extract_text() if pdf.pages else "No text found."
+    #Define metadata_prompt
+    metadata_prompt = PromptTemplate(
+        input_variables=["text"],
+        template="""
+        Given the following first page of a research paper, extract metadata **strictly in JSON format**.
+        - If no data is found for a field, return `"Unknown"` instead.
+        - The response must be valid JSON.
+        Example output:
+        ```json
+        {
+            "Title": "Example Paper Title",
+            "Author": "John Doe, Jane Smith",
+            "Emails": "john.doe@example.com, jane.smith@example.com",
+            "Affiliations": "School of AI, University of Example"
+        }
+        ```
+        Now, extract the metadata from this document:
+        ```
+        {text}
+        ```
+        """
+    )
+    # ✅ Now metadata_prompt exists when used in LLMChain
     metadata_chain = LLMChain(llm=llm_judge, prompt=metadata_prompt, output_key="metadata")
     metadata_response = metadata_chain.invoke({"text": first_page_text})