Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -54,7 +54,34 @@ def extract_metadata_llm(pdf_path):
|
|
54 |
with pdfplumber.open(pdf_path) as pdf:
|
55 |
first_page_text = pdf.pages[0].extract_text() if pdf.pages else "No text found."
|
56 |
|
57 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
metadata_chain = LLMChain(llm=llm_judge, prompt=metadata_prompt, output_key="metadata")
|
59 |
metadata_response = metadata_chain.invoke({"text": first_page_text})
|
60 |
|
|
|
54 |
with pdfplumber.open(pdf_path) as pdf:
|
55 |
first_page_text = pdf.pages[0].extract_text() if pdf.pages else "No text found."
|
56 |
|
57 |
+
# Define metadata_prompt
|
58 |
+
metadata_prompt = PromptTemplate(
|
59 |
+
input_variables=["text"],
|
60 |
+
template="""
|
61 |
+
Given the following first page of a research paper, extract metadata **strictly in JSON format**.
|
62 |
+
|
63 |
+
- If no data is found for a field, return `"Unknown"` instead.
|
64 |
+
- The response must be valid JSON.
|
65 |
+
|
66 |
+
Example output:
|
67 |
+
```json
|
68 |
+
{
|
69 |
+
"Title": "Example Paper Title",
|
70 |
+
"Author": "John Doe, Jane Smith",
|
71 |
+
"Emails": "john.doe@example.com, jane.smith@example.com",
|
72 |
+
"Affiliations": "School of AI, University of Example"
|
73 |
+
}
|
74 |
+
```
|
75 |
+
|
76 |
+
Now, extract the metadata from this document:
|
77 |
+
|
78 |
+
```
|
79 |
+
{text}
|
80 |
+
```
|
81 |
+
"""
|
82 |
+
)
|
83 |
+
|
84 |
+
# ✅ Now metadata_prompt exists when used in LLMChain
|
85 |
metadata_chain = LLMChain(llm=llm_judge, prompt=metadata_prompt, output_key="metadata")
|
86 |
metadata_response = metadata_chain.invoke({"text": first_page_text})
|
87 |
|