DrishtiSharma committed on
Commit
2c2a658
·
verified ·
1 Parent(s): 94c2133

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -1
app.py CHANGED
@@ -54,7 +54,34 @@ def extract_metadata_llm(pdf_path):
54
  with pdfplumber.open(pdf_path) as pdf:
55
  first_page_text = pdf.pages[0].extract_text() if pdf.pages else "No text found."
56
 
57
- # Run LLM Metadata Extraction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  metadata_chain = LLMChain(llm=llm_judge, prompt=metadata_prompt, output_key="metadata")
59
  metadata_response = metadata_chain.invoke({"text": first_page_text})
60
 
 
54
  with pdfplumber.open(pdf_path) as pdf:
55
  first_page_text = pdf.pages[0].extract_text() if pdf.pages else "No text found."
56
 
57
+ # Define metadata_prompt
58
+ metadata_prompt = PromptTemplate(
59
+ input_variables=["text"],
60
+ template="""
61
+ Given the following first page of a research paper, extract metadata **strictly in JSON format**.
62
+
63
+ - If no data is found for a field, return `"Unknown"` instead.
64
+ - The response must be valid JSON.
65
+
66
+ Example output:
67
+ ```json
68
+ {
69
+ "Title": "Example Paper Title",
70
+ "Author": "John Doe, Jane Smith",
71
72
+ "Affiliations": "School of AI, University of Example"
73
+ }
74
+ ```
75
+
76
+ Now, extract the metadata from this document:
77
+
78
+ ```
79
+ {text}
80
+ ```
81
+ """
82
+ )
83
+
84
+ # ✅ Now metadata_prompt exists when used in LLMChain
85
  metadata_chain = LLMChain(llm=llm_judge, prompt=metadata_prompt, output_key="metadata")
86
  metadata_response = metadata_chain.invoke({"text": first_page_text})
87