Hammad712 committed on
Commit
7f269b9
·
verified ·
1 Parent(s): afb89fd

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +75 -0
main.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from typing import List
4
+ from fastapi import FastAPI, UploadFile, File, HTTPException
5
+ from pydantic import BaseModel
6
+ from langchain_groq import ChatGroq
7
+ from langchain.document_loaders import PyPDFLoader
8
+
9
# Read the Groq credential from the process environment. A missing or empty
# value aborts the import immediately instead of failing on the first request.
API_KEY = os.environ.get("GROQ_API_KEY")
if not API_KEY:
    raise ValueError("GROQ_API_KEY environment variable not set.")

# FastAPI application object that the route decorators in this module use.
app = FastAPI(title="PDF Question Extractor", version="1.0")
15
+
16
# Response model: the JSON object the LLM must produce and the endpoint returns.
# NOTE: documented with comments rather than a class docstring on purpose --
# this model's JSON schema is embedded in the LLM prompt, and Pydantic copies a
# class docstring into that schema as its "description".
class ExtractionResult(BaseModel):
    # One string per extracted answer.
    answers: List[str]
19
+
20
# Factory for the Groq chat model used by this service.
def get_llm():
    """Build and return a ``ChatGroq`` client configured for this service.

    The client targets the ``llama-3.3-70b-versatile`` model with temperature 0
    and a 1024-token response cap, authenticated with the module-level
    ``API_KEY``.
    """
    settings = {
        "model": "llama-3.3-70b-versatile",
        "temperature": 0,
        "max_tokens": 1024,
        "api_key": API_KEY,
    }
    return ChatGroq(**settings)


# Single shared client, created once when the module is imported.
llm = get_llm()
30
+
31
@app.post("/extract-answers/")
async def extract_answers(file: UploadFile = File(...)):
    """Extract answers from an uploaded PDF with the module-level LLM.

    The upload is written to a private temporary file, its text is extracted
    with ``PyPDFLoader``, and the text is sent to ``llm`` together with the
    JSON schema of ``ExtractionResult``. The model's JSON reply is validated
    against that schema before being returned.

    Args:
        file: The uploaded PDF document.

    Returns:
        The validated ``ExtractionResult`` as a plain dict
        (``{"answers": [...]}``).

    Raises:
        HTTPException: Status 500 carrying the underlying error message if
            saving, parsing, the LLM call, or validation fails.
    """
    import tempfile  # function-scope import keeps this block self-contained

    file_path = None
    try:
        # Never derive the path from ``file.filename``: it is client-controlled
        # (may contain path separators or be None) and two concurrent uploads
        # with the same name would overwrite each other. Let tempfile pick a
        # unique, safe name instead.
        with tempfile.NamedTemporaryFile(
            prefix="temp_", suffix=".pdf", delete=False
        ) as buffer:
            file_path = buffer.name
            # Use UploadFile's async read rather than the sync file object.
            buffer.write(await file.read())

        # Load and extract text from PDF
        loader = PyPDFLoader(file_path)
        pages = loader.load_and_split()
        all_page_content = "\n".join(page.page_content for page in pages)

        # JSON schema the model's reply must follow
        schema_dict = ExtractionResult.model_json_schema()
        schema = json.dumps(schema_dict, indent=2)

        # System message
        system_message = (
            "You are a document analysis tool that extracts the options and correct answers from the provided document content. "
            "The output must be a JSON object that strictly follows the schema: " + schema
        )

        # User message
        user_message = (
            "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
            + all_page_content
        )

        # Construct final prompt
        prompt = system_message + "\n\n" + user_message

        # NOTE(review): PDF parsing above and this call are synchronous and
        # block the event loop while they run -- consider offloading them.
        response = llm.invoke(prompt, response_format={"type": "json_object"})

        # Parse and validate response
        result = ExtractionResult.model_validate_json(response.content)

        return result.model_dump()

    except Exception as e:
        # Same status/detail as before; ``from e`` keeps the original traceback.
        raise HTTPException(status_code=500, detail=str(e)) from e

    finally:
        # Remove the temporary file on success AND on failure, so failed
        # requests no longer leave uploads behind on disk.
        if file_path is not None:
            try:
                os.remove(file_path)
            except OSError:
                # Best-effort cleanup: do not mask the real outcome.
                pass