Hammad712 committed on
Commit
7fa9057
·
verified ·
1 Parent(s): 610d668

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +55 -73
main.py CHANGED
@@ -1,81 +1,63 @@
1
  import os
2
- import json
3
- from typing import List
4
  from fastapi import FastAPI, UploadFile, File, HTTPException
5
- from pydantic import BaseModel
6
- from langchain_groq import ChatGroq
7
- from langchain.document_loaders import PyPDFLoader
8
 
9
# Read the Groq API key from the environment; refuse to start without it.
API_KEY = os.environ.get("GROQ_API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("GROQ_API_KEY environment variable not set.")

app = FastAPI(title="PDF Question Extractor", version="1.0")
 
 
 
15
 
16
class ExtractionResult(BaseModel):
    """JSON response schema that the LLM output is validated against."""

    # Extracted answers as a list of strings.
    answers: List[str]
19
-
20
def get_llm():
    """Build and return the Groq chat model client."""
    settings = {
        "model": "llama-3.3-70b-versatile",
        "temperature": 0,
        "max_tokens": 1024,
        "api_key": API_KEY,
    }
    return ChatGroq(**settings)


# One shared client, created when the module is imported.
llm = get_llm()
30
-
31
@app.get("/")
async def root():
    """Return a welcome message together with a short usage hint."""
    welcome = "Welcome to the PDF Question Extractor API."
    usage = "POST your PDF to /extract-answers/ to extract answers."
    return {"message": welcome, "usage": usage}
38
-
39
@app.post("/extract-answers/")
async def extract_answers(file: UploadFile = File(...)):
    """Extract answers from an uploaded PDF via the LLM.

    The upload is written to a uniquely named temporary file, loaded page by
    page, and its text is sent to the LLM together with the JSON schema of
    ``ExtractionResult``. The LLM reply is validated against that schema and
    returned as a plain dict.

    Raises:
        HTTPException: status 500 carrying the error text if any step fails.
    """
    import tempfile  # local import: only this handler needs it

    file_path = None
    try:
        # Do not build the path from ``file.filename``: it is client-supplied,
        # so it can contain ``../`` segments or clash with a concurrent request.
        with tempfile.NamedTemporaryFile(
            mode="wb", prefix="temp_", suffix=".pdf", dir=".", delete=False
        ) as buffer:
            file_path = buffer.name
            buffer.write(file.file.read())

        # Load and split the PDF into pages.
        pages = PyPDFLoader(file_path).load_and_split()
        all_page_content = "\n".join(page.page_content for page in pages)

        # Generate the JSON schema from the Pydantic model.
        schema = json.dumps(ExtractionResult.model_json_schema(), indent=2)

        # Build the prompt from a system part and a user part.
        system_message = (
            "You are a document analysis tool that extracts the options and correct answers "
            "from the provided document content. The output must be a JSON object that strictly follows the schema: "
            + schema
        )
        user_message = (
            "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
            + all_page_content
        )
        prompt = system_message + "\n\n" + user_message

        # Invoke the LLM and request a JSON response.
        response = llm.invoke(prompt, response_format={"type": "json_object"})

        # Validate and parse the JSON response using Pydantic.
        result = ExtractionResult.model_validate_json(response.content)
        return result.model_dump()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Clean up on both success and failure so failed requests do not
        # leave temporary files behind.
        if file_path is not None and os.path.exists(file_path):
            os.remove(file_path)
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import shutil
 
3
  from fastapi import FastAPI, UploadFile, File, HTTPException
4
+ from fastapi.responses import JSONResponse
5
+ from pdf_processor import extract_answers_from_pdf, evaluate_student
 
6
 
7
app = FastAPI()

# Directory to temporarily store uploaded files.
UPLOAD_DIR = "uploads"
# exist_ok=True replaces the separate exists() check, so two processes
# starting at the same time cannot race between the check and the creation.
os.makedirs(UPLOAD_DIR, exist_ok=True)
13
 
14
@app.post("/extract/")
async def extract_pdf(file: UploadFile = File(...)):
    """
    Endpoint to extract answers from a PDF file.

    The upload is copied to a uniquely named file inside ``UPLOAD_DIR``,
    passed to ``extract_answers_from_pdf``, and the result's ``model_dump()``
    is returned as a JSON response. The stored file is always removed.

    Raises:
        HTTPException: status 500 carrying the error text if any step fails.
    """
    import tempfile  # local import: keeps this handler self-contained

    # Bound before the try so the finally clause can always reference it.
    file_location = None
    try:
        # Do not join UPLOAD_DIR with ``file.filename``: the name is chosen by
        # the client, so it may be missing, contain ``../`` segments, or
        # collide with another in-flight request.
        # NOTE(review): assumes extract_answers_from_pdf does not depend on
        # the original file name -- confirm.
        with tempfile.NamedTemporaryFile(
            mode="wb", suffix=".pdf", dir=UPLOAD_DIR, delete=False
        ) as f:
            file_location = f.name
            shutil.copyfileobj(file.file, f)

        result = extract_answers_from_pdf(file_location)
        return JSONResponse(content=result.model_dump())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        if file_location is not None and os.path.exists(file_location):
            os.remove(file_location)
31
+
32
@app.post("/evaluate/")
async def evaluate_pdfs(answer_key_file: UploadFile = File(...), student_file: UploadFile = File(...)):
    """
    Endpoint to evaluate student answers by comparing the answer key and student's answer PDFs.

    Both uploads are copied to uniquely named files inside ``UPLOAD_DIR``,
    each is passed to ``extract_answers_from_pdf``, and the two results are
    handed to ``evaluate_student``. The evaluation's ``model_dump()`` is
    returned as a JSON response. The stored files are always removed.

    Raises:
        HTTPException: status 500 carrying the error text if any step fails.
    """
    import tempfile  # local import: keeps this handler self-contained

    # Every file created so far; the finally clause removes exactly these.
    saved_paths = []
    try:
        # Unique server-generated names matter here: if both uploads carried
        # the same client filename, the student file would overwrite the
        # answer key and the evaluation would compare a file with itself.
        # They also rule out ``../`` path traversal via ``filename``.
        # NOTE(review): assumes extract_answers_from_pdf does not depend on
        # the original file name -- confirm.
        for upload in (answer_key_file, student_file):
            with tempfile.NamedTemporaryFile(
                mode="wb", suffix=".pdf", dir=UPLOAD_DIR, delete=False
            ) as f:
                saved_paths.append(f.name)
                shutil.copyfileobj(upload.file, f)
        answer_key_path, student_path = saved_paths

        # Extract answers from both PDFs.
        answer_key_result = extract_answers_from_pdf(answer_key_path)
        student_result = extract_answers_from_pdf(student_path)

        # Evaluate the student answers.
        evaluation = evaluate_student(answer_key_result, student_result)
        return JSONResponse(content=evaluation.model_dump())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        for path in saved_paths:
            if os.path.exists(path):
                os.remove(path)
60
+
61
if __name__ == "__main__":
    import uvicorn

    # Serve the app from this module when the file is run as a script.
    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("main:app", **server_options)