Hammad712 committed on
Commit
16aaeed
·
verified ·
1 Parent(s): b7f6319

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +21 -15
main.py CHANGED
@@ -6,18 +6,18 @@ from pydantic import BaseModel
6
  from langchain_groq import ChatGroq
7
  from langchain.document_loaders import PyPDFLoader
8
 
9
- # Load API key securely from environment variable
10
  API_KEY = os.getenv("GROQ_API_KEY")
11
  if not API_KEY:
12
  raise ValueError("GROQ_API_KEY environment variable not set.")
13
 
14
  app = FastAPI(title="PDF Question Extractor", version="1.0")
15
 
16
- # Pydantic model for response
17
  class ExtractionResult(BaseModel):
18
  answers: List[str]
19
 
20
- # Initialize LLM
21
  def get_llm():
22
  return ChatGroq(
23
  model="llama-3.3-70b-versatile",
@@ -28,6 +28,15 @@ def get_llm():
28
 
29
  llm = get_llm()
30
 
 
 
 
 
 
 
 
 
 
31
  @app.post("/extract-answers/")
32
  async def extract_answers(file: UploadFile = File(...)):
33
  try:
@@ -36,37 +45,34 @@ async def extract_answers(file: UploadFile = File(...)):
36
  with open(file_path, "wb") as buffer:
37
  buffer.write(file.file.read())
38
 
39
- # Load and extract text from PDF
40
  loader = PyPDFLoader(file_path)
41
  pages = loader.load_and_split()
42
  all_page_content = "\n".join(page.page_content for page in pages)
43
 
44
- # JSON schema definition
45
  schema_dict = ExtractionResult.model_json_schema()
46
  schema = json.dumps(schema_dict, indent=2)
47
 
48
- # System message
49
  system_message = (
50
- "You are a document analysis tool that extracts the options and correct answers from the provided document content. "
51
- "The output must be a JSON object that strictly follows the schema: " + schema
 
52
  )
53
-
54
- # User message
55
  user_message = (
56
  "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
57
  + all_page_content
58
  )
59
-
60
- # Construct final prompt
61
  prompt = system_message + "\n\n" + user_message
62
 
63
- # Get LLM response
64
  response = llm.invoke(prompt, response_format={"type": "json_object"})
65
 
66
- # Parse and validate response
67
  result = ExtractionResult.model_validate_json(response.content)
68
 
69
- # Cleanup
70
  os.remove(file_path)
71
 
72
  return result.model_dump()
 
6
  from langchain_groq import ChatGroq
7
  from langchain.document_loaders import PyPDFLoader
8
 
9
+ # Securely load your Groq API key from environment variables
10
  API_KEY = os.getenv("GROQ_API_KEY")
11
  if not API_KEY:
12
  raise ValueError("GROQ_API_KEY environment variable not set.")
13
 
14
  app = FastAPI(title="PDF Question Extractor", version="1.0")
15
 
16
+ # Define the expected JSON response schema
17
  class ExtractionResult(BaseModel):
18
  answers: List[str]
19
 
20
+ # Initialize the language model (LLM)
21
  def get_llm():
22
  return ChatGroq(
23
  model="llama-3.3-70b-versatile",
 
28
 
29
  llm = get_llm()
30
 
31
+ # Root endpoint: Provides a welcome message and instructions
32
+ @app.get("/")
33
+ async def root():
34
+ return {
35
+ "message": "Welcome to the PDF Question Extractor API.",
36
+ "usage": "POST your PDF to /extract-answers/ to extract answers."
37
+ }
38
+
39
+ # PDF extraction endpoint: Processes a PDF file upload
40
  @app.post("/extract-answers/")
41
  async def extract_answers(file: UploadFile = File(...)):
42
  try:
 
45
  with open(file_path, "wb") as buffer:
46
  buffer.write(file.file.read())
47
 
48
+ # Load and split the PDF into pages
49
  loader = PyPDFLoader(file_path)
50
  pages = loader.load_and_split()
51
  all_page_content = "\n".join(page.page_content for page in pages)
52
 
53
+ # Generate the JSON schema from the Pydantic model
54
  schema_dict = ExtractionResult.model_json_schema()
55
  schema = json.dumps(schema_dict, indent=2)
56
 
57
+ # Build the prompt with system and user messages
58
  system_message = (
59
+ "You are a document analysis tool that extracts the options and correct answers "
60
+ "from the provided document content. The output must be a JSON object that strictly follows the schema: "
61
+ + schema
62
  )
 
 
63
  user_message = (
64
  "Please extract the correct answers and options (A, B, C, D, E) from the following document content:\n\n"
65
  + all_page_content
66
  )
 
 
67
  prompt = system_message + "\n\n" + user_message
68
 
69
+ # Invoke the LLM and request a JSON response
70
  response = llm.invoke(prompt, response_format={"type": "json_object"})
71
 
72
+ # Validate and parse the JSON response using Pydantic
73
  result = ExtractionResult.model_validate_json(response.content)
74
 
75
+ # Clean up the temporary file
76
  os.remove(file_path)
77
 
78
  return result.model_dump()