Nasma committed on
Commit
a952212
·
verified ·
1 Parent(s): b304fd9

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +142 -0
main.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import base64
import os
import io
import json
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
from PIL import Image
import fitz  # PyMuPDF
from dotenv import load_dotenv
import openai


# Load environment variables from a local .env file, if one is present.
load_dotenv()

# BUG FIX: os.environ["OPENAI_API_KEY"] raises KeyError when the variable is
# absent, which made the explicit guard below unreachable dead code.
# os.getenv returns None instead, so the intended RuntimeError (with an
# actionable message) actually fires on a missing key.
openai.api_key = os.getenv("OPENAI_API_KEY")

if not openai.api_key:
    raise RuntimeError("Missing OpenAI API key. Please set OPENAI_API_KEY in the environment variables.")

app = FastAPI()

# Wide-open CORS so any browser origin can call the API.
# NOTE(review): tighten allow_origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
def vision(file_content):
    """Run GPT-based OCR over a PDF that has no extractable text layer.

    Renders every page of the PDF to a PNG image, attaches each image to a
    single chat request as a base64 data URL, and asks the vision model to
    extract all text.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        The model's extracted-text reply as a string.

    Raises:
        HTTPException: 500 if the OpenAI request fails.
    """
    # First content part carries the instruction; image parts are appended
    # below, one per PDF page.
    vision_data = [
        {
            "type": "text",
            "text": "extract the all text from this images",
        }
    ]

    pdf_document = fitz.open("pdf", file_content)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap()
            # pix.tobytes("png") already yields a complete PNG file, so the
            # previous PIL decode/re-encode round-trip was redundant work.
            img_base64 = base64.b64encode(pix.tobytes("png")).decode("utf-8")
            vision_data.append(
                {
                    "type": "image_url",
                    # BUG FIX: the payload is PNG, so label the data URL
                    # image/png (it was mislabelled image/jpeg before).
                    "image_url": {"url": f"data:image/png;base64,{img_base64}"},
                }
            )
    finally:
        # Release the document's resources even if a page fails to render.
        pdf_document.close()

    print("PDF pages converted to images successfully!")

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": vision_data,
                }
            ],
        )
        print(response.choices[0]["message"]["content"])
        return response.choices[0]["message"]["content"]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")
81
+
82
+
83
def _strip_code_fences(raw: str) -> str:
    """Remove a surrounding Markdown ```json fence from a model reply.

    Only trims fence markers at the edges of the string, so occurrences of
    the word "json" inside the payload survive intact — the previous blanket
    ``.replace("json", "")`` corrupted any CV text containing that word.
    """
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:].lstrip()
        # Drop an optional language tag right after the opening fence.
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:]
    if cleaned.rstrip().endswith("```"):
        cleaned = cleaned.rstrip()[:-3]
    return cleaned.strip()


@app.post("/get_ocr_data/")
def get_data(input_file: UploadFile = File(...)):
    """Extract structured CV data from an uploaded PDF.

    Reads the upload, pulls its embedded text layer with PyPDF2, falls back
    to GPT-based OCR (``vision``) when the PDF has no extractable text, then
    asks GPT-4o to normalise the text into a JSON array of CV fields.

    Args:
        input_file: The uploaded CV; only ``application/pdf`` is accepted.

    Returns:
        ``{"data": <parsed JSON>}`` with the model's structured output.

    Raises:
        HTTPException: 400 for unsupported file types, 500 when the model
            reply cannot be parsed as JSON (or the OCR call fails).
    """
    file_content = input_file.file.read()
    file_type = input_file.content_type
    text = ""

    if file_type == "application/pdf":
        pdf_reader = PdfReader(io.BytesIO(file_content))
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages; coalesce
            # to "" so the concatenation never raises TypeError.
            text += page.extract_text() or ""

        # BUG FIX: the original test `if len(text.strip()):` was inverted —
        # it ran the expensive vision OCR when text extraction SUCCEEDED and
        # skipped it for image-only PDFs, the opposite of the stated intent.
        if not text.strip():  # PDF text extraction was insufficient
            print("\nvision running..........................\n")
            text = vision(file_content)
    else:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    print(text.strip())

    # BUG FIX: the original example output was not valid JSON (a bare array
    # of key/value pairs), contradicting the "valid JSON" instruction; the
    # example now shows an array containing one object.
    prompt = f"""This is CV data: {text.strip()}.
IMPORTANT: The output should be a JSON array! Make sure the JSON is valid. If you do not find a value, fill it with "none". Do not add any extra explanation text.
need only json
Example Output:
```json
[
  {{
    "firstname": "firstname",
    "lastname": "lastname",
    "email": "email",
    "contact_number": "contact number",
    "home_address": "full home address",
    "home_town": "home town or city",
    "total_years_of_experience": "total years of experience",
    "education": "Institution Name, Country, Degree Name, Graduation Year; Institution Name, Country, Degree Name, Graduation Year",
    "LinkedIn_link": "LinkedIn link",
    "experience": "experience",
    "industry": "industry of work",
    "skills": "skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section), formatted as: Skill 1, Skill 2, Skill 3, Skill 4, Skill 5",
    "positions": ["Job title 1, Job title 2, Job title 3"]
  }}
]
```
"""

    response = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are an assistant processing CV data and formatting it into structured JSON."},
            {"role": "user", "content": prompt},
        ],
    )

    raw = response["choices"][0]["message"]["content"]
    try:
        data = json.loads(_strip_code_fences(raw))
    except json.JSONDecodeError as e:
        # Surface a clear 500 instead of an unhandled traceback when the
        # model ignores the formatting instructions.
        raise HTTPException(status_code=500, detail=f"Model returned invalid JSON: {e}")
    print(data)

    return {"data": data}