ocr_api2 / main.py
Nasma's picture
Update main.py
acf9f09 verified
raw
history blame
7.94 kB
# try: from pip._internal.operations import freeze
# except ImportError: # pip < 10.0
# from pip.operations import freeze
# pkgs = freeze.freeze()
# for pkg in pkgs: print(pkg)
# import os
# from fastapi import FastAPI, HTTPException, File, UploadFile,Query
# from fastapi.middleware.cors import CORSMiddleware
# from PyPDF2 import PdfReader
# import google.generativeai as genai
# import json
# import base64
# from io import BytesIO
# from PIL import Image
# import io
# import requests
# import fitz # PyMuPDF
# import os
# from dotenv import load_dotenv
# # Load the environment variables from the .env file
# load_dotenv()
# # Configure Gemini API
# secret = os.environ["GEMINI"]
# genai.configure(api_key=secret)
# model_vision = genai.GenerativeModel('gemini-1.5-flash')
# model_text = genai.GenerativeModel('gemini-pro')
# app = FastAPI()
# app.add_middleware(
# CORSMiddleware,
# allow_origins=["*"],
# allow_credentials=True,
# allow_methods=["*"],
# allow_headers=["*"],
# )
# def vision(file_content):
# # Open the PDF
# pdf_document = fitz.open("pdf",file_content)
# gemini_input = ["extract the whole text"]
# # Iterate through the pages
# for page_num in range(len(pdf_document)):
# # Select the page
# page = pdf_document.load_page(page_num)
# # Render the page to a pixmap (image)
# pix = page.get_pixmap()
# print(type(pix))
# # Convert the pixmap to bytes
# img_bytes = pix.tobytes("png")
# # Convert bytes to a PIL Image
# img = Image.open(io.BytesIO(img_bytes))
# gemini_input.append(img)
# # # Save the image if needed
# # img.save(f'page_{page_num + 1}.png')
# print("PDF pages converted to images successfully!")
# # Now you can pass the PIL image to the model_vision
# response = model_vision.generate_content(gemini_input).text
# return response
# @app.post("/get_ocr_data/")
# async def get_data(input_file: UploadFile = File(...)):
# #try:
# # Determine the file type by reading the first few bytes
# file_content = await input_file.read()
# file_type = input_file.content_type
# text = ""
# if file_type == "application/pdf":
# # Read PDF file using PyPDF2
# pdf_reader = PdfReader(io.BytesIO(file_content))
# for page in pdf_reader.pages:
# text += page.extract_text()
# if len(text)<10:
# print("vision called")
# text = vision(file_content)
# else:
# raise HTTPException(status_code=400, detail="Unsupported file type")
# # Call Gemini (or another model) to extract required data
# prompt = f"""This is CV data: {text.strip()}
# IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.
# Example Output:
# [
# "firstname" : "firstname",
# "lastname" : "lastname",
# "gender" : "gender",
# "email" : "email",
# "contact_number" : "contact number",
# "age" : "age",
# "home_address" : "full home address",
# "home_town" : "home town or city",
# "total_years_of_experience" : "total years of experience",
# "LinkedIn_link" : "LinkedIn link",
# "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
# "industry": "industry of work",
# "experience" : "experience",
# "skills" : Skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section)
# ]
# """
# response = model_text.generate_content(prompt)
# print(response.text)
# data = json.loads(response.text.replace("JSON", "").replace("json", "").replace("```", ""))
# return {"data": data}
# #except Exception as e:
# #raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
from fastapi import FastAPI, HTTPException, File, UploadFile, Query
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
import google.generativeai as genai
import json
from PIL import Image
import io
import fitz # PyMuPDF
import os
from dotenv import load_dotenv
# Load environment variables from the .env file (if present) into os.environ.
load_dotenv()
# The Gemini API key is read from the GEMINI environment variable; if it is
# not set, the lookup below raises KeyError while the module is being imported.
secret = os.environ["GEMINI"]
genai.configure(api_key=secret)
# Two model handles are kept: model_vision receives rendered page images
# (OCR fallback), model_text receives the extraction prompt built from the
# CV text.
model_vision = genai.GenerativeModel('gemini-1.5-flash')
model_text = genai.GenerativeModel('gemini-pro')
app = FastAPI()
# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): wildcard origins are combined with allow_credentials=True;
# the FastAPI CORS documentation advises listing origins explicitly when
# credentials are allowed -- confirm this configuration is intended.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
def process_pdf_text(file_content):
    """Return the text layer of a PDF, all pages concatenated.

    ``file_content`` is the raw PDF as a bytes-like object. It is wrapped in
    an in-memory stream and parsed with PyPDF2; the text extracted from each
    page is joined in page order with no separator.
    """
    reader = PdfReader(io.BytesIO(file_content))
    page_texts = [pdf_page.extract_text() for pdf_page in reader.pages]
    return "".join(page_texts)
def process_pdf_images(file_content):
    """OCR a PDF by rendering its pages and sending them to Gemini Vision.

    Each page is rendered to a pixmap, encoded as PNG and loaded as a PIL
    image. The images are then sent to ``model_vision`` in page order,
    preceded by the instruction "extract the whole text".

    Args:
        file_content: Raw bytes of the PDF document.

    Returns:
        The ``text`` attribute of the model response.
    """
    # Open the in-memory PDF inside a context manager so the PyMuPDF document
    # is closed even if rendering a page fails; it was previously never closed.
    with fitz.open(stream=file_content, filetype="pdf") as pdf_document:
        # Each PNG buffer is a standalone copy of the rendered page, so the
        # PIL images stay usable after the document has been closed.
        page_images = [
            Image.open(io.BytesIO(page.get_pixmap().tobytes("png")))
            for page in pdf_document
        ]

    # Call Gemini Vision with the rendered page images.
    response = model_vision.generate_content(["extract the whole text", *page_images])
    return response.text
def _strip_code_fence(raw):
    """Return ``raw`` without a surrounding Markdown code fence, if present."""
    payload = raw.strip()
    if payload.startswith("```"):
        payload = payload[3:]
        if payload.endswith("```"):
            payload = payload[:-3]
        payload = payload.lstrip()
        # A fenced reply may carry a language tag (```json). Valid JSON can
        # never begin with the letters "json", so dropping the tag is safe.
        if payload[:4].lower() == "json":
            payload = payload[4:]
    return payload.strip()


def _build_cv_prompt(cv_text):
    """Return the extraction prompt sent to the text model for ``cv_text``."""
    # The prompt lines are kept flush-left so that no source-code indentation
    # ends up in the text sent to the model.
    # NOTE(review): the example wraps key/value pairs in [ ], which is not
    # itself valid JSON -- confirm the output shape that clients expect before
    # changing it.
    return f"""
This is CV data: {cv_text.strip()}
IMPORTANT: The output should be a JSON array! Make sure the JSON is valid.
Example Output:
[
"firstname" : "firstname",
"lastname" : "lastname",
"email" : "email",
"contact_number" : "contact number",
"home_address" : "full home address",
"home_town" : "home town or city",
"total_years_of_experience" : "total years of experience",
"education": "Institution Name, Degree Name",
"LinkedIn_link" : "LinkedIn link",
"experience" : "experience",
"industry": "industry of work",
"skills" : skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section),
"positions": [ "Job title 1", "Job title 2", "Job title 3" ],
"summary": "Generate a summary of the CV, including key qualifications, notable experiences, and relevant skills."
]
"""


@app.post("/get_ocr_data/")
async def get_data(user_id: str = Query(...), input_file: UploadFile = File(...)):
    """Extract structured CV data from an uploaded PDF.

    The PDF's text layer is read first. When fewer than 10 non-whitespace-
    trimmed characters come back, the pages are rendered to images and sent
    to the vision model instead. The resulting text is embedded in a prompt
    for the text model, whose reply is parsed as JSON.

    Args:
        user_id: Required query parameter. It is accepted but not used by
            this handler.
        input_file: The uploaded file; its content type must be
            ``application/pdf``.

    Returns:
        ``{"data": <parsed JSON from the model>}``.

    Raises:
        HTTPException: 400 when the upload is not a PDF; 500 when reading,
            OCR, the model call or JSON parsing fails.
    """
    # Reject unsupported uploads before reading the body into memory.
    if input_file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Unsupported file type")

    try:
        file_content = await input_file.read()

        # Process PDF
        text = process_pdf_text(file_content)
        if len(text.strip()) < 10:  # Fallback to image-based OCR if text is minimal
            text = process_pdf_images(file_content)

        # Call Gemini Text model
        response = model_text.generate_content(_build_cv_prompt(text))

        # The model may wrap its answer in a Markdown fence such as ```json;
        # strip the fence and its tag before parsing.
        data = json.loads(_strip_code_fence(response.text))
        return {"data": data}
    except HTTPException:
        # Keep deliberate HTTP errors intact instead of converting them to 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") from e