ocr_api2 / main.py
Arafath10's picture
Update main.py
3b59cf8 verified
raw
history blame
3.22 kB
# Startup diagnostic: print every installed distribution (pip-freeze format)
# to stdout when this module is imported.
try:
    from pip._internal.operations import freeze
except ImportError:  # pip < 10.0
    from pip.operations import freeze

pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)
import os
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
import google.generativeai as genai
import json
import base64
from io import BytesIO
from PIL import Image
import io
import requests
import fitz # PyMuPDF
import os
from dotenv import load_dotenv
# Load the environment variables from the .env file
load_dotenv()

# Configure the Gemini API.
# The key is taken from the GEMINI environment variable (which may be supplied
# through the .env file loaded above) instead of being embedded in the source.
# NOTE: the key that used to be committed on this line is public and must be
# treated as compromised -- revoke/rotate it.
_gemini_api_key = os.environ.get("GEMINI")
if not _gemini_api_key:
    # Fail at startup with an actionable message rather than on the first
    # request that reaches the Gemini client.
    raise RuntimeError(
        "GEMINI environment variable is not set; it must contain the Gemini API key"
    )
genai.configure(api_key=_gemini_api_key)

# Model handles used by the endpoints below: one receives rendered page
# images, the other receives the text prompt.
model_vision = genai.GenerativeModel('gemini-1.5-flash')
model_text = genai.GenerativeModel('gemini-pro')
# FastAPI application object; routes below are registered on it.
app = FastAPI()

# CORS settings: every origin, method and header is allowed, with credentials.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is intended before exposing the service publicly.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
def vision(file_content: bytes) -> str:
    """Extract the text of a PDF by sending its rendered pages to Gemini.

    Every page is rendered to a PNG image, wrapped in a PIL image, and all
    pages are sent together with the instruction "extract the whole text" in
    a single ``model_vision.generate_content`` call.

    Args:
        file_content: Raw bytes of the PDF document.

    Returns:
        The ``.text`` of the response produced by ``model_vision``.
    """
    gemini_input = ["extract the whole text"]

    # Open the PDF from memory. The original code passed an undefined name
    # (`pdf`) here, which raised NameError on every call. The context manager
    # closes the document once all pages have been rendered.
    with fitz.open(stream=file_content, filetype="pdf") as pdf_document:
        for page in pdf_document:
            # Render the page to a pixmap and serialise it as PNG bytes.
            pix = page.get_pixmap()
            img_bytes = pix.tobytes("png")
            # The PIL image reads from its own in-memory buffer, so it stays
            # usable after the PDF document is closed.
            gemini_input.append(Image.open(io.BytesIO(img_bytes)))

    print("PDF pages converted to images successfully!")

    # Prompt string followed by one PIL image per page.
    response = model_vision.generate_content(gemini_input).text
    return response
@app.post("/get_ocr_data/")
async def get_data(input_file: UploadFile = File(...)):
    """Extract structured CV fields from an uploaded PDF.

    The PDF is passed to ``vision`` to obtain its text, then ``model_text`` is
    asked to return selected CV fields as JSON, which is parsed and returned.

    Args:
        input_file: The uploaded file; only ``application/pdf`` is accepted.

    Returns:
        ``{"data": <parsed JSON from the model>}``.

    Raises:
        HTTPException: 400 when the upload's content type is not
            ``application/pdf``; 500 when reading, OCR, the model call or
            JSON parsing fails.
    """
    # The type check relies on the content type declared by the client, not
    # on the file's bytes. It is performed outside the try block so that the
    # 400 is not caught below and re-reported as a 500.
    if input_file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Unsupported file type")

    try:
        file_content = await input_file.read()
        text = vision(file_content)

        # Call Gemini to extract the required data
        prompt = (
            f"This is CV data: {text.strip()}\n"
            "I want only:\n"
            "firstname, lastname, contact number, total years of experience, "
            "LinkedIn link, experience, skills\n"
            "in JSON format only"
        )
        response = model_text.generate_content(prompt)

        # The model may wrap its answer in a Markdown ```json fence; strip the
        # fence markers before parsing.
        raw_json = response.text.replace("```json", "").replace("```", "")
        data = json.loads(raw_json)
        return {"data": data}
    except HTTPException:
        # Never mask a deliberate HTTP error as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing file: {str(e)}"
        ) from e