ocr_api2

Sleeping

App Files Files Community

ocr_api2 / main.py

Nasma

Update main.py

acf9f09 verified 6 months ago

raw

history blame

7.94 kB

	# try: from pip._internal.operations import freeze
	# except ImportError: # pip < 10.0
	# from pip.operations import freeze

	# pkgs = freeze.freeze()
	# for pkg in pkgs: print(pkg)
	# import os
	# from fastapi import FastAPI, HTTPException, File, UploadFile,Query
	# from fastapi.middleware.cors import CORSMiddleware
	# from PyPDF2 import PdfReader
	# import google.generativeai as genai
	# import json
	# import base64
	# from io import BytesIO
	# from PIL import Image
	# import io
	# import requests
	# import fitz # PyMuPDF
	# import os


	# from dotenv import load_dotenv
	# # Load the environment variables from the .env file
	# load_dotenv()

	# # Configure Gemini API
	# secret = os.environ["GEMINI"]
	# genai.configure(api_key=secret)
	# model_vision = genai.GenerativeModel('gemini-1.5-flash')
	# model_text = genai.GenerativeModel('gemini-pro')






	# app = FastAPI()

	# app.add_middleware(
	# CORSMiddleware,
	# allow_origins=["*"],
	# allow_credentials=True,
	# allow_methods=["*"],
	# allow_headers=["*"],
	# )





	# def vision(file_content):
	# # Open the PDF
	# pdf_document = fitz.open("pdf",file_content)
	# gemini_input = ["extract the whole text"]
	# # Iterate through the pages
	# for page_num in range(len(pdf_document)):
	# # Select the page
	# page = pdf_document.load_page(page_num)

	# # Render the page to a pixmap (image)
	# pix = page.get_pixmap()
	# print(type(pix))

	# # Convert the pixmap to bytes
	# img_bytes = pix.tobytes("png")

	# # Convert bytes to a PIL Image
	# img = Image.open(io.BytesIO(img_bytes))
	# gemini_input.append(img)
	# # # Save the image if needed
	# # img.save(f'page_{page_num + 1}.png')

	# print("PDF pages converted to images successfully!")

	# # Now you can pass the PIL image to the model_vision
	# response = model_vision.generate_content(gemini_input).text
	# return response


	# @app.post("/get_ocr_data/")
	# async def get_data(input_file: UploadFile = File(...)):
	# #try:
	# # Determine the file type by reading the first few bytes
	# file_content = await input_file.read()
	# file_type = input_file.content_type

	# text = ""

	# if file_type == "application/pdf":
	# # Read PDF file using PyPDF2
	# pdf_reader = PdfReader(io.BytesIO(file_content))
	# for page in pdf_reader.pages:
	# text += page.extract_text()

	# if len(text)<10:
	# print("vision called")
	# text = vision(file_content)
	# else:
	# raise HTTPException(status_code=400, detail="Unsupported file type")

	# # Call Gemini (or another model) to extract required data
	# prompt = f"""This is CV data: {text.strip()}
	# IMPORTANT: The output should be a JSON array! Make Sure the JSON is valid.

	# Example Output:
	# [
	# "firstname" : "firstname",
	# "lastname" : "lastname",
	# "gender" : "gender",
	# "email" : "email",
	# "contact_number" : "contact number",
	# "age" : "age",
	# "home_address" : "full home address",
	# "home_town" : "home town or city",
	# "total_years_of_experience" : "total years of experience",
	# "LinkedIn_link" : "LinkedIn link",
	# "positions": [ "Job title 1", "Job title 2", "Job title 3" ],
	# "industry": "industry of work",
	# "experience" : "experience",
	# "skills" : Skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section)
	# ]
	# """

	# response = model_text.generate_content(prompt)
	# print(response.text)
	# data = json.loads(response.text.replace("JSON", "").replace("json", "").replace("```", ""))
	# return {"data": data}

	# #except Exception as e:
	# #raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")

	from fastapi import FastAPI, HTTPException, File, UploadFile, Query
	from fastapi.middleware.cors import CORSMiddleware
	from PyPDF2 import PdfReader
	import google.generativeai as genai
	import json
	from PIL import Image
	import io
	import fitz # PyMuPDF
	import os
	from dotenv import load_dotenv

	# Settings: load_dotenv() pulls variables from a .env file into the process
	# environment; the Gemini key is then read from GEMINI (KeyError if it is unset).
	load_dotenv()
	secret = os.environ["GEMINI"]
	genai.configure(api_key=secret)

	# model_vision transcribes rendered page images; model_text turns CV text into JSON.
	model_vision = genai.GenerativeModel('gemini-1.5-flash')
	model_text = genai.GenerativeModel('gemini-pro')

	# CORS is fully open: any origin, method and header, with credentials allowed.
	_cors_options = {
	    "allow_origins": ["*"],
	    "allow_credentials": True,
	    "allow_methods": ["*"],
	    "allow_headers": ["*"],
	}

	app = FastAPI()
	app.add_middleware(CORSMiddleware, **_cors_options)

	def process_pdf_text(file_content):
	    """Return the text of every page of a PDF, concatenated in page order.

	    ``file_content`` is the PDF as in-memory bytes; it is parsed with PyPDF2 and
	    the per-page text is joined with no separator between pages.
	    """
	    reader = PdfReader(io.BytesIO(file_content))
	    return "".join(page.extract_text() for page in reader.pages)

	def process_pdf_images(file_content):
	    """Render each PDF page to an image and ask Gemini Vision to transcribe them.

	    Args:
	        file_content: The PDF document as in-memory bytes.

	    Returns:
	        The text of the vision model's reply to the "extract the whole text"
	        prompt followed by one image per page.
	    """
	    # Open the PDF from memory. The context manager closes the document even if
	    # rendering a page fails; previously the document was never closed.
	    with fitz.open("pdf", file_content) as pdf_document:
	        page_images = [
	            # Pixmap -> PNG bytes -> PIL image, the image type handed to the model.
	            Image.open(io.BytesIO(page.get_pixmap().tobytes("png")))
	            for page in pdf_document
	        ]

	    # Call Gemini Vision with the instruction first, then the page images in order.
	    response = model_vision.generate_content(["extract the whole text", *page_images])
	    return response.text

	def _strip_code_fence(raw):
	    """Remove Markdown code-fence markers from a model reply before JSON parsing.

	    Every "```" marker is dropped and, if the remaining text begins with the
	    language tag of a "```json" fence, that tag is removed as well.
	    """
	    cleaned = raw.replace("```", "").strip()
	    # No valid JSON document starts with the letters "json", so a leading
	    # "json" can only be the fence's language tag.
	    if cleaned[:4].lower() == "json":
	        cleaned = cleaned[4:]
	    return cleaned


	@app.post("/get_ocr_data/")
	async def get_data(user_id: str = Query(...), input_file: UploadFile = File(...)):
	    """Extract structured CV fields from an uploaded PDF.

	    Args:
	        user_id: Required query parameter; accepted but not used by this handler.
	        input_file: The uploaded file; only content type "application/pdf" is accepted.

	    Returns:
	        A dict ``{"data": ...}`` holding the JSON parsed from the text model's reply.

	    Raises:
	        HTTPException: 400 when the upload is not a PDF; 500 when reading the
	            upload, extracting text, calling the model or parsing its reply fails.
	    """
	    # Reject unsupported uploads before the try block so the 400 is never
	    # rewrapped as a 500 by the generic handler below.
	    if input_file.content_type != "application/pdf":
	        raise HTTPException(status_code=400, detail="Unsupported file type")

	    try:
	        file_content = await input_file.read()

	        # Process PDF
	        text = process_pdf_text(file_content)
	        if len(text.strip()) < 10:  # Fallback to image-based OCR if text is minimal
	            text = process_pdf_images(file_content)

	        # Call Gemini Text model
	        # NOTE(review): the example in the prompt wraps key/value pairs in [ ],
	        # which is not valid JSON; the prompt text is left unchanged here.
	        prompt = f"""
	This is CV data: {text.strip()}
	IMPORTANT: The output should be a JSON array! Make sure the JSON is valid.
	Example Output:
	[
	"firstname" : "firstname",
	"lastname" : "lastname",
	"email" : "email",
	"contact_number" : "contact number",
	"home_address" : "full home address",
	"home_town" : "home town or city",
	"total_years_of_experience" : "total years of experience",
	"education": "Institution Name, Degree Name",
	"LinkedIn_link" : "LinkedIn link",
	"experience" : "experience",
	"industry": "industry of work",
	"skills" : skills(Identify and list specific skills mentioned in both the skills section and inferred from the experience section),
	"positions": [ "Job title 1", "Job title 2", "Job title 3" ],
	"summary": "Generate a summary of the CV, including key qualifications, notable experiences, and relevant skills."
	]
	"""
	        response = model_text.generate_content(prompt)
	        data = json.loads(_strip_code_fence(response.text))  # Sanitize response
	        return {"data": data}
	    except Exception as e:
	        # Restored handler: a try block with no except clause is a syntax error.
	        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") from e