"""FastAPI service that extracts structured CV data from PDF or image uploads.

PDF uploads are read with PyPDF2; image uploads are sent to the OpenAI chat
completions endpoint for text extraction.  The extracted text is then passed
to a Gemini text model, which is asked to return selected CV fields as JSON.
"""

# The installed-package listing below runs before the remaining imports, so it
# is printed even if one of those imports fails.
# NOTE(review): presumably a deployment debugging aid -- confirm it is still
# wanted, since it prints on every import of this module.
try:
    from pip._internal.operations import freeze
except ImportError:  # pip < 10.0
    from pip.operations import freeze

pkgs = freeze.freeze()
for pkg in pkgs:
    print(pkg)

import os
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PyPDF2 import PdfReader
import google.generativeai as genai
import json
import base64
from io import BytesIO
from PIL import Image
import io
import requests
from dotenv import load_dotenv

# Load the environment variables from the .env file
load_dotenv()

secret = os.environ["GEMINI"]
genai.configure(api_key=secret)
model_vision = genai.GenerativeModel('gemini-pro-vision')
model_text = genai.GenerativeModel('gemini-pro')

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive CORS policy -- confirm this is intended before exposing publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

_OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"
# Seconds to wait for the OpenAI API; without a timeout a stalled connection
# would hang the request indefinitely.
_OPENAI_TIMEOUT_SECONDS = 60
# Format used when a PIL image carries no format of its own.
_FALLBACK_IMAGE_FORMAT = "PNG"
# MIME types for the image formats the endpoint accepts.
_MIME_BY_FORMAT = {"JPEG": "image/jpeg", "PNG": "image/png"}


def _image_format(image):
    """Return the PIL format name to serialise *image* with."""
    # ``image.format`` is None for images that were not loaded from a file.
    return image.format or _FALLBACK_IMAGE_FORMAT


def encode_image(image):
    """Serialise a PIL image in memory and return it base64-encoded.

    Args:
        image: A ``PIL.Image.Image``.  Its own format is used when set,
            otherwise PNG.

    Returns:
        The encoded image bytes as a base64 ``str``.
    """
    buffered = BytesIO()
    image.save(buffered, format=_image_format(image))
    return base64.b64encode(buffered.getvalue()).decode('utf-8')


def vision(image):
    """Ask the OpenAI chat completions API to extract all data from an image.

    Args:
        image: A ``PIL.Image.Image`` to send to the model.

    Returns:
        The ``content`` of the first choice in the API response.

    Raises:
        KeyError: If the ``OPEN_AI`` environment variable is not set.
        requests.RequestException: On timeout, connection failure, or a
            non-2xx response from the API.
    """
    api_key = os.environ["OPEN_AI"]

    base64_image = encode_image(image)
    # Declare the MIME type that matches the bytes actually encoded above,
    # rather than always claiming JPEG.
    image_format = _image_format(image).upper()
    mime_type = _MIME_BY_FORMAT.get(image_format, f"image/{image_format.lower()}")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "extract all data from this image"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    response = requests.post(
        _OPENAI_CHAT_URL,
        headers=headers,
        json=payload,
        timeout=_OPENAI_TIMEOUT_SECONDS,
    )
    # Surface API errors as an HTTP error instead of a KeyError on 'choices'.
    response.raise_for_status()
    return response.json()['choices'][0]['message']['content']


@app.post("/get_ocr_data/")
async def get_data(input_file: UploadFile = File(...)):
    """Extract CV fields from an uploaded PDF or image.

    Args:
        input_file: The uploaded file.  Supported content types are
            ``application/pdf``, ``image/jpeg``, ``image/png`` and
            ``image/jpg``.

    Returns:
        ``{"data": <parsed JSON>}`` where the JSON is whatever the Gemini
        model returned for the extraction prompt.

    Raises:
        HTTPException: 400 for an unsupported content type; 500 for any other
            failure while reading the file, calling the models, or parsing
            the model output.
    """
    try:
        file_content = await input_file.read()
        file_type = input_file.content_type

        if file_type == "application/pdf":
            # Read PDF file using PyPDF2.  A page may yield no text; treat
            # that as an empty string rather than failing on None.
            pdf_reader = PdfReader(io.BytesIO(file_content))
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        elif file_type in ["image/jpeg", "image/png", "image/jpg"]:
            # Open the image with PIL and extract its text via the OpenAI
            # vision request.
            image = Image.open(io.BytesIO(file_content))
            text = vision(image)
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")

        # Call Gemini to extract the required fields from the raw text.
        prompt = (
            f"This is CV data: {text.strip()} "
            "I want only: firstname, lastname, contact number, "
            "total years of experience, LinkedIn link, experience, skills "
            "in JSON format only"
        )

        response = model_text.generate_content(prompt)
        # The model may wrap its answer in a Markdown code fence; strip it
        # before parsing.
        data = json.loads(response.text.replace("```json", "").replace("```", ""))
        return {"data": data}
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must reach the client
        # unchanged instead of being rewrapped as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") from e