"""HTTP service exposing an image/key-text similarity endpoint plus a Gradio UI."""

import logging
import base64
from io import BytesIO

import cv2
import numpy as np
import requests
import torch
import gradio as gr
from PIL import Image
from fastapi import FastAPI

from models import TextSimilarityRequest
from extract_text import extract_text_from_image
from text_similarity import analyze_similarity
from app import generate_gradio

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

app = FastAPI()

# Seconds to wait on a remote image download before giving up.
DEFAULT_FETCH_TIMEOUT = 30

# One row per report section:
# (results key, header label, per-item label, item field holding the text).
_REPORT_SECTIONS = (
    ("similar_texts", "Direct Similar Texts Found", "Similar Text", "text"),
    ("fragments_detected", "Fragments Detected", "Fragment", "text"),
    ("combined", "Texts to be Combined", "Combined Text", "combined_text"),
)


@app.post(
    "/text_similarity",
    summary="Perform images text similarity",
    response_model=float,
    tags=["Text Similarities"],
)
async def text_similarity(request: TextSimilarityRequest):
    """Return the percentage of key texts that were matched in the image.

    The image referenced by ``request.imageInfo.source`` is loaded, its text
    is extracted, and the extracted texts are compared with
    ``request.keyTexts``. The result is
    ``(direct_similarity + combined) / len(keyTexts) * 100`` using the counts
    reported under ``results["statistics"]``.

    Args:
        request: Payload carrying ``imageInfo`` (with ``source`` and
            ``originId``), ``keyTexts`` and ``similarityThreshold`` (a
            percentage; it is divided by 100 before being passed on).

    Returns:
        The matched percentage as a float, or ``0.0`` when no key texts were
        supplied (previously this case raised ``ZeroDivisionError``).
    """
    image_info = request.imageInfo
    key_texts = request.keyTexts
    similarity_threshold = request.similarityThreshold
    origin_id = image_info.originId

    logging.info(f"Checking text similarity for main source with resource id {origin_id}")

    # Without key texts the ratio below is undefined; answer early instead of
    # downloading and processing the image only to divide by zero.
    if not key_texts:
        logging.warning(
            f"No key texts supplied for resource id {origin_id}; returning 0% similarity"
        )
        return 0.0

    # NOTE(review): the download and text extraction below are blocking calls
    # inside an ``async def`` handler, so they hold the event loop while they run.
    image = load_image_url(image_info.source)

    # Extract text from the image using the user's method
    gpu_available = torch.cuda.is_available()
    extracted_texts = extract_text_from_image(image, gpu_available)

    results = analyze_similarity(
        extracted_texts,
        key_texts,
        similarity_threshold=similarity_threshold / 100,  # Convert percentage to decimal
        fragment_threshold=100 / 100,  # Convert percentage to decimal
    )

    log_similarity_report(results, origin_id)

    statistics = results["statistics"]
    total_texts = len(key_texts)
    passed_texts = statistics["direct_similarity"] + statistics["combined"]
    percentage_passed = (passed_texts / total_texts) * 100

    logging.info(f"Text similarity for main source with resource id {origin_id} is {percentage_passed}%")

    return percentage_passed


def log_similarity_report(results, originId):
    """Write a human-readable summary of ``results`` to the log.

    Args:
        results: Mapping with a ``statistics`` sub-mapping (``total_analyzed``,
            ``total_processed``, ``direct_similarity``, ``fragments``,
            ``combined``) and the item lists ``similar_texts``,
            ``fragments_detected`` and ``combined``. Each item provides
            ``key_text``, ``similarity`` and either ``text`` or
            ``combined_text``.
        originId: Identifier prefixed to every log line.
    """
    statistics = results["statistics"]

    # General statistics
    logging.info(f"[{originId}] Total texts analyzed: {statistics['total_analyzed']}")
    logging.info(f"[{originId}] Texts with detected similarity: {statistics['total_processed']}")

    # Similar texts, detected fragments and combined texts share one layout.
    found_any = False
    for results_key, header, item_label, text_field in _REPORT_SECTIONS:
        items = results[results_key]
        if not items:
            continue
        found_any = True
        logging.info(f"[{originId}] {header}: {len(items)}")
        for item in items:
            logging.info(
                f"[{originId}] {item_label}: '{item[text_field]}' -> "
                f"Key Text: '{item['key_text']}' with Similarity: {item['similarity']:.2%}"
            )

    # If no significant similarity found
    if not found_any:
        logging.info(f"[{originId}] No significant similarity found.")

    # Statistics
    logging.info(f"[{originId}] Direct similarity: {statistics['direct_similarity']}")
    logging.info(f"[{originId}] Fragments: {statistics['fragments']}")
    logging.info(f"[{originId}] Combined: {statistics['combined']}")


def load_image_url(source, timeout=DEFAULT_FETCH_TIMEOUT):
    """Load an image from a URL or a base64 string as a grayscale array.

    Args:
        source: Either an ``http://`` / ``https://`` URL, or base64-encoded
            image bytes (an optional ``data:...;base64,`` prefix is stripped).
        timeout: Seconds to wait for the HTTP download; ignored for base64
            input.

    Returns:
        A NumPy array holding the grayscale image.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an error status.
        ValueError: The downloaded bytes are empty or cannot be decoded as an
            image.
    """
    # NOTE(security): this turns off Pillow's decompression-bomb limit for the
    # whole process, and ``source`` comes from the request. Kept to preserve
    # support for very large images; consider a finite limit instead.
    Image.MAX_IMAGE_PIXELS = None

    if source.startswith(("http://", "https://")):
        # A timeout keeps a stalled server from hanging the request forever.
        response = requests.get(source, timeout=timeout)
        # Fail here rather than handing an HTML error page to the decoder.
        response.raise_for_status()
        if not response.content:
            raise ValueError(f"Empty response body while fetching image from {source}")

        encoded = np.frombuffer(response.content, dtype=np.uint8)
        img = cv2.imdecode(encoded, cv2.IMREAD_GRAYSCALE)
        # cv2.imdecode signals undecodable data by returning None.
        if img is None:
            raise ValueError(f"Could not decode an image from {source}")
        return img

    # A ':' never occurs in valid base64, so dropping a data-URI header cannot
    # affect plain base64 input.
    if source.startswith("data:"):
        source = source.partition(",")[2]

    raw_bytes = base64.b64decode(source)
    with Image.open(BytesIO(raw_bytes)) as pil_image:
        # Normalise to 3-channel RGB first: COLOR_RGB2GRAY rejects RGBA,
        # palette and single-channel images.
        rgb = np.array(pil_image.convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)


@app.on_event("startup")
async def startup_event():
    """Mount the Gradio interface on the FastAPI app at ``/`` during startup."""
    gr.mount_gradio_app(app, generate_gradio(), path="/")