import gradio as gr
import pandas as pd
import cv2
import numpy as np
import requests
import torch
import base64
import os
import logging
from io import BytesIO
from PIL import Image
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from extract_text import extract_text_from_image
from models import TextSimilarityRequest
from text_similarity import analyze_similarity
from starlette.responses import JSONResponse

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

app = FastAPI()

# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )


@app.post("/text_similarity", summary="Perform images text similarity", response_model=float,
          tags=["Text Similarities"])
async def text_similarity(request: TextSimilarityRequest):
    image_info = request.imageInfo
    key_texts = request.keyTexts
    similarity_threshold = request.similarityThreshold
    origin_id = image_info.originId

    logging.info(f"Checking text similarity for main source with resource id {origin_id}")

    image = load_image_url(image_info.source)

    # Extract text from the image, using the GPU when one is available
    gpu_available = torch.cuda.is_available()
    extracted_texts = extract_text_from_image(image, gpu_available)

    results = analyze_similarity(
        extracted_texts,
        key_texts,
        similarity_threshold=similarity_threshold / 100,  # Convert percentage to decimal
        fragment_threshold=100 / 100  # Fragments must match fully (fixed at 100%)
    )

    log_similarity_report(results, origin_id)

    total_texts = len(key_texts)
    passed_texts = results["statistics"]["total_processed"]
    # Guard against an empty key-text list to avoid a ZeroDivisionError
    percentage_passed = (passed_texts / total_texts) * 100 if total_texts else 0.0

    logging.info(f"Text similarity for main source with resource id {origin_id} is {percentage_passed}%")
    return percentage_passed


def log_similarity_report(results, origin_id):
    # General statistics
    logging.info(f"[{origin_id}] Total texts analyzed: {results['statistics']['total_analyzed']}")
    logging.info(f"[{origin_id}] Texts with detected similarity: {results['statistics']['total_processed']}")

    # Similar texts
    if results["similar_texts"]:
        logging.info(f"[{origin_id}] Direct Similar Texts Found: {len(results['similar_texts'])}")
        for item in results["similar_texts"]:
            logging.info(f"[{origin_id}] Similar Text: '{item['text']}' -> Key Text: '{item['key_text']}' "
                         f"with Similarity: {item['similarity']:.2%}")

    # Detected fragments
    if results["fragments_detected"]:
        logging.info(f"[{origin_id}] Fragments Detected: {len(results['fragments_detected'])}")
        for item in results["fragments_detected"]:
            logging.info(f"[{origin_id}] Fragment: '{item['text']}' -> Key Text: '{item['key_text']}' "
                         f"with Similarity: {item['similarity']:.2%}")

    # Combined texts
    if results["combined"]:
        logging.info(f"[{origin_id}] Texts to be Combined: {len(results['combined'])}")
        for item in results["combined"]:
            logging.info(f"[{origin_id}] Combined Text: '{item['combined_text']}' -> Key Text: '{item['key_text']}' "
                         f"with Similarity: {item['similarity']:.2%}")

    # No significant similarity found
    if not (results["similar_texts"] or results["fragments_detected"] or results["combined"]):
        logging.info(f"[{origin_id}] No significant similarity found.")

    # Per-category statistics
    logging.info(f"[{origin_id}] Direct similarity: {results['statistics']['direct_similarity']}")
    logging.info(f"[{origin_id}] Fragments: {results['statistics']['fragments']}")
    logging.info(f"[{origin_id}] Combined: {results['statistics']['combined']}")
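
# Example request body for the /text_similarity endpoint above (a sketch: the
# exact schema is defined by TextSimilarityRequest in models.py, which is not
# shown here; the field names below are the ones the handler reads, and the
# values are illustrative):
#
#   POST /text_similarity
#   {
#       "imageInfo": {"originId": "abc-123", "source": "https://example.com/page.png"},
#       "keyTexts": ["invoice number", "total due"],
#       "similarityThreshold": 80
#   }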
def load_image_url(source):
    """Load an image from an HTTP(S) URL or a base64-encoded string as a grayscale array."""
    Image.MAX_IMAGE_PIXELS = None
    if source.startswith('http'):
        response = requests.get(source)
        img = np.asarray(bytearray(response.content), dtype=np.uint8)
        img = cv2.imdecode(img, cv2.IMREAD_GRAYSCALE)
    else:
        img = base64.b64decode(source)
        img = Image.open(BytesIO(img))
        img = np.array(img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return img


def process_image(image, key_texts, similarity_threshold, fragment_threshold):
    """Processes the image, extracts text, and analyzes similarities."""
    try:
        if image is None:
            return "Please upload an image for analysis.", None, None, None, None, None, None
        if not key_texts.strip():
            return "Please enter key texts for comparison.", None, None, None, None, None, None

        # Extract text from the image, using the GPU when one is available
        gpu_available = torch.cuda.is_available()
        extracted_texts = extract_text_from_image(image, gpu_available)

        if isinstance(key_texts, str):
            key_texts = [text.strip() for text in key_texts.split('\n') if text.strip()]

        # Run the analysis
        results = analyze_similarity(
            extracted_texts,
            key_texts,
            similarity_threshold=similarity_threshold / 100,  # Convert percentage to decimal
            fragment_threshold=fragment_threshold / 100  # Convert percentage to decimal
        )

        # Generate the HTML report
        html_report = generate_html_report(results)

        # Generate DataFrames
        dfs = generate_results_dataframe(results)

        # Extract individual DataFrames (or create empty ones if they don't exist)
        df_statistics = dfs.get("statistics", pd.DataFrame())
        df_similar = dfs.get("similar", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_fragments = dfs.get("fragments", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_combined = dfs.get("combined", pd.DataFrame(columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"]))

        return html_report, df_statistics, df_similar, df_fragments, df_combined, extracted_texts, gpu_available
    except Exception as e:
        # Match the success path's arity (7 values) so Gradio outputs stay aligned
        return f"Error while processing: {str(e)}", None, None, None, None, None, None


def process_manual_input(texts, key_texts, similarity_threshold, fragment_threshold):
    """Processes the user's manual text input."""
    # Validate input
    if not texts.strip() or not key_texts.strip():
        return "Please enter texts for analysis and key texts for comparison.", None, None, None, None

    try:
        # Run the analysis
        results = analyze_similarity(
            texts,
            key_texts,
            similarity_threshold=similarity_threshold / 100,  # Convert percentage to decimal
            fragment_threshold=fragment_threshold / 100  # Convert percentage to decimal
        )

        # Generate the HTML report
        html_report = generate_html_report(results)

        # Generate DataFrames
        dfs = generate_results_dataframe(results)

        # Extract individual DataFrames (or create empty ones if they don't exist)
        df_statistics = dfs.get("statistics", pd.DataFrame())
        df_similar = dfs.get("similar", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_fragments = dfs.get("fragments", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_combined = dfs.get("combined", pd.DataFrame(columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"]))

        return html_report, df_statistics, df_similar, df_fragments, df_combined
    except Exception as e:
        return f"Error while processing: {str(e)}", None, None, None, None
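
# A minimal sketch of the Gradio UI the two helpers above appear to back. The
# `demo` object referenced by the commented-out mount/launch calls further down
# is not defined in this file, so everything here is illustrative rather than
# the original interface:
#
#   demo = gr.Interface(
#       fn=process_manual_input,
#       inputs=[
#           gr.Textbox(label="Texts (one per line)", lines=5),
#           gr.Textbox(label="Key texts (one per line)", lines=5),
#           gr.Slider(0, 100, value=80, label="Similarity threshold (%)"),
#           gr.Slider(0, 100, value=100, label="Fragment threshold (%)"),
#       ],
#       outputs=[gr.HTML(), gr.Dataframe(), gr.Dataframe(), gr.Dataframe(), gr.Dataframe()],
#   )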
def generate_html_report(results):
    """Generates an HTML report about the detected similarities."""
    html = "<div>"
    html += "<h2>Similarity Report</h2>"

    # General statistics
    html += "<div>"
    html += f"<p>Total texts analyzed: {results['statistics']['total_analyzed']}</p>"
    html += f"<p>Texts with detected similarity: {results['statistics']['total_processed']}</p>"
    html += "</div>"

    # Results tables
    html += "<h3>Detected Similarities</h3>"

    # Similar texts
    if results["similar_texts"]:
        html += "<h4>Direct Similar Texts</h4>"
        html += "<table>"
        html += "<tr><th>Original Text</th><th>Key Text</th><th>Similarity</th></tr>"
        for item in results["similar_texts"]:
            html += f"<tr><td>{item['text']}</td><td>{item['key_text']}</td><td>{item['similarity']:.2%}</td></tr>"
        html += "</table>"

    # Detected fragments
    if results["fragments_detected"]:
        html += "<h4>Texts with Detected Fragments</h4>"
        html += "<table>"
        html += "<tr><th>Original Text</th><th>Key Text</th><th>Similarity</th></tr>"
        for item in results["fragments_detected"]:
            html += f"<tr><td>{item['text']}</td><td>{item['key_text']}</td><td>{item['similarity']:.2%}</td></tr>"
        html += "</table>"

    # Combined texts
    if results["combined"]:
        html += "<h4>Texts that need to be combined</h4>"
        html += "<table>"
        html += "<tr><th>Text 1</th><th>Text 2</th><th>Combination</th><th>Key Text</th><th>Similarity</th></tr>"
        for item in results["combined"]:
            html += (f"<tr><td>{item['texts'][0]}</td><td>{item['texts'][1]}</td>"
                     f"<td>{item['combined_text']}</td><td>{item['key_text']}</td>"
                     f"<td>{item['similarity']:.2%}</td></tr>")
        html += "</table>"

    # No significant similarity found
    if not (results["similar_texts"] or results["fragments_detected"] or results["combined"]):
        html += "<p>No significant similarity found with the current parameters.</p>"

    html += "</div>"
    return html
" return html def generate_results_dataframe(results): """Generates pandas DataFrames to visualize the results.""" dfs = {} # DataFrame for similar texts if results["similar_texts"]: data = [(item['index'], item['text'], item['key_text'], f"{item['similarity']:.2%}") for item in results["similar_texts"]] dfs["similar"] = pd.DataFrame(data, columns=["Index", "Original Text", "Key Text", "Similarity"]) # DataFrame for fragments if results["fragments_detected"]: data = [(item['index'], item['text'], item['key_text'], f"{item['similarity']:.2%}") for item in results["fragments_detected"]] dfs["fragments"] = pd.DataFrame(data, columns=["Index", "Original Text", "Key Text", "Similarity"]) # DataFrame for combined if results["combined"]: data = [(f"{item['indices'][0]},{item['indices'][1]}", item['texts'][0], item['texts'][1], item['combined_text'], item['key_text'], f"{item['similarity']:.2%}") for item in results["combined"]] dfs["combined"] = pd.DataFrame(data, columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"]) # Statistics DataFrame data = [ ("Total analyzed", results["statistics"]["total_analyzed"]), ("Total with similarity", results["statistics"]["total_processed"]), ("Direct similarity", results["statistics"]["direct_similarity"]), ("Fragments", results["statistics"]["fragments"]), ("Combined", results["statistics"]["combined"]) ] dfs["statistics"] = pd.DataFrame(data, columns=["Metric", "Value"]) return dfs #app = gr.mount_gradio_app(app, demo, path="/") @app.get("/api") def read_root(): return JSONResponse(content={"message": "Hello from FastAPI inside Gradio!"}) # if __name__ == "__main__": # import uvicorn # uvicorn.run(app) # PORT = int(os.getenv("PORT", 7860)) # if __name__ == "__main__": # import uvicorn # print(f"A arrancar na porta {PORT}...") # uvicorn.run(app) #demo.launch(server_name="0.0.0.0", server_port=7860)