Commit
·
4834b21
1
Parent(s):
c072a2e
Add application file
Browse files
app.py
CHANGED
@@ -1,292 +1,7 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import pandas as pd
|
3 |
-
import cv2
|
4 |
-
import numpy as np
|
5 |
-
import requests
|
6 |
-
import torch
|
7 |
-
import base64
|
8 |
-
import os
|
9 |
-
import logging
|
10 |
-
from io import BytesIO
|
11 |
-
from PIL import Image
|
12 |
from fastapi import FastAPI
|
13 |
-
from fastapi.middleware.cors import CORSMiddleware
|
14 |
-
from extract_text import extract_text_from_image
|
15 |
-
from models import TextSimilarityRequest
|
16 |
-
from text_similarity import analyze_similarity
|
17 |
-
from starlette.responses import JSONResponse
|
18 |
|
19 |
-
|
20 |
-
# Root-logger configuration: timestamped, level-tagged messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

app = FastAPI()

# CORS middleware is intentionally disabled; re-enable this if the API must
# be reachable from browsers on other origins.
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )
|
29 |
-
|
30 |
-
@app.post("/text_similarity", summary="Perform images text similarity", response_model=float, tags=["Text Similarities"])
|
31 |
-
async def text_similarity(request: TextSimilarityRequest):
|
32 |
-
image_info = request.imageInfo
|
33 |
-
key_texts = request.keyTexts
|
34 |
-
similarity_threshold = request.similarityThreshold
|
35 |
-
origin_id = image_info.originId
|
36 |
-
|
37 |
-
logging.info(f"Checking text similarity for main source with resource id {origin_id}")
|
38 |
-
|
39 |
-
image = load_image_url(image_info.source)
|
40 |
-
|
41 |
-
# Extract text from the image using the user's method
|
42 |
-
gpu_available = torch.cuda.is_available()
|
43 |
-
extracted_texts = extract_text_from_image(image, gpu_available)
|
44 |
-
|
45 |
-
results = analyze_similarity(
|
46 |
-
extracted_texts,
|
47 |
-
key_texts,
|
48 |
-
similarity_threshold=similarity_threshold/100, # Convert percentage to decimal
|
49 |
-
fragment_threshold=100/100 # Convert percentage to decimal
|
50 |
-
)
|
51 |
-
|
52 |
-
log_similarity_report(results, origin_id)
|
53 |
-
|
54 |
-
total_texts = len(key_texts)
|
55 |
-
passed_texts = results["statistics"]["total_processed"]
|
56 |
-
|
57 |
-
percentage_passed = (passed_texts / total_texts) * 100
|
58 |
-
|
59 |
-
logging.info(f"Text similarity for main source with resource id {origin_id} is {percentage_passed}%")
|
60 |
-
|
61 |
-
return percentage_passed
|
62 |
-
|
63 |
-
def log_similarity_report(results, originId):
    """Log a human-readable summary of a similarity-analysis result dict."""
    stats = results["statistics"]

    # Overall counters first.
    logging.info(f"[{originId}] Total texts analyzed: {stats['total_analyzed']}")
    logging.info(f"[{originId}] Texts with detected similarity: {stats['total_processed']}")

    # Direct matches.
    similar = results["similar_texts"]
    if similar:
        logging.info(f"[{originId}] Direct Similar Texts Found: {len(similar)}")
        for entry in similar:
            logging.info(f"[{originId}] Similar Text: '{entry['text']}' -> Key Text: '{entry['key_text']}' with Similarity: {entry['similarity']:.2%}")

    # Fragment matches.
    fragments = results["fragments_detected"]
    if fragments:
        logging.info(f"[{originId}] Fragments Detected: {len(fragments)}")
        for entry in fragments:
            logging.info(f"[{originId}] Fragment: '{entry['text']}' -> Key Text: '{entry['key_text']}' with Similarity: {entry['similarity']:.2%}")

    # Matches produced by combining two extracted texts.
    combined = results["combined"]
    if combined:
        logging.info(f"[{originId}] Texts to be Combined: {len(combined)}")
        for entry in combined:
            logging.info(f"[{originId}] Combined Text: '{entry['combined_text']}' -> Key Text: '{entry['key_text']}' with Similarity: {entry['similarity']:.2%}")

    if not (similar or fragments or combined):
        logging.info(f"[{originId}] No significant similarity found.")

    # Per-category statistics.
    logging.info(f"[{originId}] Direct similarity: {stats['direct_similarity']}")
    logging.info(f"[{originId}] Fragments: {stats['fragments']}")
    logging.info(f"[{originId}] Combined: {stats['combined']}")
|
94 |
-
|
95 |
-
def load_image_url(source):
    """Load an image as a grayscale numpy array.

    ``source`` is either an HTTP(S) URL or a base64-encoded image payload.

    Returns:
        A 2-D ``numpy.uint8`` grayscale image.

    Raises:
        requests.HTTPError: when the URL responds with an error status.
    """
    # Some inputs are very large; disable PIL's decompression-bomb guard.
    Image.MAX_IMAGE_PIXELS = None

    if source.startswith('http'):
        # Fail fast on unreachable hosts and surface HTTP errors instead of
        # silently trying to decode an error page as image bytes.
        response = requests.get(source, timeout=30)
        response.raise_for_status()
        img = np.asarray(bytearray(response.content), dtype=np.uint8)
        img = cv2.imdecode(img, cv2.IMREAD_GRAYSCALE)
    else:
        raw = base64.b64decode(source)
        pil_img = Image.open(BytesIO(raw))
        # Normalize to RGB before the color conversion: COLOR_RGB2GRAY needs
        # a 3-channel input, which palette/grayscale/alpha images would
        # otherwise violate.
        img = np.array(pil_img.convert('RGB'))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    return img
|
109 |
-
|
110 |
-
def process_image(image, key_texts, similarity_threshold, fragment_threshold):
    """Processes the image, extracts text, and analyzes similarities.

    Returns a 7-tuple: (html_report_or_error_message, statistics_df,
    similar_df, fragments_df, combined_df, extracted_texts, gpu_available).
    On validation failure or exception the trailing six elements are ``None``.
    """
    try:
        # Validate inputs before doing any heavy work.  Error tuples carry
        # the same arity (7) as the success path — the original returned 6
        # here, which breaks consumers that unpack a fixed number of outputs.
        if image is None:
            return "Please upload an image for analysis.", None, None, None, None, None, None

        if not key_texts.strip():
            return "Please enter key texts for comparison.", None, None, None, None, None, None

        # Extract text from the image using the user's method
        gpu_available = torch.cuda.is_available()
        extracted_texts = extract_text_from_image(image, gpu_available)

        if isinstance(key_texts, str):
            key_texts = [text.strip() for text in key_texts.split('\n') if text.strip()]

        # Process the analysis
        results = analyze_similarity(
            extracted_texts,
            key_texts,
            similarity_threshold=similarity_threshold/100,  # Convert percentage to decimal
            fragment_threshold=fragment_threshold/100  # Convert percentage to decimal
        )

        # Generate HTML report
        html_report = generate_html_report(results)

        # Generate DataFrames
        dfs = generate_results_dataframe(results)

        # Extract individual DataFrames (or create empty ones if they don't exist)
        df_statistics = dfs.get("statistics", pd.DataFrame())
        df_similar = dfs.get("similar", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_fragments = dfs.get("fragments", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_combined = dfs.get("combined", pd.DataFrame(columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"]))

        return html_report, df_statistics, df_similar, df_fragments, df_combined, extracted_texts, gpu_available

    except Exception as e:
        # Keep the 7-element arity so error returns match the success path.
        return f"Erro ao processar: {str(e)}", None, None, None, None, None, None
|
151 |
-
|
152 |
-
def process_manual_input(texts, key_texts, similarity_threshold, fragment_threshold):
    """Processes the user's manual text input."""
    # Reject empty input up front.
    if not texts.strip() or not key_texts.strip():
        return "Please enter texts for analysis and key texts for comparison.", None, None, None, None

    try:
        # Run the similarity analysis with thresholds scaled from
        # percentages into the 0-1 range the analyzer expects.
        results = analyze_similarity(
            texts,
            key_texts,
            similarity_threshold=similarity_threshold/100,
            fragment_threshold=fragment_threshold/100
        )

        report = generate_html_report(results)
        frames = generate_results_dataframe(results)

        # Pull each table out of the result, substituting an empty frame
        # with the expected columns when a category produced no rows.
        row_cols = ["Index", "Original Text", "Key Text", "Similarity"]
        stats_df = frames.get("statistics", pd.DataFrame())
        similar_df = frames.get("similar", pd.DataFrame(columns=row_cols))
        fragments_df = frames.get("fragments", pd.DataFrame(columns=row_cols))
        combined_df = frames.get(
            "combined",
            pd.DataFrame(columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"])
        )

        return report, stats_df, similar_df, fragments_df, combined_df

    except Exception as e:
        return f"Erro ao processar: {str(e)}", None, None, None, None
|
183 |
-
|
184 |
-
def generate_html_report(results):
    """Generates an HTML report about the detected similarities."""
    html = "<h2>Similarity Report</h2>"

    # General statistics.  The original emitted "<div padding: 15px; ...>"
    # — the style=' attribute opener was missing, producing invalid HTML.
    html += "<div style='padding: 15px; border-radius: 5px; margin-bottom: 20px;'>"
    html += f"<p><b>Total texts analyzed:</b> {results['statistics']['total_analyzed']}</p>"
    html += f"<p><b>Texts with detected similarity:</b> {results['statistics']['total_processed']}</p>"
    html += "</div>"

    # Results table
    html += "<h3>Detected Similarities</h3>"

    # Similar texts
    if results["similar_texts"]:
        html += "<h4>Direct Similar Texts</h4>"
        html += "<table width='100%' style='border-collapse: collapse; margin-bottom: 20px;'>"
        html += "<tr><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Original Text</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Key Text</th><th style='text-align: center; padding: 8px; border: 1px solid #ddd;'>Similarity</th></tr>"

        for item in results["similar_texts"]:
            html += f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{item['text']}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['key_text']}</td><td style='text-align: center; padding: 8px; border: 1px solid #ddd;'>{item['similarity']:.2%}</td></tr>"

        html += "</table>"

    # Detected fragments
    if results["fragments_detected"]:
        html += "<h4>Text with Detected Fragments</h4>"
        html += "<table width='100%' style='border-collapse: collapse; margin-bottom: 20px;'>"
        html += "<tr><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Original Text</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Key Text</th><th style='text-align: center; padding: 8px; border: 1px solid #ddd;'>Similarity</th></tr>"

        for item in results["fragments_detected"]:
            html += f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{item['text']}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['key_text']}</td><td style='text-align: center; padding: 8px; border: 1px solid #ddd;'>{item['similarity']:.2%}</td></tr>"

        html += "</table>"

    # Combined texts
    if results["combined"]:
        html += "<h4>Text that need to be combined</h4>"
        html += "<table width='100%' style='border-collapse: collapse; margin-bottom: 20px;'>"
        html += "<tr><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Text 1</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Text 2</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Combination</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Key Text</th><th style='text-align: center; padding: 8px; border: 1px solid #ddd;'>Similarity</th></tr>"

        for item in results["combined"]:
            html += f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{item['texts'][0]}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['texts'][1]}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['combined_text']}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['key_text']}</td><td style='text-align: center; padding: 8px; border: 1px solid #ddd;'>{item['similarity']:.2%}</td></tr>"

        html += "</table>"

    if not (results["similar_texts"] or results["fragments_detected"] or results["combined"]):
        html += "<p>No significant similarity found with the current parameters.</p>"

    return html
|
234 |
-
|
235 |
-
def generate_results_dataframe(results):
    """Generates pandas DataFrames to visualize the results."""
    frames = {}
    match_cols = ["Index", "Original Text", "Key Text", "Similarity"]

    # One row per direct match.
    if results["similar_texts"]:
        rows = [
            (entry['index'], entry['text'], entry['key_text'], f"{entry['similarity']:.2%}")
            for entry in results["similar_texts"]
        ]
        frames["similar"] = pd.DataFrame(rows, columns=match_cols)

    # One row per detected fragment.
    if results["fragments_detected"]:
        rows = [
            (entry['index'], entry['text'], entry['key_text'], f"{entry['similarity']:.2%}")
            for entry in results["fragments_detected"]
        ]
        frames["fragments"] = pd.DataFrame(rows, columns=match_cols)

    # One row per combined pair; the two source indices are joined as "i,j".
    if results["combined"]:
        rows = [
            (
                f"{entry['indices'][0]},{entry['indices'][1]}",
                entry['texts'][0],
                entry['texts'][1],
                entry['combined_text'],
                entry['key_text'],
                f"{entry['similarity']:.2%}",
            )
            for entry in results["combined"]
        ]
        frames["combined"] = pd.DataFrame(
            rows,
            columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"],
        )

    # Summary table — always present.
    stats = results["statistics"]
    summary = [
        ("Total analyzed", stats["total_analyzed"]),
        ("Total with similarity", stats["total_processed"]),
        ("Direct similarity", stats["direct_similarity"]),
        ("Fragments", stats["fragments"]),
        ("Combined", stats["combined"]),
    ]
    frames["statistics"] = pd.DataFrame(summary, columns=["Metric", "Value"])

    return frames
|
274 |
-
|
275 |
-
#app = gr.mount_gradio_app(app, demo, path="/")
|
276 |
-
|
277 |
-
@app.get("/api")
|
278 |
-
def read_root():
|
279 |
-
return JSONResponse(content={"message": "Hello from FastAPI inside Gradio!"})
|
280 |
-
|
281 |
-
# if __name__ == "__main__":
|
282 |
-
# import uvicorn
|
283 |
-
# uvicorn.run(app)
|
284 |
-
|
285 |
-
# PORT = int(os.getenv("PORT", 7860))
|
286 |
-
|
287 |
-
# if __name__ == "__main__":
|
288 |
-
# import uvicorn
|
289 |
-
# print(f"A arrancar na porta {PORT}...")
|
290 |
-
# uvicorn.run(app)
|
291 |
|
292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from fastapi import FastAPI

# Minimal application instance for the stripped-down app.
app = FastAPI()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
+
@app.get("/")
|
6 |
+
def root():
|
7 |
+
return "oi"
|