MarioPrzBasto committed on
Commit
4834b21
·
1 Parent(s): c072a2e

Add application file

Browse files
Files changed (1) hide show
  1. app.py +3 -288
app.py CHANGED
@@ -1,292 +1,7 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import cv2
4
- import numpy as np
5
- import requests
6
- import torch
7
- import base64
8
- import os
9
- import logging
10
- from io import BytesIO
11
- from PIL import Image
12
  from fastapi import FastAPI
13
- from fastapi.middleware.cors import CORSMiddleware
14
- from extract_text import extract_text_from_image
15
- from models import TextSimilarityRequest
16
- from text_similarity import analyze_similarity
17
- from starlette.responses import JSONResponse
18
 
19
-
20
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
21
  app = FastAPI()
22
- # app.add_middleware(
23
- # CORSMiddleware,
24
- # allow_origins=["*"],
25
- # allow_credentials=True,
26
- # allow_methods=["*"],
27
- # allow_headers=["*"],
28
- # )
29
-
30
@app.post("/text_similarity", summary="Perform images text similarity", response_model=float, tags=["Text Similarities"])
async def text_similarity(request: TextSimilarityRequest):
    """Compute the percentage of key texts detected in an image.

    Loads the image referenced by ``request.imageInfo.source`` (URL or
    base64), extracts its text, and compares the extracted strings against
    ``request.keyTexts``.

    Args:
        request: Carries the image info, the key texts, and the similarity
            threshold expressed as a percentage (0-100).

    Returns:
        Percentage (0-100) of key texts that matched; 0.0 when no key
        texts were supplied.
    """
    image_info = request.imageInfo
    key_texts = request.keyTexts
    similarity_threshold = request.similarityThreshold
    origin_id = image_info.originId

    logging.info(f"Checking text similarity for main source with resource id {origin_id}")

    image = load_image_url(image_info.source)

    # Extract text from the image; OCR uses the GPU when one is available.
    gpu_available = torch.cuda.is_available()
    extracted_texts = extract_text_from_image(image, gpu_available)

    results = analyze_similarity(
        extracted_texts,
        key_texts,
        similarity_threshold=similarity_threshold / 100,  # Convert percentage to decimal
        fragment_threshold=1.0,  # 100% expressed as a decimal
    )

    log_similarity_report(results, origin_id)

    total_texts = len(key_texts)
    passed_texts = results["statistics"]["total_processed"]

    # Guard against ZeroDivisionError when the caller sends no key texts.
    percentage_passed = (passed_texts / total_texts) * 100 if total_texts else 0.0

    logging.info(f"Text similarity for main source with resource id {origin_id} is {percentage_passed}%")

    return percentage_passed
62
-
63
def log_similarity_report(results, originId):
    """Write a structured log summary of one similarity-analysis result.

    Args:
        results: Dict produced by analyze_similarity (keys: 'statistics',
            'similar_texts', 'fragments_detected', 'combined').
        originId: Resource identifier used to tag every log line.
    """
    stats = results['statistics']

    # Headline counts.
    logging.info(f"[{originId}] Total texts analyzed: {stats['total_analyzed']}")
    logging.info(f"[{originId}] Texts with detected similarity: {stats['total_processed']}")

    # Direct matches.
    similar = results["similar_texts"]
    if similar:
        logging.info(f"[{originId}] Direct Similar Texts Found: {len(similar)}")
        for entry in similar:
            logging.info(f"[{originId}] Similar Text: '{entry['text']}' -> Key Text: '{entry['key_text']}' with Similarity: {entry['similarity']:.2%}")

    # Fragment matches.
    fragments = results["fragments_detected"]
    if fragments:
        logging.info(f"[{originId}] Fragments Detected: {len(fragments)}")
        for entry in fragments:
            logging.info(f"[{originId}] Fragment: '{entry['text']}' -> Key Text: '{entry['key_text']}' with Similarity: {entry['similarity']:.2%}")

    # Matches that only appear once two texts are combined.
    combined = results["combined"]
    if combined:
        logging.info(f"[{originId}] Texts to be Combined: {len(combined)}")
        for entry in combined:
            logging.info(f"[{originId}] Combined Text: '{entry['combined_text']}' -> Key Text: '{entry['key_text']}' with Similarity: {entry['similarity']:.2%}")

    # Nothing matched in any category.
    if not (similar or fragments or combined):
        logging.info(f"[{originId}] No significant similarity found.")

    # Per-category totals.
    logging.info(f"[{originId}] Direct similarity: {stats['direct_similarity']}")
    logging.info(f"[{originId}] Fragments: {stats['fragments']}")
    logging.info(f"[{originId}] Combined: {stats['combined']}")
94
-
95
def load_image_url(source):
    """Load a grayscale image from an HTTP(S) URL or a base64 string.

    Args:
        source: Either a URL (recognized by the 'http' prefix) or
            base64-encoded image bytes.

    Returns:
        The decoded image as a 2-D grayscale numpy array.

    Raises:
        requests.HTTPError: if the URL responds with an error status.
    """
    # Disable PIL's decompression-bomb guard.
    # NOTE(review): this is a process-global setting and a DoS vector if
    # sources are untrusted — confirm images come from trusted callers.
    Image.MAX_IMAGE_PIXELS = None

    if source.startswith('http'):
        # Timeout prevents the request from hanging forever on a dead host;
        # raise_for_status avoids silently decoding an HTML error page.
        response = requests.get(source, timeout=30)
        response.raise_for_status()
        img = np.asarray(bytearray(response.content), dtype=np.uint8)
        img = cv2.imdecode(img, cv2.IMREAD_GRAYSCALE)
    else:
        img = base64.b64decode(source)
        img = Image.open(BytesIO(img))
        img = np.array(img)
        # Assumes the decoded image is RGB — TODO confirm for RGBA/gray inputs.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    return img
109
-
110
def process_image(image, key_texts, similarity_threshold, fragment_threshold):
    """Processes the image, extracts text, and analyzes similarities.

    Args:
        image: Image array to OCR, or None.
        key_texts: Newline-separated string of key texts (a list is also
            accepted) to compare against.
        similarity_threshold: Similarity cutoff as a percentage (0-100).
        fragment_threshold: Fragment-match cutoff as a percentage (0-100).

    Returns:
        A 7-tuple: (html_report, df_statistics, df_similar, df_fragments,
        df_combined, extracted_texts, gpu_available). On validation or
        processing failure the first element is an error message and the
        remaining six are None.
    """
    try:
        # Fixed: error paths previously returned 6 elements while the
        # success path returned 7, misaligning the UI output bindings.
        if image is None:
            return "Please upload an image for analysis.", None, None, None, None, None, None

        if not key_texts.strip():
            return "Please enter key texts for comparison.", None, None, None, None, None, None

        # Extract text from the image using the user's method
        gpu_available = torch.cuda.is_available()
        extracted_texts = extract_text_from_image(image, gpu_available)

        if isinstance(key_texts, str):
            key_texts = [text.strip() for text in key_texts.split('\n') if text.strip()]

        # Process the analysis
        results = analyze_similarity(
            extracted_texts,
            key_texts,
            similarity_threshold=similarity_threshold / 100,  # Convert percentage to decimal
            fragment_threshold=fragment_threshold / 100,  # Convert percentage to decimal
        )

        # Generate the HTML report
        html_report = generate_html_report(results)

        # Generate the DataFrames
        dfs = generate_results_dataframe(results)

        # Extract individual DataFrames (or create empty ones if they don't exist)
        df_statistics = dfs.get("statistics", pd.DataFrame())
        df_similar = dfs.get("similar", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_fragments = dfs.get("fragments", pd.DataFrame(columns=["Index", "Original Text", "Key Text", "Similarity"]))
        df_combined = dfs.get("combined", pd.DataFrame(columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"]))

        return html_report, df_statistics, df_similar, df_fragments, df_combined, extracted_texts, gpu_available

    except Exception as e:
        # Keep the arity consistent with the success path (7 elements).
        return f"Erro ao processar: {str(e)}", None, None, None, None, None, None
151
-
152
def process_manual_input(texts, key_texts, similarity_threshold, fragment_threshold):
    """Processes the user's manual text input.

    Runs the similarity analysis on user-typed texts and key texts and
    returns the HTML report plus the statistics/similar/fragments/combined
    DataFrames (5-tuple; on error the first element is a message and the
    remaining four are None).
    """
    # Guard clause: both text areas are required before analysing anything.
    if not texts.strip() or not key_texts.strip():
        return "Please enter texts for analysis and key texts for comparison.", None, None, None, None

    try:
        # Thresholds arrive as percentages; the analyzer expects decimals.
        analysis = analyze_similarity(
            texts,
            key_texts,
            similarity_threshold=similarity_threshold / 100,
            fragment_threshold=fragment_threshold / 100,
        )

        report = generate_html_report(analysis)
        frames = generate_results_dataframe(analysis)

        match_columns = ["Index", "Original Text", "Key Text", "Similarity"]

        # Fall back to empty, correctly-shaped frames for absent sections.
        stats_frame = frames.get("statistics", pd.DataFrame())
        similar_frame = frames.get("similar", pd.DataFrame(columns=match_columns))
        fragment_frame = frames.get("fragments", pd.DataFrame(columns=match_columns))
        combined_frame = frames.get(
            "combined",
            pd.DataFrame(columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"]),
        )

        return report, stats_frame, similar_frame, fragment_frame, combined_frame

    except Exception as e:
        return f"Erro ao processar: {str(e)}", None, None, None, None
183
-
184
def generate_html_report(results):
    """Generates an HTML report about the detected similarities.

    Args:
        results: Dict produced by analyze_similarity, with keys
            'statistics', 'similar_texts', 'fragments_detected', 'combined'.

    Returns:
        The report as a single HTML string.

    NOTE(review): text values are interpolated without HTML escaping; if
    OCR output can contain markup, apply html.escape() to each field.
    """
    html = "<h2>Similarity Report</h2>"

    # General statistics
    # Fixed: the original tag was "<div padding: 15px; ...'>" — the
    # style=' attribute prefix was missing, producing invalid HTML.
    html += "<div style='padding: 15px; border-radius: 5px; margin-bottom: 20px;'>"
    html += f"<p><b>Total texts analyzed:</b> {results['statistics']['total_analyzed']}</p>"
    html += f"<p><b>Texts with detected similarity:</b> {results['statistics']['total_processed']}</p>"
    html += "</div>"

    # Results table
    html += "<h3>Detected Similarities</h3>"

    # Similar texts
    if results["similar_texts"]:
        html += "<h4>Direct Similar Texts</h4>"
        html += "<table width='100%' style='border-collapse: collapse; margin-bottom: 20px;'>"
        html += "<tr><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Original Text</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Key Text</th><th style='text-align: center; padding: 8px; border: 1px solid #ddd;'>Similarity</th></tr>"

        for item in results["similar_texts"]:
            html += f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{item['text']}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['key_text']}</td><td style='text-align: center; padding: 8px; border: 1px solid #ddd;'>{item['similarity']:.2%}</td></tr>"

        html += "</table>"

    # Detected fragments
    if results["fragments_detected"]:
        html += "<h4>Text with Detected Fragments</h4>"
        html += "<table width='100%' style='border-collapse: collapse; margin-bottom: 20px;'>"
        html += "<tr><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Original Text</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Key Text</th><th style='text-align: center; padding: 8px; border: 1px solid #ddd;'>Similarity</th></tr>"

        for item in results["fragments_detected"]:
            html += f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{item['text']}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['key_text']}</td><td style='text-align: center; padding: 8px; border: 1px solid #ddd;'>{item['similarity']:.2%}</td></tr>"

        html += "</table>"

    # Combined texts
    if results["combined"]:
        html += "<h4>Text that need to be combined</h4>"
        html += "<table width='100%' style='border-collapse: collapse; margin-bottom: 20px;'>"
        html += "<tr><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Text 1</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Text 2</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Combination</th><th style='text-align: left; padding: 8px; border: 1px solid #ddd;'>Key Text</th><th style='text-align: center; padding: 8px; border: 1px solid #ddd;'>Similarity</th></tr>"

        for item in results["combined"]:
            html += f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{item['texts'][0]}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['texts'][1]}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['combined_text']}</td><td style='padding: 8px; border: 1px solid #ddd;'>{item['key_text']}</td><td style='text-align: center; padding: 8px; border: 1px solid #ddd;'>{item['similarity']:.2%}</td></tr>"

        html += "</table>"

    # No category produced a match.
    if not (results["similar_texts"] or results["fragments_detected"] or results["combined"]):
        html += "<p>No significant similarity found with the current parameters.</p>"

    return html
234
-
235
def generate_results_dataframe(results):
    """Generates pandas DataFrames to visualize the results.

    Returns a dict that always carries a 'statistics' frame and, when the
    corresponding result lists are non-empty, 'similar', 'fragments', and
    'combined' frames.
    """
    frames = {}
    match_columns = ["Index", "Original Text", "Key Text", "Similarity"]

    # Direct similarity matches.
    similar = results["similar_texts"]
    if similar:
        rows = [
            (entry['index'], entry['text'], entry['key_text'], f"{entry['similarity']:.2%}")
            for entry in similar
        ]
        frames["similar"] = pd.DataFrame(rows, columns=match_columns)

    # Fragment matches share the same shape as direct matches.
    fragments = results["fragments_detected"]
    if fragments:
        rows = [
            (entry['index'], entry['text'], entry['key_text'], f"{entry['similarity']:.2%}")
            for entry in fragments
        ]
        frames["fragments"] = pd.DataFrame(rows, columns=match_columns)

    # Pairs of texts that only match once combined.
    combined = results["combined"]
    if combined:
        rows = [
            (
                f"{entry['indices'][0]},{entry['indices'][1]}",
                entry['texts'][0],
                entry['texts'][1],
                entry['combined_text'],
                entry['key_text'],
                f"{entry['similarity']:.2%}",
            )
            for entry in combined
        ]
        frames["combined"] = pd.DataFrame(
            rows,
            columns=["Indices", "Text 1", "Text 2", "Combined Text", "Key Text", "Similarity"],
        )

    # Summary statistics are always emitted.
    stats = results["statistics"]
    frames["statistics"] = pd.DataFrame(
        [
            ("Total analyzed", stats["total_analyzed"]),
            ("Total with similarity", stats["total_processed"]),
            ("Direct similarity", stats["direct_similarity"]),
            ("Fragments", stats["fragments"]),
            ("Combined", stats["combined"]),
        ],
        columns=["Metric", "Value"],
    )

    return frames
274
-
275
- #app = gr.mount_gradio_app(app, demo, path="/")
276
-
277
@app.get("/api")
def read_root():
    """Health-check endpoint confirming the FastAPI app is reachable."""
    payload = {"message": "Hello from FastAPI inside Gradio!"}
    return JSONResponse(content=payload)
280
-
281
- # if __name__ == "__main__":
282
- # import uvicorn
283
- # uvicorn.run(app)
284
-
285
- # PORT = int(os.getenv("PORT", 7860))
286
-
287
- # if __name__ == "__main__":
288
- # import uvicorn
289
- # print(f"A arrancar na porta {PORT}...")
290
- # uvicorn.run(app)
291
 
292
- #demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
 
 
 
 
 
2
 
 
 
3
  app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
@app.get("/")
def root():
    """Root endpoint; returns a plain greeting string."""
    greeting = "oi"
    return greeting