Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,27 +1,42 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
import base64
|
|
|
|
|
|
|
4 |
import gradio as gr
|
5 |
import pdfplumber # For PDF document parsing
|
6 |
-
import fitz # PyMuPDF for advanced PDF handling (alternative to pdfplumber)
|
7 |
import pytesseract # OCR for extracting text from images
|
8 |
from PIL import Image
|
9 |
-
from io import BytesIO
|
10 |
-
from transformers import pipeline # For semantic analysis tasks
|
11 |
from huggingface_hub import InferenceClient
|
12 |
from mistralai import Mistral
|
13 |
|
14 |
-
# Initialize
|
15 |
client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
|
16 |
client.headers["x-use-cache"] = "0"
|
|
|
17 |
api_key = os.getenv("MISTRAL_API_KEY")
|
18 |
Mistralclient = Mistral(api_key=api_key)
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
def encode_image(image_path):
|
27 |
"""Resizes and encodes an image to base64."""
|
@@ -39,21 +54,22 @@ def encode_image(image_path):
|
|
39 |
return None
|
40 |
|
41 |
def extract_text_from_document(file_path):
|
42 |
-
"""Extracts text from a PDF or image document."""
|
43 |
text = ""
|
44 |
-
# Try PDF parsing with pdfplumber
|
45 |
if file_path.lower().endswith(".pdf"):
|
46 |
try:
|
47 |
with pdfplumber.open(file_path) as pdf:
|
48 |
for page in pdf.pages:
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
except Exception as e:
|
52 |
print(f"PDF parsing error: {e}")
|
53 |
|
54 |
-
#
|
55 |
try:
|
56 |
-
# Open the file as an image for OCR
|
57 |
image = Image.open(file_path)
|
58 |
text = pytesseract.image_to_string(image)
|
59 |
except Exception as e:
|
@@ -61,27 +77,24 @@ def extract_text_from_document(file_path):
|
|
61 |
return text.strip()
|
62 |
|
63 |
def perform_semantic_analysis(text, analysis_type):
|
64 |
-
"""Applies semantic analysis tasks to the provided text."""
|
65 |
if analysis_type == "Summarization":
|
|
|
66 |
return summarizer(text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
67 |
elif analysis_type == "Sentiment Analysis":
|
|
|
68 |
return sentiment_analyzer(text)[0]
|
69 |
elif analysis_type == "Named Entity Recognition":
|
|
|
70 |
return ner_tagger(text)
|
71 |
-
# Add more analysis types as needed
|
72 |
return text
|
73 |
|
74 |
def process_text_input(message_text, history, model_choice, analysis_type):
|
75 |
-
"""
|
76 |
-
Process text-based inputs using selected model and apply semantic analysis if requested.
|
77 |
-
"""
|
78 |
-
# Optionally perform semantic analysis before sending to the model
|
79 |
if analysis_type and analysis_type != "None":
|
80 |
analysis_result = perform_semantic_analysis(message_text, analysis_type)
|
81 |
-
# Incorporate analysis_result into prompt or display separately
|
82 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
83 |
|
84 |
-
# Construct a prompt for model inference
|
85 |
input_prompt = [{"role": "user", "content": message_text}]
|
86 |
|
87 |
if model_choice == "mistralai/Mistral-Nemo-Instruct-2411":
|
@@ -106,19 +119,14 @@ def process_text_input(message_text, history, model_choice, analysis_type):
|
|
106 |
yield temp
|
107 |
|
108 |
def process_image_input(image_file, message_text, image_mod, model_choice, analysis_type):
|
109 |
-
"""
|
110 |
-
|
111 |
-
Applies OCR if needed and semantic analysis.
|
112 |
-
"""
|
113 |
-
# Save uploaded image temporarily to extract text if necessary
|
114 |
temp_image_path = "temp_upload.jpg"
|
115 |
image_file.save(temp_image_path)
|
116 |
|
117 |
-
# Extract text from document/image using OCR if needed
|
118 |
extracted_text = extract_text_from_document(temp_image_path)
|
119 |
if extracted_text:
|
120 |
message_text += f"\n\n[Extracted Text]: {extracted_text}"
|
121 |
-
# Optionally perform semantic analysis on the extracted text
|
122 |
if analysis_type and analysis_type != "None":
|
123 |
analysis_result = perform_semantic_analysis(extracted_text, analysis_type)
|
124 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
@@ -157,21 +165,17 @@ def process_image_input(image_file, message_text, image_mod, model_choice, analy
|
|
157 |
yield partial_message
|
158 |
|
159 |
def multimodal_response(message, history, analyzer_mode, model_choice, image_mod, analysis_type):
|
160 |
-
"""
|
161 |
-
Main response function that handles text and image inputs, applies parsing, OCR, and semantic analysis.
|
162 |
-
"""
|
163 |
message_text = message.get("text", "")
|
164 |
message_files = message.get("files", [])
|
165 |
|
166 |
if message_files:
|
167 |
-
# If an image/document is uploaded, process it
|
168 |
image_file = message_files[0]
|
169 |
yield from process_image_input(image_file, message_text, image_mod, model_choice, analysis_type)
|
170 |
else:
|
171 |
-
# Process plain text inputs
|
172 |
yield from process_text_input(message_text, history, model_choice, analysis_type)
|
173 |
|
174 |
-
# Set up the Gradio interface with
|
175 |
MultiModalAnalyzer = gr.ChatInterface(
|
176 |
fn=multimodal_response,
|
177 |
type="messages",
|
|
|
1 |
import os
|
2 |
import re
|
3 |
import base64
|
4 |
+
from io import BytesIO
|
5 |
+
from functools import lru_cache
|
6 |
+
|
7 |
import gradio as gr
|
8 |
import pdfplumber # For PDF document parsing
|
|
|
9 |
import pytesseract # OCR for extracting text from images
|
10 |
from PIL import Image
|
|
|
|
|
11 |
from huggingface_hub import InferenceClient
|
12 |
from mistralai import Mistral
|
13 |
|
14 |
+
# Initialize clients that don't require heavy model loading
|
15 |
client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
|
16 |
client.headers["x-use-cache"] = "0"
|
17 |
+
|
18 |
api_key = os.getenv("MISTRAL_API_KEY")
|
19 |
Mistralclient = Mistral(api_key=api_key)
|
20 |
|
21 |
+
### Lazy Loading and Caching for Transformers Pipelines ###
|
22 |
+
|
23 |
+
@lru_cache(maxsize=1)
def get_summarizer():
    """Build and return the summarization pipeline exactly once.

    The transformers import is deferred into the function body so the app
    can start without paying the model-download/load cost up front;
    ``lru_cache(maxsize=1)`` turns this into a lazy singleton.
    """
    from transformers import pipeline as _pipeline
    # distilbart was chosen over the default because it loads faster.
    summarization_pipeline = _pipeline(
        "summarization", model="sshleifer/distilbart-cnn-12-6"
    )
    return summarization_pipeline
|
28 |
+
|
29 |
+
@lru_cache(maxsize=1)
def get_sentiment_analyzer():
    """Lazily construct and cache the default sentiment-analysis pipeline.

    Importing transformers inside the function keeps module import cheap;
    the cache guarantees the model is loaded at most once per process.
    """
    from transformers import pipeline as _pipeline
    return _pipeline("sentiment-analysis")
|
33 |
+
|
34 |
+
@lru_cache(maxsize=1)
def get_ner_tagger():
    """Lazily construct and cache the default named-entity-recognition pipeline.

    Mirrors get_summarizer/get_sentiment_analyzer: deferred transformers
    import plus an lru_cache singleton so the model loads only on first use.
    """
    from transformers import pipeline as _pipeline
    return _pipeline("ner")
|
38 |
+
|
39 |
+
### Helper Functions ###
|
40 |
|
41 |
def encode_image(image_path):
|
42 |
"""Resizes and encodes an image to base64."""
|
|
|
54 |
return None
|
55 |
|
56 |
def extract_text_from_document(file_path):
|
57 |
+
"""Extracts text from a PDF or image document using pdfplumber and OCR."""
|
58 |
text = ""
|
|
|
59 |
if file_path.lower().endswith(".pdf"):
|
60 |
try:
|
61 |
with pdfplumber.open(file_path) as pdf:
|
62 |
for page in pdf.pages:
|
63 |
+
page_text = page.extract_text()
|
64 |
+
if page_text:
|
65 |
+
text += page_text + "\n"
|
66 |
+
if text.strip():
|
67 |
+
return text.strip()
|
68 |
except Exception as e:
|
69 |
print(f"PDF parsing error: {e}")
|
70 |
|
71 |
+
# Fallback to OCR for non-PDF or if PDF parsing yields no text
|
72 |
try:
|
|
|
73 |
image = Image.open(file_path)
|
74 |
text = pytesseract.image_to_string(image)
|
75 |
except Exception as e:
|
|
|
77 |
return text.strip()
|
78 |
|
79 |
def perform_semantic_analysis(text, analysis_type):
    """Run the requested NLP task over *text* using the cached pipelines.

    Supported values of ``analysis_type`` are "Summarization",
    "Sentiment Analysis" and "Named Entity Recognition"; any other value
    (including "None") returns *text* unchanged as a pass-through.
    """
    if analysis_type == "Summarization":
        summary = get_summarizer()(
            text, max_length=150, min_length=40, do_sample=False
        )
        return summary[0]['summary_text']
    if analysis_type == "Sentiment Analysis":
        return get_sentiment_analyzer()(text)[0]
    if analysis_type == "Named Entity Recognition":
        return get_ner_tagger()(text)
    # Unrecognized analysis type: nothing to do, hand the text back as-is.
    return text
|
91 |
|
92 |
def process_text_input(message_text, history, model_choice, analysis_type):
|
93 |
+
"""Processes text-based inputs using selected model and optional semantic analysis."""
|
|
|
|
|
|
|
94 |
if analysis_type and analysis_type != "None":
|
95 |
analysis_result = perform_semantic_analysis(message_text, analysis_type)
|
|
|
96 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
97 |
|
|
|
98 |
input_prompt = [{"role": "user", "content": message_text}]
|
99 |
|
100 |
if model_choice == "mistralai/Mistral-Nemo-Instruct-2411":
|
|
|
119 |
yield temp
|
120 |
|
121 |
def process_image_input(image_file, message_text, image_mod, model_choice, analysis_type):
|
122 |
+
"""Processes image-based inputs, applies OCR, and optional semantic analysis."""
|
123 |
+
# Save the uploaded image temporarily
|
|
|
|
|
|
|
124 |
temp_image_path = "temp_upload.jpg"
|
125 |
image_file.save(temp_image_path)
|
126 |
|
|
|
127 |
extracted_text = extract_text_from_document(temp_image_path)
|
128 |
if extracted_text:
|
129 |
message_text += f"\n\n[Extracted Text]: {extracted_text}"
|
|
|
130 |
if analysis_type and analysis_type != "None":
|
131 |
analysis_result = perform_semantic_analysis(extracted_text, analysis_type)
|
132 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
|
|
165 |
yield partial_message
|
166 |
|
167 |
def multimodal_response(message, history, analyzer_mode, model_choice, image_mod, analysis_type):
    """Dispatch an incoming chat message to the image or text pipeline.

    ``message`` is a Gradio multimodal dict with "text" and "files" keys;
    when at least one file is attached the first one is routed through the
    image/OCR path, otherwise the plain-text path is used. Streams the
    selected pipeline's output via ``yield from``.
    """
    text_part = message.get("text", "")
    attached_files = message.get("files", [])

    if not attached_files:
        # No upload: plain text goes straight to the text model path.
        yield from process_text_input(text_part, history, model_choice, analysis_type)
        return

    # Only the first attachment is processed, matching the original behavior.
    yield from process_image_input(
        attached_files[0], text_part, image_mod, model_choice, analysis_type
    )
|
177 |
|
178 |
+
# Set up the Gradio interface with user customization options
|
179 |
MultiModalAnalyzer = gr.ChatInterface(
|
180 |
fn=multimodal_response,
|
181 |
type="messages",
|