mgbam committed on
Commit
4d8b824
·
verified ·
1 Parent(s): 6c73cac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -36
app.py CHANGED
@@ -1,27 +1,42 @@
1
  import os
2
  import re
3
  import base64
 
 
 
4
  import gradio as gr
5
  import pdfplumber # For PDF document parsing
6
- import fitz # PyMuPDF for advanced PDF handling (alternative to pdfplumber)
7
  import pytesseract # OCR for extracting text from images
8
  from PIL import Image
9
- from io import BytesIO
10
- from transformers import pipeline # For semantic analysis tasks
11
  from huggingface_hub import InferenceClient
12
  from mistralai import Mistral
13
 
14
- # Initialize inference clients for different models
15
  client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
16
  client.headers["x-use-cache"] = "0"
 
17
  api_key = os.getenv("MISTRAL_API_KEY")
18
  Mistralclient = Mistral(api_key=api_key)
19
 
20
- # Initialize semantic analysis pipelines using transformers (for local tasks)
21
- # Example: summarization, sentiment-analysis, named-entity-recognition, etc.
22
- summarizer = pipeline("summarization")
23
- sentiment_analyzer = pipeline("sentiment-analysis")
24
- ner_tagger = pipeline("ner")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def encode_image(image_path):
27
  """Resizes and encodes an image to base64."""
@@ -39,21 +54,22 @@ def encode_image(image_path):
39
  return None
40
 
41
  def extract_text_from_document(file_path):
42
- """Extracts text from a PDF or image document."""
43
  text = ""
44
- # Try PDF parsing with pdfplumber
45
  if file_path.lower().endswith(".pdf"):
46
  try:
47
  with pdfplumber.open(file_path) as pdf:
48
  for page in pdf.pages:
49
- text += page.extract_text() + "\n"
50
- return text.strip()
 
 
 
51
  except Exception as e:
52
  print(f"PDF parsing error: {e}")
53
 
54
- # If not PDF or parsing fails, attempt OCR on the first page of an image-based PDF or an image file.
55
  try:
56
- # Open the file as an image for OCR
57
  image = Image.open(file_path)
58
  text = pytesseract.image_to_string(image)
59
  except Exception as e:
@@ -61,27 +77,24 @@ def extract_text_from_document(file_path):
61
  return text.strip()
62
 
63
def perform_semantic_analysis(text, analysis_type):
    """Run the requested analysis pipeline over *text*.

    Unrecognized analysis types leave the text untouched.
    """
    if analysis_type == "Summarization":
        summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
        return summary[0]['summary_text']
    if analysis_type == "Sentiment Analysis":
        return sentiment_analyzer(text)[0]
    if analysis_type == "Named Entity Recognition":
        return ner_tagger(text)
    # No matching task: hand the input back unchanged.
    return text
73
 
74
  def process_text_input(message_text, history, model_choice, analysis_type):
75
- """
76
- Process text-based inputs using selected model and apply semantic analysis if requested.
77
- """
78
- # Optionally perform semantic analysis before sending to the model
79
  if analysis_type and analysis_type != "None":
80
  analysis_result = perform_semantic_analysis(message_text, analysis_type)
81
- # Incorporate analysis_result into prompt or display separately
82
  message_text += f"\n\n[Analysis Result]: {analysis_result}"
83
 
84
- # Construct a prompt for model inference
85
  input_prompt = [{"role": "user", "content": message_text}]
86
 
87
  if model_choice == "mistralai/Mistral-Nemo-Instruct-2411":
@@ -106,19 +119,14 @@ def process_text_input(message_text, history, model_choice, analysis_type):
106
  yield temp
107
 
108
  def process_image_input(image_file, message_text, image_mod, model_choice, analysis_type):
109
- """
110
- Process image-based inputs using selected model and mode.
111
- Applies OCR if needed and semantic analysis.
112
- """
113
- # Save uploaded image temporarily to extract text if necessary
114
  temp_image_path = "temp_upload.jpg"
115
  image_file.save(temp_image_path)
116
 
117
- # Extract text from document/image using OCR if needed
118
  extracted_text = extract_text_from_document(temp_image_path)
119
  if extracted_text:
120
  message_text += f"\n\n[Extracted Text]: {extracted_text}"
121
- # Optionally perform semantic analysis on the extracted text
122
  if analysis_type and analysis_type != "None":
123
  analysis_result = perform_semantic_analysis(extracted_text, analysis_type)
124
  message_text += f"\n\n[Analysis Result]: {analysis_result}"
@@ -157,21 +165,17 @@ def process_image_input(image_file, message_text, image_mod, model_choice, analy
157
  yield partial_message
158
 
159
def multimodal_response(message, history, analyzer_mode, model_choice, image_mod, analysis_type):
    """Entry point for the chat UI: route the message to the text or image path."""
    text_part = message.get("text", "")
    attachments = message.get("files", [])
    if attachments:
        # An uploaded document/image takes priority — only the first file is used.
        yield from process_image_input(attachments[0], text_part, image_mod, model_choice, analysis_type)
    else:
        yield from process_text_input(text_part, history, model_choice, analysis_type)
173
 
174
- # Set up the Gradio interface with additional user customization options
175
  MultiModalAnalyzer = gr.ChatInterface(
176
  fn=multimodal_response,
177
  type="messages",
 
1
  import os
2
  import re
3
  import base64
4
+ from io import BytesIO
5
+ from functools import lru_cache
6
+
7
  import gradio as gr
8
  import pdfplumber # For PDF document parsing
 
9
  import pytesseract # OCR for extracting text from images
10
  from PIL import Image
 
 
11
  from huggingface_hub import InferenceClient
12
  from mistralai import Mistral
13
 
14
+ # Initialize clients that don't require heavy model loading
15
  client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
16
  client.headers["x-use-cache"] = "0"
17
+
18
  api_key = os.getenv("MISTRAL_API_KEY")
19
  Mistralclient = Mistral(api_key=api_key)
20
 
21
+ ### Lazy Loading and Caching for Transformers Pipelines ###
22
+
23
@lru_cache(maxsize=1)
def get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    from transformers import pipeline  # deferred so app startup stays fast
    checkpoint = "sshleifer/distilbart-cnn-12-6"  # distilled BART: small, quick to load
    return pipeline("summarization", model=checkpoint)
28
+
29
@lru_cache(maxsize=1)
def get_sentiment_analyzer():
    """Construct the sentiment-analysis pipeline on first use, then cache it."""
    from transformers import pipeline  # imported lazily to avoid startup cost
    return pipeline("sentiment-analysis")
33
+
34
@lru_cache(maxsize=1)
def get_ner_tagger():
    """Construct the named-entity-recognition pipeline on first use, then cache it."""
    from transformers import pipeline  # imported lazily to avoid startup cost
    return pipeline("ner")
38
+
39
+ ### Helper Functions ###
40
 
41
  def encode_image(image_path):
42
  """Resizes and encodes an image to base64."""
 
54
  return None
55
 
56
  def extract_text_from_document(file_path):
57
+ """Extracts text from a PDF or image document using pdfplumber and OCR."""
58
  text = ""
 
59
  if file_path.lower().endswith(".pdf"):
60
  try:
61
  with pdfplumber.open(file_path) as pdf:
62
  for page in pdf.pages:
63
+ page_text = page.extract_text()
64
+ if page_text:
65
+ text += page_text + "\n"
66
+ if text.strip():
67
+ return text.strip()
68
  except Exception as e:
69
  print(f"PDF parsing error: {e}")
70
 
71
+ # Fallback to OCR for non-PDF or if PDF parsing yields no text
72
  try:
 
73
  image = Image.open(file_path)
74
  text = pytesseract.image_to_string(image)
75
  except Exception as e:
 
77
  return text.strip()
78
 
79
def perform_semantic_analysis(text, analysis_type):
    """Run the requested analysis over *text* using the lazily-built pipelines.

    Falls through and returns *text* unchanged for unrecognized task names.
    """
    if analysis_type == "Summarization":
        output = get_summarizer()(text, max_length=150, min_length=40, do_sample=False)
        return output[0]['summary_text']
    if analysis_type == "Sentiment Analysis":
        return get_sentiment_analyzer()(text)[0]
    if analysis_type == "Named Entity Recognition":
        return get_ner_tagger()(text)
    # Unknown task: pass the input straight through.
    return text
91
 
92
  def process_text_input(message_text, history, model_choice, analysis_type):
93
+ """Processes text-based inputs using selected model and optional semantic analysis."""
 
 
 
94
  if analysis_type and analysis_type != "None":
95
  analysis_result = perform_semantic_analysis(message_text, analysis_type)
 
96
  message_text += f"\n\n[Analysis Result]: {analysis_result}"
97
 
 
98
  input_prompt = [{"role": "user", "content": message_text}]
99
 
100
  if model_choice == "mistralai/Mistral-Nemo-Instruct-2411":
 
119
  yield temp
120
 
121
  def process_image_input(image_file, message_text, image_mod, model_choice, analysis_type):
122
+ """Processes image-based inputs, applies OCR, and optional semantic analysis."""
123
+ # Save the uploaded image temporarily
 
 
 
124
  temp_image_path = "temp_upload.jpg"
125
  image_file.save(temp_image_path)
126
 
 
127
  extracted_text = extract_text_from_document(temp_image_path)
128
  if extracted_text:
129
  message_text += f"\n\n[Extracted Text]: {extracted_text}"
 
130
  if analysis_type and analysis_type != "None":
131
  analysis_result = perform_semantic_analysis(extracted_text, analysis_type)
132
  message_text += f"\n\n[Analysis Result]: {analysis_result}"
 
165
  yield partial_message
166
 
167
def multimodal_response(message, history, analyzer_mode, model_choice, image_mod, analysis_type):
    """Main chat handler: dispatch to the image pipeline when a file is attached,
    otherwise to the plain-text pipeline.
    """
    text = message.get("text", "")
    files = message.get("files", [])
    if not files:
        yield from process_text_input(text, history, model_choice, analysis_type)
    else:
        # Only the first attachment is processed.
        yield from process_image_input(files[0], text, image_mod, model_choice, analysis_type)
177
 
178
+ # Set up the Gradio interface with user customization options
179
  MultiModalAnalyzer = gr.ChatInterface(
180
  fn=multimodal_response,
181
  type="messages",