Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,27 +1,42 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
import base64
|
|
|
|
|
|
|
4 |
import gradio as gr
|
5 |
import pdfplumber # For PDF document parsing
|
6 |
-
import fitz # PyMuPDF for advanced PDF handling (alternative to pdfplumber)
|
7 |
import pytesseract # OCR for extracting text from images
|
8 |
from PIL import Image
|
9 |
-
from io import BytesIO
|
10 |
-
from transformers import pipeline # For semantic analysis tasks
|
11 |
from huggingface_hub import InferenceClient
|
12 |
from mistralai import Mistral
|
13 |
|
14 |
-
# Initialize
|
15 |
client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
|
16 |
client.headers["x-use-cache"] = "0"
|
|
|
17 |
api_key = os.getenv("MISTRAL_API_KEY")
|
18 |
Mistralclient = Mistral(api_key=api_key)
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
def encode_image(image_path):
|
27 |
"""Resizes and encodes an image to base64."""
|
@@ -39,21 +54,22 @@ def encode_image(image_path):
|
|
39 |
return None
|
40 |
|
41 |
def extract_text_from_document(file_path):
|
42 |
-
"""Extracts text from a PDF or image document."""
|
43 |
text = ""
|
44 |
-
# Try PDF parsing with pdfplumber
|
45 |
if file_path.lower().endswith(".pdf"):
|
46 |
try:
|
47 |
with pdfplumber.open(file_path) as pdf:
|
48 |
for page in pdf.pages:
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
except Exception as e:
|
52 |
print(f"PDF parsing error: {e}")
|
53 |
|
54 |
-
#
|
55 |
try:
|
56 |
-
# Open the file as an image for OCR
|
57 |
image = Image.open(file_path)
|
58 |
text = pytesseract.image_to_string(image)
|
59 |
except Exception as e:
|
@@ -61,27 +77,24 @@ def extract_text_from_document(file_path):
|
|
61 |
return text.strip()
|
62 |
|
63 |
def perform_semantic_analysis(text, analysis_type):
|
64 |
-
"""Applies semantic analysis tasks to the provided text."""
|
65 |
if analysis_type == "Summarization":
|
|
|
66 |
return summarizer(text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
|
67 |
elif analysis_type == "Sentiment Analysis":
|
|
|
68 |
return sentiment_analyzer(text)[0]
|
69 |
elif analysis_type == "Named Entity Recognition":
|
|
|
70 |
return ner_tagger(text)
|
71 |
-
# Add more analysis types as needed
|
72 |
return text
|
73 |
|
74 |
def process_text_input(message_text, history, model_choice, analysis_type):
|
75 |
-
"""
|
76 |
-
Process text-based inputs using selected model and apply semantic analysis if requested.
|
77 |
-
"""
|
78 |
-
# Optionally perform semantic analysis before sending to the model
|
79 |
if analysis_type and analysis_type != "None":
|
80 |
analysis_result = perform_semantic_analysis(message_text, analysis_type)
|
81 |
-
# Incorporate analysis_result into prompt or display separately
|
82 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
83 |
|
84 |
-
# Construct a prompt for model inference
|
85 |
input_prompt = [{"role": "user", "content": message_text}]
|
86 |
|
87 |
if model_choice == "mistralai/Mistral-Nemo-Instruct-2411":
|
@@ -106,19 +119,14 @@ def process_text_input(message_text, history, model_choice, analysis_type):
|
|
106 |
yield temp
|
107 |
|
108 |
def process_image_input(image_file, message_text, image_mod, model_choice, analysis_type):
|
109 |
-
"""
|
110 |
-
|
111 |
-
Applies OCR if needed and semantic analysis.
|
112 |
-
"""
|
113 |
-
# Save uploaded image temporarily to extract text if necessary
|
114 |
temp_image_path = "temp_upload.jpg"
|
115 |
image_file.save(temp_image_path)
|
116 |
|
117 |
-
# Extract text from document/image using OCR if needed
|
118 |
extracted_text = extract_text_from_document(temp_image_path)
|
119 |
if extracted_text:
|
120 |
message_text += f"\n\n[Extracted Text]: {extracted_text}"
|
121 |
-
# Optionally perform semantic analysis on the extracted text
|
122 |
if analysis_type and analysis_type != "None":
|
123 |
analysis_result = perform_semantic_analysis(extracted_text, analysis_type)
|
124 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
@@ -157,21 +165,17 @@ def process_image_input(image_file, message_text, image_mod, model_choice, analy
|
|
157 |
yield partial_message
|
158 |
|
159 |
def multimodal_response(message, history, analyzer_mode, model_choice, image_mod, analysis_type):
|
160 |
-
"""
|
161 |
-
Main response function that handles text and image inputs, applies parsing, OCR, and semantic analysis.
|
162 |
-
"""
|
163 |
message_text = message.get("text", "")
|
164 |
message_files = message.get("files", [])
|
165 |
|
166 |
if message_files:
|
167 |
-
# If an image/document is uploaded, process it
|
168 |
image_file = message_files[0]
|
169 |
yield from process_image_input(image_file, message_text, image_mod, model_choice, analysis_type)
|
170 |
else:
|
171 |
-
# Process plain text inputs
|
172 |
yield from process_text_input(message_text, history, model_choice, analysis_type)
|
173 |
|
174 |
-
# Set up the Gradio interface with
|
175 |
MultiModalAnalyzer = gr.ChatInterface(
|
176 |
fn=multimodal_response,
|
177 |
type="messages",
|
|
|
1 |
import os
|
2 |
import re
|
3 |
import base64
|
4 |
+
from io import BytesIO
|
5 |
+
from functools import lru_cache
|
6 |
+
|
7 |
import gradio as gr
|
8 |
import pdfplumber # For PDF document parsing
|
|
|
9 |
import pytesseract # OCR for extracting text from images
|
10 |
from PIL import Image
|
|
|
|
|
11 |
from huggingface_hub import InferenceClient
|
12 |
from mistralai import Mistral
|
13 |
|
14 |
+
# Initialize clients that don't require heavy model loading
|
15 |
client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
|
16 |
client.headers["x-use-cache"] = "0"
|
17 |
+
|
18 |
api_key = os.getenv("MISTRAL_API_KEY")
|
19 |
Mistralclient = Mistral(api_key=api_key)
|
20 |
|
21 |
+
### Lazy Loading and Caching for Transformers Pipelines ###
|
22 |
+
|
23 |
+
@lru_cache(maxsize=1)
def get_summarizer():
    """Build and return the summarization pipeline exactly once.

    The transformers import is deferred into the function body so the app
    can start without paying the model-download/load cost up front;
    ``lru_cache(maxsize=1)`` turns this into a lazy singleton.
    """
    from transformers import pipeline as _pipeline
    # distilbart was chosen over the default because it loads faster.
    summarization_pipeline = _pipeline(
        "summarization", model="sshleifer/distilbart-cnn-12-6"
    )
    return summarization_pipeline
|
28 |
+
|
29 |
+
@lru_cache(maxsize=1)
def get_sentiment_analyzer():
    """Lazily construct and cache the default sentiment-analysis pipeline.

    Importing transformers inside the function keeps module import cheap;
    the cache guarantees the model is loaded at most once per process.
    """
    from transformers import pipeline as _pipeline
    return _pipeline("sentiment-analysis")
|
33 |
+
|
34 |
+
@lru_cache(maxsize=1)
def get_ner_tagger():
    """Lazily construct and cache the default named-entity-recognition pipeline.

    Mirrors get_summarizer/get_sentiment_analyzer: deferred transformers
    import plus an lru_cache singleton so the model loads only on first use.
    """
    from transformers import pipeline as _pipeline
    return _pipeline("ner")
|
38 |
+
|
39 |
+
### Helper Functions ###
|
40 |
|
41 |
def encode_image(image_path):
|
42 |
"""Resizes and encodes an image to base64."""
|
|
|
54 |
return None
|
55 |
|
56 |
def extract_text_from_document(file_path):
|
57 |
+
"""Extracts text from a PDF or image document using pdfplumber and OCR."""
|
58 |
text = ""
|
|
|
59 |
if file_path.lower().endswith(".pdf"):
|
60 |
try:
|
61 |
with pdfplumber.open(file_path) as pdf:
|
62 |
for page in pdf.pages:
|
63 |
+
page_text = page.extract_text()
|
64 |
+
if page_text:
|
65 |
+
text += page_text + "\n"
|
66 |
+
if text.strip():
|
67 |
+
return text.strip()
|
68 |
except Exception as e:
|
69 |
print(f"PDF parsing error: {e}")
|
70 |
|
71 |
+
# Fallback to OCR for non-PDF or if PDF parsing yields no text
|
72 |
try:
|
|
|
73 |
image = Image.open(file_path)
|
74 |
text = pytesseract.image_to_string(image)
|
75 |
except Exception as e:
|
|
|
77 |
return text.strip()
|
78 |
|
79 |
def perform_semantic_analysis(text, analysis_type):
    """Run the requested NLP task over *text* using the cached pipelines.

    Supported values of ``analysis_type`` are "Summarization",
    "Sentiment Analysis" and "Named Entity Recognition"; any other value
    (including "None") returns *text* unchanged as a pass-through.
    """
    if analysis_type == "Summarization":
        summary = get_summarizer()(
            text, max_length=150, min_length=40, do_sample=False
        )
        return summary[0]['summary_text']
    if analysis_type == "Sentiment Analysis":
        return get_sentiment_analyzer()(text)[0]
    if analysis_type == "Named Entity Recognition":
        return get_ner_tagger()(text)
    # Unrecognized analysis type: nothing to do, hand the text back as-is.
    return text
|
91 |
|
92 |
def process_text_input(message_text, history, model_choice, analysis_type):
|
93 |
+
"""Processes text-based inputs using selected model and optional semantic analysis."""
|
|
|
|
|
|
|
94 |
if analysis_type and analysis_type != "None":
|
95 |
analysis_result = perform_semantic_analysis(message_text, analysis_type)
|
|
|
96 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
97 |
|
|
|
98 |
input_prompt = [{"role": "user", "content": message_text}]
|
99 |
|
100 |
if model_choice == "mistralai/Mistral-Nemo-Instruct-2411":
|
|
|
119 |
yield temp
|
120 |
|
121 |
def process_image_input(image_file, message_text, image_mod, model_choice, analysis_type):
|
122 |
+
"""Processes image-based inputs, applies OCR, and optional semantic analysis."""
|
123 |
+
# Save the uploaded image temporarily
|
|
|
|
|
|
|
124 |
temp_image_path = "temp_upload.jpg"
|
125 |
image_file.save(temp_image_path)
|
126 |
|
|
|
127 |
extracted_text = extract_text_from_document(temp_image_path)
|
128 |
if extracted_text:
|
129 |
message_text += f"\n\n[Extracted Text]: {extracted_text}"
|
|
|
130 |
if analysis_type and analysis_type != "None":
|
131 |
analysis_result = perform_semantic_analysis(extracted_text, analysis_type)
|
132 |
message_text += f"\n\n[Analysis Result]: {analysis_result}"
|
|
|
165 |
yield partial_message
|
166 |
|
167 |
def multimodal_response(message, history, analyzer_mode, model_choice, image_mod, analysis_type):
    """Dispatch an incoming chat message to the image or text pipeline.

    ``message`` is a Gradio multimodal dict with "text" and "files" keys;
    when at least one file is attached the first one is routed through the
    image/OCR path, otherwise the plain-text path is used. Streams the
    selected pipeline's output via ``yield from``.
    """
    text_part = message.get("text", "")
    attached_files = message.get("files", [])

    if not attached_files:
        # No upload: plain text goes straight to the text model path.
        yield from process_text_input(text_part, history, model_choice, analysis_type)
        return

    # Only the first attachment is processed, matching the original behavior.
    yield from process_image_input(
        attached_files[0], text_part, image_mod, model_choice, analysis_type
    )
|
177 |
|
178 |
+
# Set up the Gradio interface with user customization options
|
179 |
MultiModalAnalyzer = gr.ChatInterface(
|
180 |
fn=multimodal_response,
|
181 |
type="messages",
|