Spaces:

CosmickVisions
/

Tech-Vision

Build error

App Files Files Community

CosmickVisions committed on Mar 20

Commit

9611f6e

verified ·

1 Parent(s): 58a9554

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -80

app.py CHANGED Viewed

@@ -1,27 +1,30 @@
-import gradio as gr
-import groq
 import os
 import tempfile
 import uuid
-from dotenv import load_dotenv
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import FAISS
-from langchain.embeddings import HuggingFaceEmbeddings
-import fitz  # PyMuPDF
 import base64
-from PIL import Image
 import io
-import requests
 import json
 import re
 from datetime import datetime, timedelta
-import speech_recognition as sr
-import pyttsx3
-import torch
-from transformers import AutoProcessor, AutoModelForVision2Seq
 import numpy as np
 import pandas as pd
 import openpyxl
 # Load environment variables
 load_dotenv()
@@ -49,12 +52,6 @@ def load_docling_model():
 # Initialize SmolDocling model
 docling_processor, docling_model = load_docling_model()
-# Initialize text-to-speech engine
-tts_engine = pyttsx3.init()
-# Set properties for better speech
-tts_engine.setProperty('rate', 150)    # Speed of speech
-tts_engine.setProperty('volume', 0.9)  # Volume (0.0 to 1.0)
 # Custom CSS for Tech theme
 custom_css = """
 :root {
@@ -315,67 +312,6 @@ def analyze_image(image_file):
     except Exception as e:
         return f"Error analyzing image: {str(e)}"
-# Improved function for speech-to-text conversion with status updates
-def speech_to_text(audio_status):
-    try:
-        # Update status to show we're listening
-        audio_status = "Listening... Speak now"
-        yield audio_status, gr.update(visible=True), None
-        r = sr.Recognizer()
-        with sr.Microphone() as source:
-            r.adjust_for_ambient_noise(source)
-            audio = r.listen(source, timeout=5, phrase_time_limit=15)
-            # Update status to show processing
-            audio_status = "Processing speech..."
-            yield audio_status, gr.update(visible=True), None
-            text = r.recognize_google(audio)
-            audio_status = "Speech recognized!"
-            return audio_status, gr.update(visible=False), text
-    except sr.UnknownValueError:
-        audio_status = "Could not understand audio. Please try again."
-        return audio_status, gr.update(visible=False), None
-    except sr.RequestError as e:
-        audio_status = f"Error with speech recognition service: {e}"
-        return audio_status, gr.update(visible=False), None
-    except Exception as e:
-        audio_status = f"Error: {str(e)}"
-        return audio_status, gr.update(visible=False), None
-# Improved function for text-to-speech conversion with pyttsx3
-def text_to_speech(audio_status, history):
-    if not history:
-        return "No text to speak", gr.update(visible=False), None
-    try:
-        # Get the last bot response
-        last_response = history[-1][1]
-        # Clean up the text (remove markdown and other formatting)
-        clean_text = re.sub(r'\*\*|__', '', last_response) # Remove bold/underline
-        clean_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_text) # Remove links
-        clean_text = re.sub(r'#+ ', '', clean_text) # Remove headers
-        clean_text = re.sub(r'```[^`]*```', ' Code block removed for speech. ', clean_text) # Remove code blocks
-        # Update status
-        audio_status = "Generating speech..."
-        yield audio_status, gr.update(visible=True), None
-        # Save to a temporary file
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-        # Use pyttsx3 to generate speech
-        tts_engine.save_to_file(clean_text, temp_file.name)
-        tts_engine.runAndWait()
-        audio_status = "Speech ready!"
-        return audio_status, gr.update(visible=False), temp_file.name
-    except Exception as e:
-        audio_status = f"Error in text-to-speech: {str(e)}"
-        return audio_status, gr.update(visible=False), None
 # Function to handle different file types
 def process_file(file_data, file_type):
     if file_data is None:

+# Standard library imports
 import os
 import tempfile
 import uuid
 import base64
 import io
 import json
 import re
 from datetime import datetime, timedelta
+# Third-party imports
+import gradio as gr
+import groq
 import numpy as np
 import pandas as pd
 import openpyxl
+import requests
+import fitz  # PyMuPDF
+from PIL import Image
+from dotenv import load_dotenv
+from transformers import AutoProcessor, AutoModelForVision2Seq
+import torch
+# LangChain imports
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 # Load environment variables
 load_dotenv()
 # Initialize SmolDocling model
 docling_processor, docling_model = load_docling_model()
 # Custom CSS for Tech theme
 custom_css = """
 :root {
     except Exception as e:
         return f"Error analyzing image: {str(e)}"
 # Function to handle different file types
 def process_file(file_data, file_type):
     if file_data is None: