Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,27 +1,30 @@
|
|
1 |
-
|
2 |
-
import groq
|
3 |
import os
|
4 |
import tempfile
|
5 |
import uuid
|
6 |
-
from dotenv import load_dotenv
|
7 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
-
from langchain.vectorstores import FAISS
|
9 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
10 |
-
import fitz # PyMuPDF
|
11 |
import base64
|
12 |
-
from PIL import Image
|
13 |
import io
|
14 |
-
import requests
|
15 |
import json
|
16 |
import re
|
17 |
from datetime import datetime, timedelta
|
18 |
-
|
19 |
-
|
20 |
-
import
|
21 |
-
|
22 |
import numpy as np
|
23 |
import pandas as pd
|
24 |
import openpyxl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
# Load environment variables
|
27 |
load_dotenv()
|
@@ -49,12 +52,6 @@ def load_docling_model():
|
|
49 |
# Initialize SmolDocling model
|
50 |
docling_processor, docling_model = load_docling_model()
|
51 |
|
52 |
-
# Initialize text-to-speech engine
|
53 |
-
tts_engine = pyttsx3.init()
|
54 |
-
# Set properties for better speech
|
55 |
-
tts_engine.setProperty('rate', 150) # Speed of speech
|
56 |
-
tts_engine.setProperty('volume', 0.9) # Volume (0.0 to 1.0)
|
57 |
-
|
58 |
# Custom CSS for Tech theme
|
59 |
custom_css = """
|
60 |
:root {
|
@@ -315,67 +312,6 @@ def analyze_image(image_file):
|
|
315 |
except Exception as e:
|
316 |
return f"Error analyzing image: {str(e)}"
|
317 |
|
318 |
-
# Improved function for speech-to-text conversion with status updates
|
319 |
-
def speech_to_text(audio_status):
|
320 |
-
try:
|
321 |
-
# Update status to show we're listening
|
322 |
-
audio_status = "Listening... Speak now"
|
323 |
-
yield audio_status, gr.update(visible=True), None
|
324 |
-
|
325 |
-
r = sr.Recognizer()
|
326 |
-
with sr.Microphone() as source:
|
327 |
-
r.adjust_for_ambient_noise(source)
|
328 |
-
audio = r.listen(source, timeout=5, phrase_time_limit=15)
|
329 |
-
|
330 |
-
# Update status to show processing
|
331 |
-
audio_status = "Processing speech..."
|
332 |
-
yield audio_status, gr.update(visible=True), None
|
333 |
-
|
334 |
-
text = r.recognize_google(audio)
|
335 |
-
audio_status = "Speech recognized!"
|
336 |
-
return audio_status, gr.update(visible=False), text
|
337 |
-
except sr.UnknownValueError:
|
338 |
-
audio_status = "Could not understand audio. Please try again."
|
339 |
-
return audio_status, gr.update(visible=False), None
|
340 |
-
except sr.RequestError as e:
|
341 |
-
audio_status = f"Error with speech recognition service: {e}"
|
342 |
-
return audio_status, gr.update(visible=False), None
|
343 |
-
except Exception as e:
|
344 |
-
audio_status = f"Error: {str(e)}"
|
345 |
-
return audio_status, gr.update(visible=False), None
|
346 |
-
|
347 |
-
# Improved function for text-to-speech conversion with pyttsx3
|
348 |
-
def text_to_speech(audio_status, history):
|
349 |
-
if not history:
|
350 |
-
return "No text to speak", gr.update(visible=False), None
|
351 |
-
|
352 |
-
try:
|
353 |
-
# Get the last bot response
|
354 |
-
last_response = history[-1][1]
|
355 |
-
|
356 |
-
# Clean up the text (remove markdown and other formatting)
|
357 |
-
clean_text = re.sub(r'\*\*|__', '', last_response) # Remove bold/underline
|
358 |
-
clean_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_text) # Remove links
|
359 |
-
clean_text = re.sub(r'#+ ', '', clean_text) # Remove headers
|
360 |
-
clean_text = re.sub(r'```[^`]*```', ' Code block removed for speech. ', clean_text) # Remove code blocks
|
361 |
-
|
362 |
-
# Update status
|
363 |
-
audio_status = "Generating speech..."
|
364 |
-
yield audio_status, gr.update(visible=True), None
|
365 |
-
|
366 |
-
# Save to a temporary file
|
367 |
-
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
368 |
-
|
369 |
-
# Use pyttsx3 to generate speech
|
370 |
-
tts_engine.save_to_file(clean_text, temp_file.name)
|
371 |
-
tts_engine.runAndWait()
|
372 |
-
|
373 |
-
audio_status = "Speech ready!"
|
374 |
-
return audio_status, gr.update(visible=False), temp_file.name
|
375 |
-
except Exception as e:
|
376 |
-
audio_status = f"Error in text-to-speech: {str(e)}"
|
377 |
-
return audio_status, gr.update(visible=False), None
|
378 |
-
|
379 |
# Function to handle different file types
|
380 |
def process_file(file_data, file_type):
|
381 |
if file_data is None:
|
|
|
1 |
+
# Standard library imports
|
|
|
2 |
import os
|
3 |
import tempfile
|
4 |
import uuid
|
|
|
|
|
|
|
|
|
|
|
5 |
import base64
|
|
|
6 |
import io
|
|
|
7 |
import json
|
8 |
import re
|
9 |
from datetime import datetime, timedelta
|
10 |
+
|
11 |
+
# Third-party imports
|
12 |
+
import gradio as gr
|
13 |
+
import groq
|
14 |
import numpy as np
|
15 |
import pandas as pd
|
16 |
import openpyxl
|
17 |
+
import requests
|
18 |
+
import fitz # PyMuPDF
|
19 |
+
from PIL import Image
|
20 |
+
from dotenv import load_dotenv
|
21 |
+
from transformers import AutoProcessor, AutoModelForVision2Seq
|
22 |
+
import torch
|
23 |
+
|
24 |
+
# LangChain imports
|
25 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
26 |
+
from langchain_community.vectorstores import FAISS
|
27 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
28 |
|
29 |
# Load environment variables
|
30 |
load_dotenv()
|
|
|
52 |
# Initialize SmolDocling model
|
53 |
docling_processor, docling_model = load_docling_model()
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
# Custom CSS for Tech theme
|
56 |
custom_css = """
|
57 |
:root {
|
|
|
312 |
except Exception as e:
|
313 |
return f"Error analyzing image: {str(e)}"
|
314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
# Function to handle different file types
|
316 |
def process_file(file_data, file_type):
|
317 |
if file_data is None:
|