CosmickVisions committed on
Commit
9611f6e
·
verified ·
1 Parent(s): 58a9554

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -80
app.py CHANGED
@@ -1,27 +1,30 @@
1
- import gradio as gr
2
- import groq
3
  import os
4
  import tempfile
5
  import uuid
6
- from dotenv import load_dotenv
7
- from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from langchain.vectorstores import FAISS
9
- from langchain.embeddings import HuggingFaceEmbeddings
10
- import fitz # PyMuPDF
11
  import base64
12
- from PIL import Image
13
  import io
14
- import requests
15
  import json
16
  import re
17
  from datetime import datetime, timedelta
18
- import speech_recognition as sr
19
- import pyttsx3
20
- import torch
21
- from transformers import AutoProcessor, AutoModelForVision2Seq
22
  import numpy as np
23
  import pandas as pd
24
  import openpyxl
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # Load environment variables
27
  load_dotenv()
@@ -49,12 +52,6 @@ def load_docling_model():
49
  # Initialize SmolDocling model
50
  docling_processor, docling_model = load_docling_model()
51
 
52
- # Initialize text-to-speech engine
53
- tts_engine = pyttsx3.init()
54
- # Set properties for better speech
55
- tts_engine.setProperty('rate', 150) # Speed of speech
56
- tts_engine.setProperty('volume', 0.9) # Volume (0.0 to 1.0)
57
-
58
  # Custom CSS for Tech theme
59
  custom_css = """
60
  :root {
@@ -315,67 +312,6 @@ def analyze_image(image_file):
315
  except Exception as e:
316
  return f"Error analyzing image: {str(e)}"
317
 
318
- # Improved function for speech-to-text conversion with status updates
319
- def speech_to_text(audio_status):
320
- try:
321
- # Update status to show we're listening
322
- audio_status = "Listening... Speak now"
323
- yield audio_status, gr.update(visible=True), None
324
-
325
- r = sr.Recognizer()
326
- with sr.Microphone() as source:
327
- r.adjust_for_ambient_noise(source)
328
- audio = r.listen(source, timeout=5, phrase_time_limit=15)
329
-
330
- # Update status to show processing
331
- audio_status = "Processing speech..."
332
- yield audio_status, gr.update(visible=True), None
333
-
334
- text = r.recognize_google(audio)
335
- audio_status = "Speech recognized!"
336
- return audio_status, gr.update(visible=False), text
337
- except sr.UnknownValueError:
338
- audio_status = "Could not understand audio. Please try again."
339
- return audio_status, gr.update(visible=False), None
340
- except sr.RequestError as e:
341
- audio_status = f"Error with speech recognition service: {e}"
342
- return audio_status, gr.update(visible=False), None
343
- except Exception as e:
344
- audio_status = f"Error: {str(e)}"
345
- return audio_status, gr.update(visible=False), None
346
-
347
- # Improved function for text-to-speech conversion with pyttsx3
348
- def text_to_speech(audio_status, history):
349
- if not history:
350
- return "No text to speak", gr.update(visible=False), None
351
-
352
- try:
353
- # Get the last bot response
354
- last_response = history[-1][1]
355
-
356
- # Clean up the text (remove markdown and other formatting)
357
- clean_text = re.sub(r'\*\*|__', '', last_response) # Remove bold/underline
358
- clean_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean_text) # Remove links
359
- clean_text = re.sub(r'#+ ', '', clean_text) # Remove headers
360
- clean_text = re.sub(r'```[^`]*```', ' Code block removed for speech. ', clean_text) # Remove code blocks
361
-
362
- # Update status
363
- audio_status = "Generating speech..."
364
- yield audio_status, gr.update(visible=True), None
365
-
366
- # Save to a temporary file
367
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
368
-
369
- # Use pyttsx3 to generate speech
370
- tts_engine.save_to_file(clean_text, temp_file.name)
371
- tts_engine.runAndWait()
372
-
373
- audio_status = "Speech ready!"
374
- return audio_status, gr.update(visible=False), temp_file.name
375
- except Exception as e:
376
- audio_status = f"Error in text-to-speech: {str(e)}"
377
- return audio_status, gr.update(visible=False), None
378
-
379
  # Function to handle different file types
380
  def process_file(file_data, file_type):
381
  if file_data is None:
 
1
+ # Standard library imports
 
2
  import os
3
  import tempfile
4
  import uuid
 
 
 
 
 
5
  import base64
 
6
  import io
 
7
  import json
8
  import re
9
  from datetime import datetime, timedelta
10
+
11
+ # Third-party imports
12
+ import gradio as gr
13
+ import groq
14
  import numpy as np
15
  import pandas as pd
16
  import openpyxl
17
+ import requests
18
+ import fitz # PyMuPDF
19
+ from PIL import Image
20
+ from dotenv import load_dotenv
21
+ from transformers import AutoProcessor, AutoModelForVision2Seq
22
+ import torch
23
+
24
+ # LangChain imports
25
+ from langchain_community.embeddings import HuggingFaceEmbeddings
26
+ from langchain_community.vectorstores import FAISS
27
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
28
 
29
  # Load environment variables
30
  load_dotenv()
 
52
  # Initialize SmolDocling model
53
  docling_processor, docling_model = load_docling_model()
54
 
 
 
 
 
 
 
55
  # Custom CSS for Tech theme
56
  custom_css = """
57
  :root {
 
312
  except Exception as e:
313
  return f"Error analyzing image: {str(e)}"
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  # Function to handle different file types
316
  def process_file(file_data, file_type):
317
  if file_data is None: