Spaces:

GoodML
/

dishDecode

Running

App Files Files Community

GoodML committed on Nov 26, 2024

Commit

d509284

verified ·

1 Parent(s): 9893fb1

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -16

app.py CHANGED Viewed

@@ -5,6 +5,8 @@ import asyncio
 import aiohttp  # For making async HTTP requests
 from quart import Quart, request, jsonify, render_template
 from dotenv import load_dotenv
 import warnings
 warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
@@ -18,11 +20,17 @@ print("ENV LOADED, ANIKET")
 # Fetch the API key from the .env file
 API_KEY = os.getenv("FIRST_API_KEY")
 # Ensure the API key is loaded correctly
 if not API_KEY:
     raise ValueError("API Key not found. Make sure it is set in the .env file.")
 GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 GEMINI_API_KEY = API_KEY
@@ -77,26 +85,96 @@ async def process_audio():
         return jsonify({"error": str(e)}), 500
-async def transcribe_audio(audio_file):
-    """
-    Transcribe audio using Whisper AI (async function).
-    """
-    print("CAME IN THE transcribe audio function")
-    try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
-            audio_file.save(temp_audio_file.name)
-            print(f"Temporary audio file saved: {temp_audio_file.name}")
-            # Run Whisper transcription asynchronously
-            loop = asyncio.get_event_loop()
-            result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
-            print("THE RESULTS ARE", result)
-        return result.get("text", "").strip()
     except Exception as e:
-        print(f"Error in transcription: {e}")
-        return None
 async def query_gemini_api(transcription):

 import aiohttp  # For making async HTTP requests
 from quart import Quart, request, jsonify, render_template
 from dotenv import load_dotenv
+from deepgram import DeepgramClient, PrerecordedOptions
 import warnings
 warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
 # Fetch the API key from the .env file
 API_KEY = os.getenv("FIRST_API_KEY")
+DEEPGRAM_API_KEY = os.getenv("SECOND_API_KEY")
 # Ensure the API key is loaded correctly
 if not API_KEY:
     raise ValueError("API Key not found. Make sure it is set in the .env file.")
+# Ensure the API key is loaded correctly
+if not DEEPGRAM_API_KEY:
+    raise ValueError("DEEPGRAM_API_KEY not found. Make sure it is set in the .env file.")
 GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 GEMINI_API_KEY = API_KEY
         return jsonify({"error": str(e)}), 500
+import subprocess
+import os
+import json
+from deepgram.clients import DeepgramClient
+from deepgram.options import PrerecordedOptions
# The Deepgram key is read from the environment (SECOND_API_KEY); a hard-coded
# placeholder here would silently replace the real key with a dummy string.
DEEPGRAM_API_KEY = os.getenv("SECOND_API_KEY")
def _transcribe_audio_blocking(video_file_path, wav_file_path):
    """Run the blocking convert-and-transcribe pipeline (FFmpeg, then Deepgram).

    Kept synchronous so that ``transcribe_audio`` can run it on a worker
    thread. Returns the status dictionary documented on ``transcribe_audio``.
    """
    try:
        # Check the input first: a missing video makes FFmpeg exit non-zero,
        # which would otherwise be reported as a conversion error.
        if not os.path.isfile(video_file_path):
            return {"status": "error", "message": f"Video file not found: {video_file_path}"}

        deepgram = DeepgramClient(DEEPGRAM_API_KEY)

        print("Converting video to audio (WAV format)...")
        ffmpeg_command = [
            # -y / -nostdin: overwrite an existing WAV instead of waiting on
            # an interactive prompt that nobody can answer in a server.
            "ffmpeg", "-y", "-nostdin",
            "-i", video_file_path, "-q:a", "0", "-map", "a", wav_file_path,
        ]
        subprocess.run(ffmpeg_command, check=True)
        print(f"Conversion successful! WAV file saved at: {wav_file_path}")

        with open(wav_file_path, "rb") as buffer_data:
            payload = {"buffer": buffer_data}
            options = PrerecordedOptions(
                smart_format=True, model="nova-2", language="en-US"
            )
            response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)

        if not response:
            return {"status": "error", "message": "Invalid response from Deepgram."}
        print("Request successful! Processing response.")

        try:
            data_str = response.to_json(indent=4)
        except AttributeError as e:
            return {"status": "error", "message": f"Error converting response to JSON: {e}"}

        try:
            data = json.loads(data_str)
        except json.JSONDecodeError as e:
            return {"status": "error", "message": f"Error parsing JSON string: {e}"}

        # IndexError/TypeError cover empty "channels"/"alternatives" lists and
        # null nodes, not just missing keys.
        try:
            transcript = data["results"]["channels"][0]["alternatives"][0]["transcript"]
        except (KeyError, IndexError, TypeError) as e:
            return {"status": "error", "message": f"Error extracting transcript: {e}"}

        # NOTE(review): fixed output name means concurrent requests overwrite
        # each other's transcript file; kept because callers receive file_path.
        output_text_file = "deepGramNovaTranscript.txt"
        with open(output_text_file, "w", encoding="utf-8") as file:
            file.write(transcript)
        print(f"Transcript saved to: {output_text_file}")

        return {"status": "success", "transcript": transcript, "file_path": output_text_file}
    except FileNotFoundError as e:
        # The video was checked above, so this is the ffmpeg executable or the
        # converted WAV that could not be found.
        return {"status": "error", "message": f"Required file or executable not found: {e}"}
    except subprocess.CalledProcessError as e:
        return {"status": "error", "message": f"Error during audio conversion: {e}"}
    except Exception as e:
        return {"status": "error", "message": f"Unexpected error: {e}"}
    finally:
        # Clean up the temporary WAV file
        if os.path.exists(wav_file_path):
            os.remove(wav_file_path)
            print(f"Temporary WAV file deleted: {wav_file_path}")


async def transcribe_audio(video_file_path, wav_file_path):
    """
    Transcribe the audio track of a video file using Deepgram (async function).

    The video is converted to WAV with FFmpeg, the WAV is sent to Deepgram's
    prerecorded endpoint (model "nova-2", language "en-US"), and the transcript
    is written to "deepGramNovaTranscript.txt". The WAV file is deleted before
    returning, whether or not transcription succeeded.

    Args:
        video_file_path (str): Path to the input video file.
        wav_file_path (str): Path to save the converted WAV file.
    Returns:
        dict: {"status": "success", "transcript": ..., "file_path": ...} on
        success, or {"status": "error", "message": ...} on any failure. Errors
        are reported in the returned dictionary rather than raised.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    print("Entered the transcribe_audio function")
    # FFmpeg and the Deepgram client call are blocking; run them on a worker
    # thread so the event loop can keep serving other requests.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, _transcribe_audio_blocking, video_file_path, wav_file_path
    )
 async def query_gemini_api(transcription):