Update app.py
app.py
CHANGED
@@ -1,16 +1,14 @@
 import os
 import whisper
 import requests
-import
-import aiohttp  # For making async HTTP requests
-from quart import Quart, request, jsonify, render_template
+from flask import Flask, request, jsonify, render_template
 from dotenv import load_dotenv
 from deepgram import DeepgramClient, PrerecordedOptions
 
 import warnings
 warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
 
-app =
+app = Flask(__name__)
 print("APP IS RUNNING, ANIKET")
 
 # Load the .env file
@@ -26,11 +24,9 @@ DEEPGRAM_API_KEY = os.getenv("SECOND_API_KEY")
 if not API_KEY:
     raise ValueError("API Key not found. Make sure it is set in the .env file.")
 
-# Ensure the API key is loaded correctly
 if not DEEPGRAM_API_KEY:
     raise ValueError("DEEPGRAM_API_KEY not found. Make sure it is set in the .env file.")
 
-
 GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 GEMINI_API_KEY = API_KEY
 
@@ -41,17 +37,17 @@ GEMINI_API_KEY = API_KEY
 
 
 @app.route("/", methods=["GET"])
-
+def health_check():
     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
 
 
 @app.route("/mbsa")
-
-    return
+def mbsa():
+    return render_template("mbsa.html")
 
 
 @app.route('/process-audio', methods=['POST'])
-async def process_audio():
+def process_audio():
     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
 
     if 'audio' not in request.files:
@@ -63,8 +59,14 @@ async def process_audio():
     try:
         print("STARTING TRANSCRIPTION, ANIKET")
 
-        # Step 1:
-
+        # Step 1: Save the audio file temporarily
+        # Save the audio file to a temporary location for processing
+        temp_audio_path = "/path/to/save/audio.wav"  # Adjust this as needed
+        with open(temp_audio_path, 'wb') as f:
+            f.write(audio_file.read())
+
+        # Step 2: Transcribe the uploaded audio file synchronously
+        transcription = transcribe_audio(temp_audio_path)
 
         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
 
@@ -73,21 +75,21 @@ async def process_audio():
 
         print("GOT THE transcription")
 
+        # Step 3: Generate structured recipe information using Gemini API synchronously
         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-
-        structured_data = await query_gemini_api(transcription)
+        structured_data = query_gemini_api(transcription)
 
         print("GOT THE STRUCTURED DATA", structured_data)
-        # Step
+        # Step 4: Return the structured data
         return jsonify(structured_data)
 
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 
 
-async def transcribe_audio(wav_file_path):
+def transcribe_audio(wav_file_path):
     """
-    Transcribe audio from a video file using
+    Transcribe audio from a video file using Deepgram API synchronously.
 
     Args:
         wav_file_path (str): Path to save the converted WAV file.
@@ -100,14 +102,6 @@ async def transcribe_audio(wav_file_path):
         # Initialize Deepgram client
        deepgram = DeepgramClient(DEEPGRAM_API_KEY)
 
-        # # Convert video to audio in WAV format using FFmpeg
-        # print("Converting video to audio (WAV format)...")
-        # ffmpeg_command = [
-        #     "ffmpeg", "-i", video_file_path, "-q:a", "0", "-map", "a", wav_file_path
-        # ]
-        # subprocess.run(ffmpeg_command, check=True)
-        # print(f"Conversion successful! WAV file saved at: {wav_file_path}")
-
         # Open the converted WAV file
         with open(wav_file_path, 'rb') as buffer_data:
             payload = {'buffer': buffer_data}
@@ -142,22 +136,13 @@ async def transcribe_audio(wav_file_path):
         except KeyError as e:
             return {"status": "error", "message": f"Error extracting transcript: {e}"}
 
-
-            # output_text_file = "deepGramNovaTranscript.txt"
-
-            # Write the transcript to the text file
-            # with open(output_text_file, "w", encoding="utf-8") as file:
-            #     file.write(transcript)
-
-            print(f"Transcript saved to: {output_text_file}")
+            print(f"Transcript obtained: {transcript}")
             return transcript
         else:
             return {"status": "error", "message": "Invalid response from Deepgram."}
 
     except FileNotFoundError:
-        return {"status": "error", "message": f"Video file not found: {
-    except subprocess.CalledProcessError as e:
-        return {"status": "error", "message": f"Error during audio conversion: {e}"}
+        return {"status": "error", "message": f"Video file not found: {wav_file_path}"}
     except Exception as e:
         return {"status": "error", "message": f"Unexpected error: {e}"}
     finally:
@@ -167,9 +152,9 @@ async def transcribe_audio(wav_file_path):
             print(f"Temporary WAV file deleted: {wav_file_path}")
 
 
-async def query_gemini_api(transcription):
+def query_gemini_api(transcription):
     """
-    Send transcription text to Gemini API and fetch structured recipe information
+    Send transcription text to Gemini API and fetch structured recipe information synchronously.
     """
     try:
         # Define the structured prompt
@@ -199,20 +184,22 @@ async def query_gemini_api(transcription):
         }
         headers = {"Content-Type": "application/json"}
 
-        # Send request to Gemini API
-
-
-
-
-
-
-
-
-
+        # Send request to Gemini API synchronously
+        response = requests.post(
+            f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+            json=payload,
+            headers=headers,
+            timeout=60  # 60 seconds timeout for the request
+        )
+
+        # Raise error if response code is not 200
+        response.raise_for_status()
+
+        data = response.json()
 
         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
 
-    except
+    except requests.exceptions.RequestException as e:
         print(f"Error querying Gemini API: {e}")
         return {"error": str(e)}
 
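A side note on the new upload handling in process_audio: the committed code writes the upload to a hard-coded temp_audio_path of "/path/to/save/audio.wav" (flagged in the diff as "Adjust this as needed") and reads from an audio_file object presumably taken from request.files in the unchanged lines. A minimal sketch of one way to avoid the fixed path, using Python's standard tempfile module, is shown below; save_upload_to_temp is a hypothetical helper and is not part of this commit.

import tempfile

from werkzeug.datastructures import FileStorage  # the type Flask uses for uploads


def save_upload_to_temp(upload: FileStorage) -> str:
    """Write an uploaded audio file to a unique temporary .wav path and return that path."""
    # delete=False keeps the file on disk after the with-block, so the caller can
    # pass the path to transcribe_audio() and remove it afterwards in a finally block.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(upload.read())
        return tmp.name

# Possible use inside the route, replacing the hard-coded path (sketch only):
#     temp_audio_path = save_upload_to_temp(request.files["audio"])
#     try:
#         transcription = transcribe_audio(temp_audio_path)
#     finally:
#         os.remove(temp_audio_path)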
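For a quick manual check of the now-synchronous endpoint, a request like the following should work, assuming the app from this commit is running locally on Flask's default port 5000 and that sample.wav is any local audio file; both the URL and the file name are assumptions, not part of the commit.

import requests

# POST a local audio file to the /process-audio route under the "audio" field,
# matching the field name the route checks in the diff.
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:5000/process-audio",
        files={"audio": ("sample.wav", f, "audio/wav")},
        timeout=120,  # transcription plus the Gemini call can take a while
    )

print(resp.status_code)
print(resp.json())  # structured recipe data on success, {"error": ...} otherwise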