Spaces:

GoodML
/

dishDecode

Running

App Files Community

GoodML committed on Nov 25, 2024

Commit

a0fefdf

verified ·

1 Parent(s): 22596d6

Debugging

Browse files

Files changed (1) hide show

app.py +25 -6

app.py CHANGED Viewed

@@ -160,12 +160,14 @@ from flask import Flask, request, jsonify, render_template
 import tempfile
 app = Flask(__name__)
 # Gemini API settings
 from dotenv import load_dotenv
 # Load the .env file
 load_dotenv()
 # Fetch the API key from the .env file
 API_KEY = os.getenv("FIRST_API_KEY")
@@ -176,10 +178,11 @@ if not API_KEY:
 GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 GEMINI_API_KEY = API_KEY
 # Load Whisper AI model at startup
-print("Loading Whisper AI model...")
 whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
-print("Whisper AI model loaded successfully.")
 # Define the "/" endpoint for health check
@@ -193,38 +196,50 @@ def mbsa():
 @app.route('/process-video', methods=['POST'])
 def process_video():
     """
     Flask endpoint to process video:
     1. Extract audio and transcribe using Whisper AI.
     2. Send transcription to Gemini API for recipe information extraction.
     3. Return structured data in the response.
     """
     if 'video' not in request.files:
         return jsonify({"error": "No video file provided"}), 400
     video_file = request.files['video']
     try:
         # Step 1: Save video to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
             video_file.save(temp_video_file.name)
             print(f"Video file saved: {temp_video_file.name}")
             # Step 2: Extract audio from video using ffmpeg (waiting for completion)
             audio_path = extract_audio(temp_video_file.name)
             if not audio_path:
                 return jsonify({"error": "Audio extraction failed"}), 500
             # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
             transcription = transcribe_audio(audio_path)
             if not transcription:
                 return jsonify({"error": "Audio transcription failed"}), 500
             # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
             structured_data = query_gemini_api(transcription)
             # Step 5: Return the structured data
             return jsonify(structured_data)
@@ -266,10 +281,14 @@ def transcribe_audio(audio_path):
     """
     Transcribe audio using Whisper AI.
     """
     try:
         # Transcribe audio using Whisper AI
         print("Transcribing audio...")
         result = whisper_model.transcribe(audio_path)
         return result.get("text", "").strip()
     except Exception as e:

 import tempfile
 app = Flask(__name__)
+print("APP IS RUNNING, ANIKET")
 # Gemini API settings
 from dotenv import load_dotenv
 # Load the .env file
 load_dotenv()
+print("ENV LOADED, ANIKET")
 # Fetch the API key from the .env file
 API_KEY = os.getenv("FIRST_API_KEY")
 GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 GEMINI_API_KEY = API_KEY
 # Load Whisper AI model at startup
+print("Loading Whisper AI model..., ANIKET")
 whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
+print("Whisper AI model loaded successfully, ANIKET")
 # Define the "/" endpoint for health check
 @app.route('/process-video', methods=['POST'])
 def process_video():
+    print("GOT THE PROCESS VIDEO REQUEST, ANIKET")
     """
     Flask endpoint to process video:
     1. Extract audio and transcribe using Whisper AI.
     2. Send transcription to Gemini API for recipe information extraction.
     3. Return structured data in the response.
     """
     if 'video' not in request.files:
         return jsonify({"error": "No video file provided"}), 400
     video_file = request.files['video']
+    print("VIDEO FILE NAME: ", video_file)
     try:
+        print("SAVING THE FILE TEMPO, ANIKET")
         # Step 1: Save video to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
             video_file.save(temp_video_file.name)
             print(f"Video file saved: {temp_video_file.name}")
             # Step 2: Extract audio from video using ffmpeg (waiting for completion)
             audio_path = extract_audio(temp_video_file.name)
+            print("AUDIO PATH FROM LINE 221, ANIKET", audio_path)
             if not audio_path:
                 return jsonify({"error": "Audio extraction failed"}), 500
+            print("STARTING TRANSCRIPTION, GOT THE .WAV AUDIO PATH THAT WAS STORED TEMPO, ANIKET")
             # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
             transcription = transcribe_audio(audio_path)
+)
+            print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
             if not transcription:
                 return jsonify({"error": "Audio transcription failed"}), 500
+            print("GOT THE transcription")
+            print("Starting the GEMINI REQUEST TO STRUCTURE IT")
             # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
             structured_data = query_gemini_api(transcription)
+            print("GOT THE STRUCTURED DATA", structured_data)
             # Step 5: Return the structured data
             return jsonify(structured_data)
     """
     Transcribe audio using Whisper AI.
     """
+    print("CAME IN THE transcribe audio folder")
     try:
         # Transcribe audio using Whisper AI
         print("Transcribing audio...")
         result = whisper_model.transcribe(audio_path)
+        print("THE RESULTS ARE", result)
         return result.get("text", "").strip()
     except Exception as e: