Spaces:

GoodML
/

dishDecode

Running

App Files Community

GoodML committed on Nov 25, 2024

Commit

3066087

verified ·

1 Parent(s): 35b7e36

Introduced sleep times

Browse files

Files changed (1) hide show

app.py +190 -20

app.py CHANGED Viewed

@@ -1,24 +1,172 @@
 import os
-import subprocess
 import whisper
 import requests
 import tempfile
 import warnings
 import threading
-from flask import Flask, request, jsonify, send_file, render_template
 from dotenv import load_dotenv
-import requests
 warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
 app = Flask(__name__)
 # Gemini API settings
 load_dotenv()
 API_KEY = os.getenv("FIRST_API_KEY")
@@ -41,7 +189,7 @@ def health_check():
     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
-def process_video_in_background(video_file, temp_video_file_name):
     """
     This function is executed in a separate thread to handle the long-running
     video processing tasks such as transcription and querying the Gemini API.
@@ -50,16 +198,16 @@ def process_video_in_background(video_file, temp_video_file_name):
         transcription = transcribe_audio(temp_video_file_name)
         if not transcription:
-            print("Audio transcription failed")
             return
         structured_data = query_gemini_api(transcription)
-        # Send structured data back or store it in a database, depending on your use case
-        print("Processing complete. Structured data:", structured_data)
     except Exception as e:
-        print(f"Error processing video: {e}")
     finally:
         # Clean up temporary files
@@ -73,6 +221,7 @@ def process_video():
         return jsonify({"error": "No video file provided"}), 400
     video_file = request.files['video']
     try:
         # Save video to a temporary file
@@ -81,9 +230,18 @@ def process_video():
             print(f"Video file saved: {temp_video_file.name}")
             # Start the video processing in a background thread
-            threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name)).start()
-            return jsonify({"message": "Video is being processed in the background."}), 202
     except Exception as e:
         return jsonify({"error": str(e)}), 500
@@ -137,9 +295,21 @@ def query_gemini_api(transcription):
         )
         response.raise_for_status()
-        # Extract and return the structured data
-        data = response.json()
-        return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
     except requests.exceptions.RequestException as e:
         print(f"Error querying Gemini API: {e}")
@@ -147,4 +317,4 @@ def query_gemini_api(transcription):
 if __name__ == '__main__':
-    app.run(debug=True)

+# import os
+# import subprocess
+# import whisper
+# import requests
+# import tempfile
+# import warnings
+# import threading
+# from flask import Flask, request, jsonify, send_file, render_template
+# from dotenv import load_dotenv
+# import requests
+# warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
+# app = Flask(__name__)
+# # Gemini API settings
+# load_dotenv()
+# API_KEY = os.getenv("FIRST_API_KEY")
+# # Ensure the API key is loaded correctly
+# if not API_KEY:
+#     raise ValueError("API Key not found. Make sure it is set in the .env file.")
+# GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
+# GEMINI_API_KEY = API_KEY
+# # Load Whisper AI model at startup
+# print("Loading Whisper AI model...")
+# whisper_model = whisper.load_model("base")
+# print("Whisper AI model loaded successfully.")
+# # Define the "/" endpoint for health check
+# @app.route("/", methods=["GET"])
+# def health_check():
+#     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+# def process_video_in_background(video_file, temp_video_file_name):
+#     """
+#     This function is executed in a separate thread to handle the long-running
+#     video processing tasks such as transcription and querying the Gemini API.
+#     """
+#     try:
+#         transcription = transcribe_audio(temp_video_file_name)
+#         if not transcription:
+#             print("Audio transcription failed")
+#             return
+#         structured_data = query_gemini_api(transcription)
+#         # Send structured data back or store it in a database, depending on your use case
+#         print("Processing complete. Structured data:", structured_data)
+#     except Exception as e:
+#         print(f"Error processing video: {e}")
+#     finally:
+#         # Clean up temporary files
+#         if os.path.exists(temp_video_file_name):
+#             os.remove(temp_video_file_name)
+# @app.route('/process-video', methods=['POST'])
+# def process_video():
+#     if 'video' not in request.files:
+#         return jsonify({"error": "No video file provided"}), 400
+#     video_file = request.files['video']
+#     try:
+#         # Save video to a temporary file
+#         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
+#             video_file.save(temp_video_file.name)
+#             print(f"Video file saved: {temp_video_file.name}")
+#             # Start the video processing in a background thread
+#             threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name)).start()
+#             return jsonify({"message": "Video is being processed in the background."}), 202
+#     except Exception as e:
+#         return jsonify({"error": str(e)}), 500
+# def transcribe_audio(video_path):
+#     """
+#     Transcribe audio directly from a video file using Whisper AI.
+#     """
+#     try:
+#         print(f"Transcribing video: {video_path}")
+#         result = whisper_model.transcribe(video_path)
+#         return result['text']
+#     except Exception as e:
+#         print(f"Error in transcription: {e}")
+#         return None
+# def query_gemini_api(transcription):
+#     """
+#     Send transcription text to Gemini API and fetch structured recipe information.
+#     """
+#     try:
+#         # Define the structured prompt
+#         prompt = (
+#             "Analyze the provided cooking video transcription and extract the following structured information:\n"
+#             "1. Recipe Name: Identify the name of the dish being prepared.\n"
+#             "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
+#             "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
+#             "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
+#             "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
+#             "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
+#             "7. Serving size: In count of people or portion size.\n"
+#             "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
+#             "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
+#             f"Text: {transcription}\n"
+#         )
+#         payload = {
+#             "contents": [
+#                 {"parts": [{"text": prompt}]}
+#             ]
+#         }
+#         headers = {"Content-Type": "application/json"}
+#         # Send request to Gemini API
+#         response = requests.post(
+#             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+#             json=payload,
+#             headers=headers
+#         )
+#         response.raise_for_status()
+#         # Extract and return the structured data
+#         data = response.json()
+#         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+#     except requests.exceptions.RequestException as e:
+#         print(f"Error querying Gemini API: {e}")
+#         return {"error": str(e)}
+# if __name__ == '__main__':
+#     app.run(debug=True)
+# The code above is the previous version, without polling or sleep intervals.
+# The code below is the latest version.
 import os
 import whisper
 import requests
 import tempfile
 import warnings
 import threading
+import time
+from flask import Flask, request, jsonify
 from dotenv import load_dotenv
 warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
 app = Flask(__name__)
 # Gemini API settings
 load_dotenv()
 API_KEY = os.getenv("FIRST_API_KEY")
     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+def process_video_in_background(video_file, temp_video_file_name, result_container):
     """
     This function is executed in a separate thread to handle the long-running
     video processing tasks such as transcription and querying the Gemini API.
         transcription = transcribe_audio(temp_video_file_name)
         if not transcription:
+            result_container["error"] = "Audio transcription failed"
             return
         structured_data = query_gemini_api(transcription)
+        # Save structured data to the result container to return later
+        result_container["data"] = structured_data
     except Exception as e:
+        result_container["error"] = f"Error processing video: {e}"
     finally:
         # Clean up temporary files
         return jsonify({"error": "No video file provided"}), 400
     video_file = request.files['video']
+    result_container = {}
     try:
         # Save video to a temporary file
             print(f"Video file saved: {temp_video_file.name}")
             # Start the video processing in a background thread
+            threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name, result_container)).start()
+            # Poll every 5 seconds to check if the result is available
+            while "data" not in result_container and "error" not in result_container:
+                print("Waiting for processing to complete...")
+                time.sleep(5)  # Sleep for 5 seconds before checking again
+            # Check for the result
+            if "error" in result_container:
+                return jsonify({"error": result_container["error"]}), 500
+            else:
+                return jsonify({"message": "Processing complete", "data": result_container["data"]}), 200
     except Exception as e:
         return jsonify({"error": str(e)}), 500
         )
         response.raise_for_status()
+        # Polling for response (in case Gemini takes time to process)
+        polling_wait_time = 5  # Time to wait between polling attempts
+        polling_max_retries = 60  # Maximum number of retries
+        for attempt in range(polling_max_retries):
+            print(f"Attempt {attempt + 1} to fetch Gemini API response...")
+            response_data = response.json()
+            # Check if the response is ready
+            if "candidates" in response_data and len(response_data["candidates"]) > 0:
+                return response_data["candidates"][0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+            time.sleep(polling_wait_time)  # Wait before trying again
+        return "Gemini API response not ready after multiple attempts."
     except requests.exceptions.RequestException as e:
         print(f"Error querying Gemini API: {e}")
 if __name__ == '__main__':
+    app.run(debug=True)