GoodML committed
Commit 58e0366 · verified · 1 Parent(s): 44f1555

changed flask to quart

Files changed (1)
  1. app.py +321 -157
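
For reviewers: the substance of this change is that Quart keeps Flask's API surface but runs on an asyncio event loop, so route handlers become async def, request data and render_template are awaited, and the blocking Whisper call is pushed to an executor. Below is a minimal, hypothetical sketch of that pattern (an /echo route that is not part of app.py), just to illustrate the migration shape:

import asyncio
from quart import Quart, request, jsonify

app = Quart(__name__)

def blocking_work(data: bytes) -> int:
    # Stand-in for a CPU-bound call such as whisper_model.transcribe(...)
    return len(data)

@app.route("/echo", methods=["POST"])
async def echo():
    files = await request.files  # in Quart, request.files must be awaited
    if "audio" not in files:
        return jsonify({"error": "No audio file provided"}), 400
    data = files["audio"].read()
    loop = asyncio.get_running_loop()
    size = await loop.run_in_executor(None, blocking_work, data)  # keep the event loop free
    return jsonify({"bytes_received": size})

if __name__ == "__main__":
    app.run(debug=True)
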
app.py CHANGED
@@ -1,3 +1,324 @@
  # import os
  # import subprocess
  # import whisper
@@ -149,160 +470,3 @@
  # if __name__ == '__main__':
  # app.run(debug=True)
 
-
- # Above code is without polling and sleep
- import os
- import whisper
- import requests
- from flask import Flask, request, jsonify, render_template
- import tempfile
- import warnings
- warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
-
- app = Flask(__name__)
- print("APP IS RUNNING, ANIKET")
-
- # Gemini API settings
- from dotenv import load_dotenv
- # Load the .env file
- load_dotenv()
-
- print("ENV LOADED, ANIKET")
-
- # Fetch the API key from the .env file
- API_KEY = os.getenv("FIRST_API_KEY")
-
- # Ensure the API key is loaded correctly
- if not API_KEY:
-     raise ValueError("API Key not found. Make sure it is set in the .env file.")
-
- GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
- GEMINI_API_KEY = API_KEY
-
-
- # Load Whisper AI model at startup
- print("Loading Whisper AI model..., ANIKET")
- whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
- print("Whisper AI model loaded successfully, ANIKET")
-
-
- # Define the "/" endpoint for health check
- @app.route("/", methods=["GET"])
- def health_check():
-     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
-
- @app.route("/mbsa")
- def mbsa():
-     return render_template("mbsa.html")
-
- @app.route('/process-audio', methods=['POST'])
- def process_audio():
-     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
-     """
-     Flask endpoint to process audio:
-     1. Transcribe provided audio file using Whisper AI.
-     2. Send transcription to Gemini API for recipe information extraction.
-     3. Return structured data in the response.
-     """
-
-     if 'audio' not in request.files:
-         return jsonify({"error": "No audio file provided"}), 400
-
-     audio_file = request.files['audio']
-     print("AUDIO FILE NAME: ", audio_file)
-
-     try:
-         print("STARTING TRANSCRIPTION, ANIKET")
-         # Step 1: Transcribe the uploaded audio file directly
-         audio_file = request.files['audio']
-         transcription = transcribe_audio(audio_file)
-
-         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
-
-         if not transcription:
-             return jsonify({"error": "Audio transcription failed"}), 500
-
-         print("GOT THE transcription")
-
-         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-         # Step 2: Generate structured recipe information using Gemini API
-         structured_data = query_gemini_api(transcription)
-
-         print("GOT THE STRUCTURED DATA", structured_data)
-         # Step 3: Return the structured data
-         return jsonify(structured_data)
-
-     except Exception as e:
-         return jsonify({"error": str(e)}), 500
-
- def transcribe_audio(audio_path):
-     """
-     Transcribe audio using Whisper AI.
-     """
-     print("CAME IN THE transcribe audio function")
-     try:
-         # Transcribe audio using Whisper AI
-         print("Transcribing audio...")
-         result = whisper_model.transcribe(audio_path)
-         print("THE RESULTS ARE", result)
-
-         return result.get("text", "").strip()
-
-     except Exception as e:
-         print(f"Error in transcription: {e}")
-         return None
-
-
- def query_gemini_api(transcription):
-     """
-     Send transcription text to Gemini API and fetch structured recipe information.
-     """
-     try:
-         # Define the structured prompt
-         prompt = (
-             "Analyze the provided cooking video transcription and extract the following structured information:\n"
-             "1. Recipe Name: Identify the name of the dish being prepared.\n"
-             "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
-             "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
-             "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
-             "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
-             "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
-             "7. Serving size: In count of people or portion size.\n"
-             "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
-             "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
-             f"Text: {transcription}\n"
-         )
-
-         # Prepare the payload and headers
-         payload = {
-             "contents": [
-                 {
-                     "parts": [
-                         {"text": prompt}
-                     ]
-                 }
-             ]
-         }
-         headers = {"Content-Type": "application/json"}
-
-         # Send request to Gemini API and wait for the response
-         print("Querying Gemini API...")
-         response = requests.post(
-             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
-             json=payload,
-             headers=headers,
-             timeout=60 # 60 seconds timeout for the request
-         )
-         response.raise_for_status()
-
-         # Extract and return the structured data
-         data = response.json()
-         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-
-     except requests.exceptions.RequestException as e:
-         print(f"Error querying Gemini API: {e}")
-         return {"error": str(e)}
-
-
- if __name__ == '__main__':
-     app.run(debug=True)
 
+ import os
+ import whisper
+ import requests
+ import tempfile
+ import asyncio
+ import aiohttp  # For making async HTTP requests
+ from quart import Quart, request, jsonify, render_template
+ from dotenv import load_dotenv
+ import warnings
+ warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+
+ app = Quart(__name__)
+ print("APP IS RUNNING, ANIKET")
+
+ # Load the .env file
+ load_dotenv()
+
+ print("ENV LOADED, ANIKET")
+
+ # Fetch the API key from the .env file
+ API_KEY = os.getenv("FIRST_API_KEY")
+
+ # Ensure the API key is loaded correctly
+ if not API_KEY:
+     raise ValueError("API Key not found. Make sure it is set in the .env file.")
+
+ GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
+ GEMINI_API_KEY = API_KEY
+
+ # Load Whisper AI model at startup
+ print("Loading Whisper AI model..., ANIKET")
+ whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
+ print("Whisper AI model loaded successfully, ANIKET")
+
+
+ @app.route("/", methods=["GET"])
+ async def health_check():
+     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+
+
+ @app.route("/mbsa")
+ async def mbsa():
+     return await render_template("mbsa.html")
+
+
+ @app.route('/process-audio', methods=['POST'])
+ async def process_audio():
+     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
+
+     # In Quart, request.files is a coroutine and must be awaited
+     files = await request.files
+     if 'audio' not in files:
+         return jsonify({"error": "No audio file provided"}), 400
+
+     audio_file = files['audio']
+     print("AUDIO FILE NAME: ", audio_file)
+
+     try:
+         print("STARTING TRANSCRIPTION, ANIKET")
+
+         # Step 1: Transcribe the uploaded audio file asynchronously
+         transcription = await transcribe_audio(audio_file)
+
+         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
+
+         if not transcription:
+             return jsonify({"error": "Audio transcription failed"}), 500
+
+         print("GOT THE transcription")
+
+         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+         # Step 2: Generate structured recipe information using Gemini API asynchronously
+         structured_data = await query_gemini_api(transcription)
+
+         print("GOT THE STRUCTURED DATA", structured_data)
+         # Step 3: Return the structured data
+         return jsonify(structured_data)
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+ async def transcribe_audio(audio_file):
+     """
+     Transcribe audio using Whisper AI (async function).
+     """
+     print("CAME IN THE transcribe audio function")
+     try:
+         # Save the upload to a temporary file so Whisper can read it from disk
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
+             audio_file.save(temp_audio_file.name)
+             print(f"Temporary audio file saved: {temp_audio_file.name}")
+
+         # Run the blocking Whisper transcription in a thread executor
+         loop = asyncio.get_running_loop()
+         result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
+         print("THE RESULTS ARE", result)
+
+         return result.get("text", "").strip()
+
+     except Exception as e:
+         print(f"Error in transcription: {e}")
+         return None
+
+
+ async def query_gemini_api(transcription):
+     """
+     Send transcription text to Gemini API and fetch structured recipe information (async function).
+     """
+     try:
+         # Define the structured prompt
+         prompt = (
+             "Analyze the provided cooking video transcription and extract the following structured information:\n"
+             "1. Recipe Name: Identify the name of the dish being prepared.\n"
+             "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
+             "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
+             "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
+             "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
+             "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
+             "7. Serving size: In count of people or portion size.\n"
+             "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
+             "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
+             f"Text: {transcription}\n"
+         )
+
+         # Prepare the payload and headers
+         payload = {
+             "contents": [
+                 {
+                     "parts": [
+                         {"text": prompt}
+                     ]
+                 }
+             ]
+         }
+         headers = {"Content-Type": "application/json"}
+
+         # Send request to Gemini API asynchronously
+         async with aiohttp.ClientSession() as session:
+             async with session.post(
+                 f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+                 json=payload,
+                 headers=headers,
+                 timeout=aiohttp.ClientTimeout(total=60)  # 60 seconds timeout for the request
+             ) as response:
+                 response.raise_for_status()  # Raise error if response code is not 200
+                 data = await response.json()
+
+         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+
+     except aiohttp.ClientError as e:
+         print(f"Error querying Gemini API: {e}")
+         return {"error": str(e)}
+
+
+ if __name__ == '__main__':
+     app.run(debug=True)
+
+
+ # # Above code is without polling and sleep
+ # import os
+ # import whisper
+ # import requests
+ # from flask import Flask, request, jsonify, render_template
+ # import tempfile
+ # import warnings
+ # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+
+ # app = Flask(__name__)
+ # print("APP IS RUNNING, ANIKET")
+
+ # # Gemini API settings
+ # from dotenv import load_dotenv
+ # # Load the .env file
+ # load_dotenv()
+
+ # print("ENV LOADED, ANIKET")
+
+ # # Fetch the API key from the .env file
+ # API_KEY = os.getenv("FIRST_API_KEY")
+
+ # # Ensure the API key is loaded correctly
+ # if not API_KEY:
+ # raise ValueError("API Key not found. Make sure it is set in the .env file.")
+
+ # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
+ # GEMINI_API_KEY = API_KEY
+
+
+ # # Load Whisper AI model at startup
+ # print("Loading Whisper AI model..., ANIKET")
+ # whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
+ # print("Whisper AI model loaded successfully, ANIKET")
+
+
+ # # Define the "/" endpoint for health check
+ # @app.route("/", methods=["GET"])
+ # def health_check():
+ # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+
+ # @app.route("/mbsa")
+ # def mbsa():
+ # return render_template("mbsa.html")
+
+ # @app.route('/process-audio', methods=['POST'])
+ # def process_audio():
+ # print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
+ # """
+ # Flask endpoint to process audio:
+ # 1. Transcribe provided audio file using Whisper AI.
+ # 2. Send transcription to Gemini API for recipe information extraction.
+ # 3. Return structured data in the response.
+ # """
+
+ # if 'audio' not in request.files:
+ # return jsonify({"error": "No audio file provided"}), 400
+
+ # audio_file = request.files['audio']
+ # print("AUDIO FILE NAME: ", audio_file)
+
+ # try:
+ # print("STARTING TRANSCRIPTION, ANIKET")
+ # # Step 1: Transcribe the uploaded audio file directly
+ # audio_file = request.files['audio']
+ # transcription = transcribe_audio(audio_file)
+
+ # print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
+
+ # if not transcription:
+ # return jsonify({"error": "Audio transcription failed"}), 500
+
+ # print("GOT THE transcription")
+
+ # print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+ # # Step 2: Generate structured recipe information using Gemini API
+ # structured_data = query_gemini_api(transcription)
+
+ # print("GOT THE STRUCTURED DATA", structured_data)
+ # # Step 3: Return the structured data
+ # return jsonify(structured_data)
+
+ # except Exception as e:
+ # return jsonify({"error": str(e)}), 500
+
+ # def transcribe_audio(audio_path):
+ # """
+ # Transcribe audio using Whisper AI.
+ # """
+ # print("CAME IN THE transcribe audio function")
+ # try:
+ # # Transcribe audio using Whisper AI
+ # print("Transcribing audio...")
+ # result = whisper_model.transcribe(audio_path)
+ # print("THE RESULTS ARE", result)
+
+ # return result.get("text", "").strip()
+
+ # except Exception as e:
+ # print(f"Error in transcription: {e}")
+ # return None
+
+
+ # def query_gemini_api(transcription):
+ # """
+ # Send transcription text to Gemini API and fetch structured recipe information.
+ # """
+ # try:
+ # # Define the structured prompt
+ # prompt = (
+ # "Analyze the provided cooking video transcription and extract the following structured information:\n"
+ # "1. Recipe Name: Identify the name of the dish being prepared.\n"
+ # "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
+ # "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
+ # "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
+ # "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
+ # "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
+ # "7. Serving size: In count of people or portion size.\n"
+ # "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
+ # "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
+ # f"Text: {transcription}\n"
+ # )
+
+ # # Prepare the payload and headers
+ # payload = {
+ # "contents": [
+ # {
+ # "parts": [
+ # {"text": prompt}
+ # ]
+ # }
+ # ]
+ # }
+ # headers = {"Content-Type": "application/json"}
+
+ # # Send request to Gemini API and wait for the response
+ # print("Querying Gemini API...")
+ # response = requests.post(
+ # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+ # json=payload,
+ # headers=headers,
+ # timeout=60 # 60 seconds timeout for the request
+ # )
+ # response.raise_for_status()
+
+ # # Extract and return the structured data
+ # data = response.json()
+ # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+
+ # except requests.exceptions.RequestException as e:
+ # print(f"Error querying Gemini API: {e}")
+ # return {"error": str(e)}
+
+
+ # if __name__ == '__main__':
+ # app.run(debug=True)
+
+
+
+
+
+
+
  # import os
  # import subprocess
  # import whisper

  # if __name__ == '__main__':
  # app.run(debug=True)
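
Usage note: the new file still ends with app.run(debug=True), which starts Quart's development server (by default on 127.0.0.1:5000, mirroring Flask); a production deployment would more likely use an ASGI server such as Hypercorn (hypercorn app:app). Assuming the development server and a local sample.wav, a client call against /process-audio might look like this sketch (URL, port, and file name are assumptions):

import requests

# Hypothetical client for the /process-audio endpoint; the multipart field must be named "audio".
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:5000/process-audio",
        files={"audio": f},
        timeout=300,  # transcription plus the Gemini call can take a while
    )
print(resp.status_code)
print(resp.json())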