# dishDecode / app.py
# Source: Hugging Face Space "dishDecode" by GoodML
# (commit 9dac3f4 "Update app.py", verified, 17.3 kB)
# import os
# import whisper
# import requests
# import asyncio
# import aiohttp # For making async HTTP requests
# from quart import Quart, request, jsonify, render_template
# from dotenv import load_dotenv
# import warnings
# warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
# app = Quart(__name__)
# print("APP IS RUNNING, ANIKET")
# # Load the .env file
# load_dotenv()
# print("ENV LOADED, ANIKET")
# # Fetch the API key from the .env file
# API_KEY = os.getenv("FIRST_API_KEY")
# # Ensure the API key is loaded correctly
# if not API_KEY:
# raise ValueError("API Key not found. Make sure it is set in the .env file.")
# GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
# GEMINI_API_KEY = API_KEY
# # Load Whisper AI model at startup
# print("Loading Whisper AI model..., ANIKET")
# whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
# print("Whisper AI model loaded successfully, ANIKET")
# @app.route("/", methods=["GET"])
# async def health_check():
# return jsonify({"status": "success", "message": "API is running successfully!"}), 200
# @app.route("/mbsa")
# async def mbsa():
# return await render_template("mbsa.html")
# @app.route('/process-audio', methods=['POST'])
# async def process_audio():
# print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
# if 'audio' not in request.files:
# return jsonify({"error": "No audio file provided"}), 400
# audio_file = request.files['audio']
# print("AUDIO FILE NAME: ", audio_file)
# try:
# print("STARTING TRANSCRIPTION, ANIKET")
# # Step 1: Transcribe the uploaded audio file asynchronously
# transcription = await transcribe_audio(audio_file)
# print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
# if not transcription:
# return jsonify({"error": "Audio transcription failed"}), 500
# print("GOT THE transcription")
# print("Starting the GEMINI REQUEST TO STRUCTURE IT")
# # Step 2: Generate structured recipe information using Gemini API asynchronously
# structured_data = await query_gemini_api(transcription)
# print("GOT THE STRUCTURED DATA", structured_data)
# # Step 3: Return the structured data
# return jsonify(structured_data)
# except Exception as e:
# return jsonify({"error": str(e)}), 500
# async def transcribe_audio(audio_file):
# """
# Transcribe audio using Whisper AI (async function).
# """
# print("CAME IN THE transcribe audio function")
# try:
# with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
# audio_file.save(temp_audio_file.name)
# print(f"Temporary audio file saved: {temp_audio_file.name}")
# # Run Whisper transcription asynchronously
# loop = asyncio.get_event_loop()
# result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
# print("THE RESULTS ARE", result)
# return result.get("text", "").strip()
# except Exception as e:
# print(f"Error in transcription: {e}")
# return None
# async def query_gemini_api(transcription):
# """
# Send transcription text to Gemini API and fetch structured recipe information (async function).
# """
# try:
# # Define the structured prompt
# prompt = (
# "Analyze the provided cooking video transcription and extract the following structured information:\n"
# "1. Recipe Name: Identify the name of the dish being prepared.\n"
# "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
# "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
# "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
# "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
# "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
# "7. Serving size: In count of people or portion size.\n"
# "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
# "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
# f"Text: {transcription}\n"
# )
# # Prepare the payload and headers
# payload = {
# "contents": [
# {
# "parts": [
# {"text": prompt}
# ]
# }
# ]
# }
# headers = {"Content-Type": "application/json"}
# # Send request to Gemini API asynchronously
# async with aiohttp.ClientSession() as session:
# async with session.post(
# f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
# json=payload,
# headers=headers,
# timeout=60 # 60 seconds timeout for the request
# ) as response:
# response.raise_for_status() # Raise error if response code is not 200
# data = await response.json()
# return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
# except aiohttp.ClientError as e:
# print(f"Error querying Gemini API: {e}")
# return {"error": str(e)}
# if __name__ == '__main__':
# app.run(debug=True)
# Above code is without polling and sleep
import os
import whisper
import requests
from flask import Flask, request, jsonify, render_template
import tempfile
import warnings

# Whisper emits this warning on every CPU run; it is informational only.
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")

app = Flask(__name__)
print("APP IS RUNNING, ANIKET")

# Gemini API settings
from dotenv import load_dotenv

# Load the .env file so os.getenv can see locally-configured secrets.
load_dotenv()
print("ENV LOADED, ANIKET")

# Fetch the API key from the .env file
API_KEY = os.getenv("FIRST_API_KEY")

# Ensure the API key is loaded correctly; fail fast at import time rather
# than on the first request.
if not API_KEY:
    raise ValueError("API Key not found. Make sure it is set in the .env file.")

GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
GEMINI_API_KEY = API_KEY

# Load Whisper AI model at startup (one-time cost) so requests don't pay it.
print("Loading Whisper AI model..., ANIKET")
whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
print("Whisper AI model loaded successfully, ANIKET")
# Define the "/" endpoint for health check
@app.route("/", methods=["GET"])
def health_check():
    """Liveness probe: report that the API is up."""
    body = {"status": "success", "message": "API is running successfully!"}
    return jsonify(body), 200
@app.route("/mbsa")
def mbsa():
    """Render the mbsa.html page."""
    page = "mbsa.html"
    return render_template(page)
@app.route('/process-audio', methods=['POST'])
def process_audio():
    """
    Flask endpoint to process audio:
    1. Transcribe the provided audio file using Whisper AI.
    2. Send the transcription to the Gemini API for recipe extraction.
    3. Return the structured data as a JSON response.

    Returns 400 if no 'audio' file field is present, 500 on transcription
    or downstream failure.
    """
    print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']
    print("AUDIO FILE NAME: ", audio_file)

    temp_path = None
    try:
        print("STARTING TRANSCRIPTION, ANIKET")
        # BUG FIX: whisper_model.transcribe needs a filesystem path, but the
        # previous code passed the werkzeug FileStorage object directly.
        # Persist the upload to a temporary file and hand over its path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            audio_file.save(temp_audio.name)
            temp_path = temp_audio.name

        # Step 1: Transcribe the uploaded audio file
        transcription = transcribe_audio(temp_path)
        print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
        if not transcription:
            return jsonify({"error": "Audio transcription failed"}), 500
        print("GOT THE transcription")

        print("Starting the GEMINI REQUEST TO STRUCTURE IT")
        # Step 2: Generate structured recipe information using Gemini API
        structured_data = query_gemini_api(transcription)
        print("GOT THE STRUCTURED DATA", structured_data)

        # Step 3: Return the structured data
        return jsonify(structured_data)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Always clean up the temporary file, even on failure.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
def transcribe_audio(audio_path):
    """
    Transcribe the audio file at *audio_path* with the preloaded Whisper
    model and return the stripped transcript text, or None on failure.
    """
    print("CAME IN THE transcribe audio function")
    try:
        print("Transcribing audio...")
        output = whisper_model.transcribe(audio_path)
        print("THE RESULTS ARE", output)
        transcript = output.get("text", "")
        return transcript.strip()
    except Exception as e:
        print(f"Error in transcription: {e}")
        return None
def query_gemini_api(transcription):
    """
    Send transcription text to the Gemini API and fetch structured recipe
    information.

    Returns the generated text on success, the fallback string
    "No result found" when the response carries no candidate text, or an
    {"error": ...} dict when the HTTP request itself fails.
    """
    try:
        # Define the structured prompt
        prompt = (
            "Analyze the provided cooking video transcription and extract the following structured information:\n"
            "1. Recipe Name: Identify the name of the dish being prepared.\n"
            "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
            "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
            "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
            "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
            "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
            "7. Serving size: In count of people or portion size.\n"
            "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
            "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
            f"Text: {transcription}\n"
        )

        # Prepare the payload and headers
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": prompt}
                    ]
                }
            ]
        }
        headers = {"Content-Type": "application/json"}

        # Send request to Gemini API and wait for the response
        print("Querying Gemini API...")
        response = requests.post(
            f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
            json=payload,
            headers=headers,
            timeout=60  # 60 seconds timeout for the request
        )
        response.raise_for_status()

        # Extract and return the structured data.
        # BUG FIX: the old chained `.get("candidates", [{}])[0]` only guarded
        # a *missing* key — an empty "candidates" (or "parts") list raised an
        # uncaught IndexError. Index explicitly and fall back on any shape
        # mismatch instead.
        data = response.json()
        try:
            return data["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError, TypeError):
            return "No result found"
    except requests.exceptions.RequestException as e:
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}
# Run the Flask development server only when executed directly
# (a production WSGI server importing this module skips this).
if __name__ == '__main__':
    app.run(debug=True)
# import os
# import subprocess
# import whisper
# import requests
# import tempfile
# import warnings
# import threading
# from flask import Flask, request, jsonify, send_file, render_template
# from dotenv import load_dotenv
# import requests
# warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
# app = Flask(__name__)
# # Gemini API settings
# load_dotenv()
# API_KEY = os.getenv("FIRST_API_KEY")
# # Ensure the API key is loaded correctly
# if not API_KEY:
# raise ValueError("API Key not found. Make sure it is set in the .env file.")
# GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
# GEMINI_API_KEY = API_KEY
# # Load Whisper AI model at startup
# print("Loading Whisper AI model...")
# whisper_model = whisper.load_model("base")
# print("Whisper AI model loaded successfully.")
# # Define the "/" endpoint for health check
# @app.route("/", methods=["GET"])
# def health_check():
# return jsonify({"status": "success", "message": "API is running successfully!"}), 200
# def process_video_in_background(video_file, temp_video_file_name):
# """
# This function is executed in a separate thread to handle the long-running
# video processing tasks such as transcription and querying the Gemini API.
# """
# try:
# transcription = transcribe_audio(temp_video_file_name)
# if not transcription:
# print("Audio transcription failed")
# return
# structured_data = query_gemini_api(transcription)
# # Send structured data back or store it in a database, depending on your use case
# print("Processing complete. Structured data:", structured_data)
# except Exception as e:
# print(f"Error processing video: {e}")
# finally:
# # Clean up temporary files
# if os.path.exists(temp_video_file_name):
# os.remove(temp_video_file_name)
# @app.route('/process-video', methods=['POST'])
# def process_video():
# if 'video' not in request.files:
# return jsonify({"error": "No video file provided"}), 400
# video_file = request.files['video']
# try:
# # Save video to a temporary file
# with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
# video_file.save(temp_video_file.name)
# print(f"Video file saved: {temp_video_file.name}")
# # Start the video processing in a background thread
# threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name)).start()
# return jsonify({"message": "Video is being processed in the background."}), 202
# except Exception as e:
# return jsonify({"error": str(e)}), 500
# def transcribe_audio(video_path):
# """
# Transcribe audio directly from a video file using Whisper AI.
# """
# try:
# print(f"Transcribing video: {video_path}")
# result = whisper_model.transcribe(video_path)
# return result['text']
# except Exception as e:
# print(f"Error in transcription: {e}")
# return None
# def query_gemini_api(transcription):
# """
# Send transcription text to Gemini API and fetch structured recipe information.
# """
# try:
# # Define the structured prompt
# prompt = (
# "Analyze the provided cooking video transcription and extract the following structured information:\n"
# "1. Recipe Name: Identify the name of the dish being prepared.\n"
# "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
# "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
# "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
# "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
# "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
# "7. Serving size: In count of people or portion size.\n"
# "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
# "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
# f"Text: {transcription}\n"
# )
# payload = {
# "contents": [
# {"parts": [{"text": prompt}]}
# ]
# }
# headers = {"Content-Type": "application/json"}
# # Send request to Gemini API
# response = requests.post(
# f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
# json=payload,
# headers=headers
# )
# response.raise_for_status()
# # Extract and return the structured data
# data = response.json()
# return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
# except requests.exceptions.RequestException as e:
# print(f"Error querying Gemini API: {e}")
# return {"error": str(e)}
# if __name__ == '__main__':
# app.run(debug=True)