# dishDecode / app.py
# Source: Hugging Face Space "dishDecode" by GoodML
# (commit 9dac3f4 "Update app.py", verified, 17.3 kB)
# import os
# import whisper
# import requests
# import asyncio
# import aiohttp # For making async HTTP requests
# from quart import Quart, request, jsonify, render_template
# from dotenv import load_dotenv
# import warnings
# warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
# app = Quart(__name__)
# print("APP IS RUNNING, ANIKET")
# # Load the .env file
# load_dotenv()
# print("ENV LOADED, ANIKET")
# # Fetch the API key from the .env file
# API_KEY = os.getenv("FIRST_API_KEY")
# # Ensure the API key is loaded correctly
# if not API_KEY:
# raise ValueError("API Key not found. Make sure it is set in the .env file.")
# GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
# GEMINI_API_KEY = API_KEY
# # Load Whisper AI model at startup
# print("Loading Whisper AI model..., ANIKET")
# whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
# print("Whisper AI model loaded successfully, ANIKET")
# @app.route("/", methods=["GET"])
# async def health_check():
# return jsonify({"status": "success", "message": "API is running successfully!"}), 200
# @app.route("/mbsa")
# async def mbsa():
# return await render_template("mbsa.html")
# @app.route('/process-audio', methods=['POST'])
# async def process_audio():
# print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
# if 'audio' not in request.files:
# return jsonify({"error": "No audio file provided"}), 400
# audio_file = request.files['audio']
# print("AUDIO FILE NAME: ", audio_file)
# try:
# print("STARTING TRANSCRIPTION, ANIKET")
# # Step 1: Transcribe the uploaded audio file asynchronously
# transcription = await transcribe_audio(audio_file)
# print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
# if not transcription:
# return jsonify({"error": "Audio transcription failed"}), 500
# print("GOT THE transcription")
# print("Starting the GEMINI REQUEST TO STRUCTURE IT")
# # Step 2: Generate structured recipe information using Gemini API asynchronously
# structured_data = await query_gemini_api(transcription)
# print("GOT THE STRUCTURED DATA", structured_data)
# # Step 3: Return the structured data
# return jsonify(structured_data)
# except Exception as e:
# return jsonify({"error": str(e)}), 500
# async def transcribe_audio(audio_file):
# """
# Transcribe audio using Whisper AI (async function).
# """
# print("CAME IN THE transcribe audio function")
# try:
# with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
# audio_file.save(temp_audio_file.name)
# print(f"Temporary audio file saved: {temp_audio_file.name}")
# # Run Whisper transcription asynchronously
# loop = asyncio.get_event_loop()
# result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
# print("THE RESULTS ARE", result)
# return result.get("text", "").strip()
# except Exception as e:
# print(f"Error in transcription: {e}")
# return None
# async def query_gemini_api(transcription):
# """
# Send transcription text to Gemini API and fetch structured recipe information (async function).
# """
# try:
# # Define the structured prompt
# prompt = (
# "Analyze the provided cooking video transcription and extract the following structured information:\n"
# "1. Recipe Name: Identify the name of the dish being prepared.\n"
# "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
# "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
# "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
# "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
# "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
# "7. Serving size: In count of people or portion size.\n"
# "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
# "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
# f"Text: {transcription}\n"
# )
# # Prepare the payload and headers
# payload = {
# "contents": [
# {
# "parts": [
# {"text": prompt}
# ]
# }
# ]
# }
# headers = {"Content-Type": "application/json"}
# # Send request to Gemini API asynchronously
# async with aiohttp.ClientSession() as session:
# async with session.post(
# f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
# json=payload,
# headers=headers,
# timeout=60 # 60 seconds timeout for the request
# ) as response:
# response.raise_for_status() # Raise error if response code is not 200
# data = await response.json()
# return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
# except aiohttp.ClientError as e:
# print(f"Error querying Gemini API: {e}")
# return {"error": str(e)}
# if __name__ == '__main__':
# app.run(debug=True)
# Above code is without polling and sleep
import os
import whisper
import requests
from flask import Flask, request, jsonify, render_template
import tempfile
import warnings

# Whisper emits this warning on every CPU run; it is informational only.
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")

app = Flask(__name__)
print("APP IS RUNNING, ANIKET")

# Gemini API settings
from dotenv import load_dotenv

# Load the .env file so os.getenv can see locally-configured secrets.
load_dotenv()
print("ENV LOADED, ANIKET")

# Fetch the API key from the .env file
API_KEY = os.getenv("FIRST_API_KEY")

# Ensure the API key is loaded correctly; fail fast at import time rather
# than on the first request.
if not API_KEY:
    raise ValueError("API Key not found. Make sure it is set in the .env file.")

GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
GEMINI_API_KEY = API_KEY

# Load Whisper AI model at startup (one-time cost) so requests don't pay it.
print("Loading Whisper AI model..., ANIKET")
whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
print("Whisper AI model loaded successfully, ANIKET")
# Define the "/" endpoint for health check
@app.route("/", methods=["GET"])
def health_check():
    """Liveness probe: report that the API is up."""
    body = {"status": "success", "message": "API is running successfully!"}
    return jsonify(body), 200
@app.route("/mbsa")
def mbsa():
    """Render the mbsa.html page."""
    page = "mbsa.html"
    return render_template(page)
@app.route('/process-audio', methods=['POST'])
def process_audio():
    """
    Flask endpoint to process audio:
    1. Transcribe the provided audio file using Whisper AI.
    2. Send the transcription to the Gemini API for recipe extraction.
    3. Return the structured data as a JSON response.

    Returns 400 if no 'audio' file field is present, 500 on transcription
    or downstream failure.
    """
    print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']
    print("AUDIO FILE NAME: ", audio_file)

    temp_path = None
    try:
        print("STARTING TRANSCRIPTION, ANIKET")
        # BUG FIX: whisper_model.transcribe needs a filesystem path, but the
        # previous code passed the werkzeug FileStorage object directly.
        # Persist the upload to a temporary file and hand over its path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            audio_file.save(temp_audio.name)
            temp_path = temp_audio.name

        # Step 1: Transcribe the uploaded audio file
        transcription = transcribe_audio(temp_path)
        print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
        if not transcription:
            return jsonify({"error": "Audio transcription failed"}), 500
        print("GOT THE transcription")

        print("Starting the GEMINI REQUEST TO STRUCTURE IT")
        # Step 2: Generate structured recipe information using Gemini API
        structured_data = query_gemini_api(transcription)
        print("GOT THE STRUCTURED DATA", structured_data)

        # Step 3: Return the structured data
        return jsonify(structured_data)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Always clean up the temporary file, even on failure.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
def transcribe_audio(audio_path):
    """
    Transcribe the audio file at *audio_path* with the preloaded Whisper
    model and return the stripped transcript text, or None on failure.
    """
    print("CAME IN THE transcribe audio function")
    try:
        print("Transcribing audio...")
        output = whisper_model.transcribe(audio_path)
        print("THE RESULTS ARE", output)
        transcript = output.get("text", "")
        return transcript.strip()
    except Exception as e:
        print(f"Error in transcription: {e}")
        return None
def query_gemini_api(transcription):
    """
    Send transcription text to the Gemini API and fetch structured recipe
    information.

    Returns the generated text on success, the fallback string
    "No result found" when the response carries no candidate text, or an
    {"error": ...} dict when the HTTP request itself fails.
    """
    try:
        # Define the structured prompt
        prompt = (
            "Analyze the provided cooking video transcription and extract the following structured information:\n"
            "1. Recipe Name: Identify the name of the dish being prepared.\n"
            "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
            "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
            "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
            "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
            "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
            "7. Serving size: In count of people or portion size.\n"
            "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
            "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
            f"Text: {transcription}\n"
        )

        # Prepare the payload and headers
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": prompt}
                    ]
                }
            ]
        }
        headers = {"Content-Type": "application/json"}

        # Send request to Gemini API and wait for the response
        print("Querying Gemini API...")
        response = requests.post(
            f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
            json=payload,
            headers=headers,
            timeout=60  # 60 seconds timeout for the request
        )
        response.raise_for_status()

        # Extract and return the structured data.
        # BUG FIX: the old chained `.get("candidates", [{}])[0]` only guarded
        # a *missing* key — an empty "candidates" (or "parts") list raised an
        # uncaught IndexError. Index explicitly and fall back on any shape
        # mismatch instead.
        data = response.json()
        try:
            return data["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError, TypeError):
            return "No result found"
    except requests.exceptions.RequestException as e:
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}
# Run the Flask development server only when executed directly
# (a production WSGI server importing this module skips this).
if __name__ == '__main__':
    app.run(debug=True)
# import os
# import subprocess
# import whisper
# import requests
# import tempfile
# import warnings
# import threading
# from flask import Flask, request, jsonify, send_file, render_template
# from dotenv import load_dotenv
# import requests
# warnings.filterwarnings("ignore", category=UserWarning, module="whisper")
# app = Flask(__name__)
# # Gemini API settings
# load_dotenv()
# API_KEY = os.getenv("FIRST_API_KEY")
# # Ensure the API key is loaded correctly
# if not API_KEY:
# raise ValueError("API Key not found. Make sure it is set in the .env file.")
# GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
# GEMINI_API_KEY = API_KEY
# # Load Whisper AI model at startup
# print("Loading Whisper AI model...")
# whisper_model = whisper.load_model("base")
# print("Whisper AI model loaded successfully.")
# # Define the "/" endpoint for health check
# @app.route("/", methods=["GET"])
# def health_check():
# return jsonify({"status": "success", "message": "API is running successfully!"}), 200
# def process_video_in_background(video_file, temp_video_file_name):
# """
# This function is executed in a separate thread to handle the long-running
# video processing tasks such as transcription and querying the Gemini API.
# """
# try:
# transcription = transcribe_audio(temp_video_file_name)
# if not transcription:
# print("Audio transcription failed")
# return
# structured_data = query_gemini_api(transcription)
# # Send structured data back or store it in a database, depending on your use case
# print("Processing complete. Structured data:", structured_data)
# except Exception as e:
# print(f"Error processing video: {e}")
# finally:
# # Clean up temporary files
# if os.path.exists(temp_video_file_name):
# os.remove(temp_video_file_name)
# @app.route('/process-video', methods=['POST'])
# def process_video():
# if 'video' not in request.files:
# return jsonify({"error": "No video file provided"}), 400
# video_file = request.files['video']
# try:
# # Save video to a temporary file
# with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
# video_file.save(temp_video_file.name)
# print(f"Video file saved: {temp_video_file.name}")
# # Start the video processing in a background thread
# threading.Thread(target=process_video_in_background, args=(video_file, temp_video_file.name)).start()
# return jsonify({"message": "Video is being processed in the background."}), 202
# except Exception as e:
# return jsonify({"error": str(e)}), 500
# def transcribe_audio(video_path):
# """
# Transcribe audio directly from a video file using Whisper AI.
# """
# try:
# print(f"Transcribing video: {video_path}")
# result = whisper_model.transcribe(video_path)
# return result['text']
# except Exception as e:
# print(f"Error in transcription: {e}")
# return None
# def query_gemini_api(transcription):
# """
# Send transcription text to Gemini API and fetch structured recipe information.
# """
# try:
# # Define the structured prompt
# prompt = (
# "Analyze the provided cooking video transcription and extract the following structured information:\n"
# "1. Recipe Name: Identify the name of the dish being prepared.\n"
# "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
# "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
# "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
# "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
# "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
# "7. Serving size: In count of people or portion size.\n"
# "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
# "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
# f"Text: {transcription}\n"
# )
# payload = {
# "contents": [
# {"parts": [{"text": prompt}]}
# ]
# }
# headers = {"Content-Type": "application/json"}
# # Send request to Gemini API
# response = requests.post(
# f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
# json=payload,
# headers=headers
# )
# response.raise_for_status()
# # Extract and return the structured data
# data = response.json()
# return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
# except requests.exceptions.RequestException as e:
# print(f"Error querying Gemini API: {e}")
# return {"error": str(e)}
# if __name__ == '__main__':
# app.run(debug=True)