GoodML committed on
Commit
c37b36e
·
verified ·
1 Parent(s): a611d12

Create app.py

Browse files

Added Flask source code for the API

Files changed (1) hide show
  1. app.py +153 -0
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import whisper
4
+ import requests
5
+ from flask import Flask, request, jsonify, send_file
6
+ import tempfile
7
+
8
# Flask application object that the route decorator below attaches to.
app = Flask(__name__)

# --- Gemini API configuration -------------------------------------------
from dotenv import load_dotenv
import requests

# Load the .env file so the key can be read through the environment.
load_dotenv()

# The Gemini key is stored under FIRST_API_KEY; refuse to start without it
# so a misconfigured deployment fails at import time, not on first request.
API_KEY = os.environ.get("FIRST_API_KEY")
if not API_KEY:
    raise ValueError("API Key not found. Make sure it is set in the .env file.")

GEMINI_API_KEY = API_KEY
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"

# --- Whisper model --------------------------------------------------------
# Loaded once at import so every request reuses the same model instance.
# Available sizes per the original note: tiny, base, small, medium, large.
print("Loading Whisper AI model...")
whisper_model = whisper.load_model("base")
print("Whisper AI model loaded successfully.")
30
+
31
@app.route('/process-video', methods=['POST'])
def process_video():
    """
    Flask endpoint that turns an uploaded cooking video into recipe data.

    Steps:
    1. Save the uploaded ``video`` form file to a temporary ``.mp4`` file.
    2. Extract audio and transcribe it with ``transcribe_audio``.
    3. Send the transcription to ``query_gemini_api``.
    4. Return the result as JSON.

    Responses:
        200 -- JSON-encoded result of ``query_gemini_api``.
        400 -- the request has no ``video`` file part.
        500 -- transcription produced no text, or an unexpected exception
               was raised while processing.
        502 -- ``query_gemini_api`` reported an upstream error.
    """
    if 'video' not in request.files:
        return jsonify({"error": "No video file provided"}), 400

    video_file = request.files['video']

    # Bound before the try block so the cleanup in ``finally`` never hits an
    # unbound name when temp-file creation itself fails.
    temp_video_path = None

    try:
        # Reserve a unique path, then close the handle before writing to it
        # by name: some platforms refuse to reopen a file that is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
            temp_video_path = temp_video_file.name
        video_file.save(temp_video_path)
        print(f"Video file saved: {temp_video_path}")

        # Extract audio and transcribe using Whisper AI
        transcription = transcribe_audio(temp_video_path)
        if not transcription:
            return jsonify({"error": "Audio transcription failed"}), 500

        # Generate structured recipe information using Gemini API
        structured_data = query_gemini_api(transcription)

        # query_gemini_api signals failure with an {"error": ...} dict;
        # surface that as an upstream failure instead of a 200 response.
        if isinstance(structured_data, dict) and "error" in structured_data:
            return jsonify(structured_data), 502

        return jsonify(structured_data)

    except Exception as e:
        # Top-level boundary: report the failure to the client as JSON.
        return jsonify({"error": str(e)}), 500

    finally:
        # delete=False above means the temporary video must be removed here.
        if temp_video_path and os.path.exists(temp_video_path):
            os.remove(temp_video_path)
68
+
69
+
70
def transcribe_audio(video_path):
    """
    Extract the audio track from a video file and transcribe it with Whisper.

    The audio is written next to the video as ``<video name>.wav`` by
    ffmpeg, transcribed with the module-level ``whisper_model``, and the
    ``.wav`` file is removed afterwards whether or not transcription worked.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The transcribed text with surrounding whitespace stripped, or
        ``None`` if audio extraction or transcription raised an exception.
    """
    # Swap only the real extension. str.replace(".mp4", ".wav") would also
    # rewrite ".mp4" inside directory names, and would leave the path
    # unchanged (output == input) for files without that extension.
    stem, _extension = os.path.splitext(video_path)
    audio_path = f"{stem}.wav"
    if audio_path == video_path:
        # Input is already a .wav; never let ffmpeg write over its own input.
        audio_path = f"{video_path}.audio.wav"

    try:
        # Extract audio using ffmpeg
        command = [
            "ffmpeg",
            "-y",  # overwrite without prompting; a prompt would block the request
            "-i", video_path,
            "-q:a", "0",
            "-map", "a",
            audio_path,
        ]
        subprocess.run(command, check=True)
        print(f"Audio extracted to: {audio_path}")

        # Transcribe audio using Whisper AI
        print("Transcribing audio...")
        result = whisper_model.transcribe(audio_path)

        return result.get("text", "").strip()

    except Exception as e:
        print(f"Error in transcription: {e}")
        return None

    finally:
        # Best-effort cleanup on every path, so a failed ffmpeg run or a
        # failed transcription does not leave the .wav behind.
        try:
            if os.path.exists(audio_path):
                os.remove(audio_path)
        except OSError as cleanup_error:
            print(f"Could not remove temporary audio file {audio_path}: {cleanup_error}")
100
+
101
+
102
def query_gemini_api(transcription):
    """
    Send transcription text to the Gemini API and fetch recipe information.

    Args:
        transcription: Transcribed text of the cooking video.

    Returns:
        The text of the first part of the first candidate in the response,
        or ``"No result found"`` when the response has no such text.
        If the request fails or the response body is not valid JSON, a dict
        of the form ``{"error": "<message>"}`` is returned instead.
    """
    # Upper bound in seconds for the HTTP call, so a stalled connection
    # cannot hold the request handler forever.
    request_timeout_seconds = 120

    # Define the structured prompt
    prompt = (
        "Analyze the provided cooking video transcription and extract the following structured information:\n"
        "1. Recipe Name: Identify the name of the dish being prepared.\n"
        "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
        "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
        "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
        "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
        "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
        "7. Serving size: In count of people or portion size.\n"
        "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
        "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
        f"Text: {transcription}\n"
    )

    # Prepare the payload and headers
    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt}
                ]
            }
        ]
    }
    # The key travels in a header rather than the query string: request
    # errors embed the URL in their message, and that message is returned
    # to the caller below, so a key in the URL would be disclosed.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": GEMINI_API_KEY,
    }

    try:
        # Send request to Gemini API
        print("Querying Gemini API...")
        response = requests.post(
            GEMINI_API_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=request_timeout_seconds,
        )
        response.raise_for_status()
        data = response.json()

    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}

    # Extract and return the structured data. Each level falls back to an
    # empty placeholder so a present-but-empty "candidates" or "parts" list
    # yields the default text instead of an IndexError.
    candidates = data.get("candidates") or [{}]
    content = candidates[0].get("content") or {}
    parts = content.get("parts") or [{}]
    return parts[0].get("text", "No result found")
150
+
151
+
152
if __name__ == '__main__':
    # Debug mode stays on by default, exactly as before, but can now be
    # switched off without editing the file by setting FLASK_DEBUG to
    # 0 / false / no / off.
    # NOTE(review): Flask's debug mode enables an interactive debugger; it
    # should be disabled for any deployment reachable by untrusted clients.
    debug_enabled = os.getenv("FLASK_DEBUG", "1").strip().lower() not in ("0", "false", "no", "off")
    app.run(debug=debug_enabled)