GoodML committed on
Commit
9dac3f4
·
verified ·
1 Parent(s): 04d660b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +209 -209
app.py CHANGED
@@ -1,175 +1,16 @@
1
- import os
2
- import whisper
3
- import requests
4
- import asyncio
5
- import aiohttp # For making async HTTP requests
6
- from quart import Quart, request, jsonify, render_template
7
- from dotenv import load_dotenv
8
- import warnings
9
- warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
10
-
11
- app = Quart(__name__)
12
- print("APP IS RUNNING, ANIKET")
13
-
14
- # Load the .env file
15
- load_dotenv()
16
-
17
- print("ENV LOADED, ANIKET")
18
-
19
- # Fetch the API key from the .env file
20
- API_KEY = os.getenv("FIRST_API_KEY")
21
-
22
- # Ensure the API key is loaded correctly
23
- if not API_KEY:
24
- raise ValueError("API Key not found. Make sure it is set in the .env file.")
25
-
26
- GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
27
- GEMINI_API_KEY = API_KEY
28
-
29
- # Load Whisper AI model at startup
30
- print("Loading Whisper AI model..., ANIKET")
31
- whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
32
- print("Whisper AI model loaded successfully, ANIKET")
33
-
34
-
35
- @app.route("/", methods=["GET"])
36
- async def health_check():
37
- return jsonify({"status": "success", "message": "API is running successfully!"}), 200
38
-
39
-
40
- @app.route("/mbsa")
41
- async def mbsa():
42
- return await render_template("mbsa.html")
43
-
44
-
45
- @app.route('/process-audio', methods=['POST'])
46
- async def process_audio():
47
- print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
48
-
49
- if 'audio' not in request.files:
50
- return jsonify({"error": "No audio file provided"}), 400
51
-
52
- audio_file = request.files['audio']
53
- print("AUDIO FILE NAME: ", audio_file)
54
-
55
- try:
56
- print("STARTING TRANSCRIPTION, ANIKET")
57
-
58
- # Step 1: Transcribe the uploaded audio file asynchronously
59
- transcription = await transcribe_audio(audio_file)
60
-
61
- print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
62
-
63
- if not transcription:
64
- return jsonify({"error": "Audio transcription failed"}), 500
65
-
66
- print("GOT THE transcription")
67
-
68
- print("Starting the GEMINI REQUEST TO STRUCTURE IT")
69
- # Step 2: Generate structured recipe information using Gemini API asynchronously
70
- structured_data = await query_gemini_api(transcription)
71
-
72
- print("GOT THE STRUCTURED DATA", structured_data)
73
- # Step 3: Return the structured data
74
- return jsonify(structured_data)
75
-
76
- except Exception as e:
77
- return jsonify({"error": str(e)}), 500
78
-
79
-
80
- async def transcribe_audio(audio_file):
81
- """
82
- Transcribe audio using Whisper AI (async function).
83
- """
84
- print("CAME IN THE transcribe audio function")
85
- try:
86
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
87
- audio_file.save(temp_audio_file.name)
88
- print(f"Temporary audio file saved: {temp_audio_file.name}")
89
-
90
- # Run Whisper transcription asynchronously
91
- loop = asyncio.get_event_loop()
92
- result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
93
- print("THE RESULTS ARE", result)
94
-
95
- return result.get("text", "").strip()
96
-
97
- except Exception as e:
98
- print(f"Error in transcription: {e}")
99
- return None
100
-
101
-
102
- async def query_gemini_api(transcription):
103
- """
104
- Send transcription text to Gemini API and fetch structured recipe information (async function).
105
- """
106
- try:
107
- # Define the structured prompt
108
- prompt = (
109
- "Analyze the provided cooking video transcription and extract the following structured information:\n"
110
- "1. Recipe Name: Identify the name of the dish being prepared.\n"
111
- "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
112
- "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
113
- "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
114
- "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
115
- "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
116
- "7. Serving size: In count of people or portion size.\n"
117
- "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
118
- "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
119
- f"Text: {transcription}\n"
120
- )
121
-
122
- # Prepare the payload and headers
123
- payload = {
124
- "contents": [
125
- {
126
- "parts": [
127
- {"text": prompt}
128
- ]
129
- }
130
- ]
131
- }
132
- headers = {"Content-Type": "application/json"}
133
-
134
- # Send request to Gemini API asynchronously
135
- async with aiohttp.ClientSession() as session:
136
- async with session.post(
137
- f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
138
- json=payload,
139
- headers=headers,
140
- timeout=60 # 60 seconds timeout for the request
141
- ) as response:
142
- response.raise_for_status() # Raise error if response code is not 200
143
- data = await response.json()
144
-
145
- return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
146
-
147
- except aiohttp.ClientError as e:
148
- print(f"Error querying Gemini API: {e}")
149
- return {"error": str(e)}
150
-
151
-
152
- if __name__ == '__main__':
153
- app.run(debug=True)
154
-
155
-
156
-
157
-
158
-
159
- # # Above code is without polling and sleep
160
  # import os
161
  # import whisper
162
  # import requests
163
- # from flask import Flask, request, jsonify, render_template
164
- # import tempfile
 
 
165
  # import warnings
166
  # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
167
 
168
- # app = Flask(__name__)
169
  # print("APP IS RUNNING, ANIKET")
170
 
171
- # # Gemini API settings
172
- # from dotenv import load_dotenv
173
  # # Load the .env file
174
  # load_dotenv()
175
 
@@ -185,73 +26,72 @@ if __name__ == '__main__':
185
  # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
186
  # GEMINI_API_KEY = API_KEY
187
 
188
-
189
  # # Load Whisper AI model at startup
190
  # print("Loading Whisper AI model..., ANIKET")
191
  # whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
192
  # print("Whisper AI model loaded successfully, ANIKET")
193
 
194
 
195
- # # Define the "/" endpoint for health check
196
  # @app.route("/", methods=["GET"])
197
- # def health_check():
198
  # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
199
 
 
200
  # @app.route("/mbsa")
201
- # def mbsa():
202
- # return render_template("mbsa.html")
 
203
 
204
  # @app.route('/process-audio', methods=['POST'])
205
- # def process_audio():
206
  # print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
207
- # """
208
- # Flask endpoint to process audio:
209
- # 1. Transcribe provided audio file using Whisper AI.
210
- # 2. Send transcription to Gemini API for recipe information extraction.
211
- # 3. Return structured data in the response.
212
- # """
213
 
214
  # if 'audio' not in request.files:
215
  # return jsonify({"error": "No audio file provided"}), 400
216
 
217
  # audio_file = request.files['audio']
218
  # print("AUDIO FILE NAME: ", audio_file)
219
-
220
  # try:
221
  # print("STARTING TRANSCRIPTION, ANIKET")
222
- # # Step 1: Transcribe the uploaded audio file directly
223
- # audio_file = request.files['audio']
224
- # transcription = transcribe_audio(audio_file)
225
-
226
  # print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
227
-
228
  # if not transcription:
229
  # return jsonify({"error": "Audio transcription failed"}), 500
230
-
231
  # print("GOT THE transcription")
232
-
233
  # print("Starting the GEMINI REQUEST TO STRUCTURE IT")
234
- # # Step 2: Generate structured recipe information using Gemini API
235
- # structured_data = query_gemini_api(transcription)
236
-
237
  # print("GOT THE STRUCTURED DATA", structured_data)
238
  # # Step 3: Return the structured data
239
  # return jsonify(structured_data)
240
-
241
  # except Exception as e:
242
  # return jsonify({"error": str(e)}), 500
243
 
244
- # def transcribe_audio(audio_path):
 
245
  # """
246
- # Transcribe audio using Whisper AI.
247
  # """
248
  # print("CAME IN THE transcribe audio function")
249
  # try:
250
- # # Transcribe audio using Whisper AI
251
- # print("Transcribing audio...")
252
- # result = whisper_model.transcribe(audio_path)
253
- # print("THE RESULTS ARE", result)
254
-
 
 
 
 
255
  # return result.get("text", "").strip()
256
 
257
  # except Exception as e:
@@ -259,9 +99,9 @@ if __name__ == '__main__':
259
  # return None
260
 
261
 
262
- # def query_gemini_api(transcription):
263
  # """
264
- # Send transcription text to Gemini API and fetch structured recipe information.
265
  # """
266
  # try:
267
  # # Define the structured prompt
@@ -291,21 +131,20 @@ if __name__ == '__main__':
291
  # }
292
  # headers = {"Content-Type": "application/json"}
293
 
294
- # # Send request to Gemini API and wait for the response
295
- # print("Querying Gemini API...")
296
- # response = requests.post(
297
- # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
298
- # json=payload,
299
- # headers=headers,
300
- # timeout=60 # 60 seconds timeout for the request
301
- # )
302
- # response.raise_for_status()
 
303
 
304
- # # Extract and return the structured data
305
- # data = response.json()
306
  # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
307
 
308
- # except requests.exceptions.RequestException as e:
309
  # print(f"Error querying Gemini API: {e}")
310
  # return {"error": str(e)}
311
 
@@ -317,6 +156,167 @@ if __name__ == '__main__':
317
 
318
 
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
 
322
  # import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # import os
2
  # import whisper
3
  # import requests
4
+ # import asyncio
5
+ # import aiohttp # For making async HTTP requests
6
+ # from quart import Quart, request, jsonify, render_template
7
+ # from dotenv import load_dotenv
8
  # import warnings
9
  # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
10
 
11
+ # app = Quart(__name__)
12
  # print("APP IS RUNNING, ANIKET")
13
 
 
 
14
  # # Load the .env file
15
  # load_dotenv()
16
 
 
26
  # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
27
  # GEMINI_API_KEY = API_KEY
28
 
 
29
  # # Load Whisper AI model at startup
30
  # print("Loading Whisper AI model..., ANIKET")
31
  # whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
32
  # print("Whisper AI model loaded successfully, ANIKET")
33
 
34
 
 
35
  # @app.route("/", methods=["GET"])
36
+ # async def health_check():
37
  # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
38
 
39
+
40
  # @app.route("/mbsa")
41
+ # async def mbsa():
42
+ # return await render_template("mbsa.html")
43
+
44
 
45
  # @app.route('/process-audio', methods=['POST'])
46
+ # async def process_audio():
47
  # print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
 
 
 
 
 
 
48
 
49
  # if 'audio' not in request.files:
50
  # return jsonify({"error": "No audio file provided"}), 400
51
 
52
  # audio_file = request.files['audio']
53
  # print("AUDIO FILE NAME: ", audio_file)
54
+
55
  # try:
56
  # print("STARTING TRANSCRIPTION, ANIKET")
57
+
58
+ # # Step 1: Transcribe the uploaded audio file asynchronously
59
+ # transcription = await transcribe_audio(audio_file)
60
+
61
  # print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
62
+
63
  # if not transcription:
64
  # return jsonify({"error": "Audio transcription failed"}), 500
65
+
66
  # print("GOT THE transcription")
67
+
68
  # print("Starting the GEMINI REQUEST TO STRUCTURE IT")
69
+ # # Step 2: Generate structured recipe information using Gemini API asynchronously
70
+ # structured_data = await query_gemini_api(transcription)
71
+
72
  # print("GOT THE STRUCTURED DATA", structured_data)
73
  # # Step 3: Return the structured data
74
  # return jsonify(structured_data)
75
+
76
  # except Exception as e:
77
  # return jsonify({"error": str(e)}), 500
78
 
79
+
80
+ # async def transcribe_audio(audio_file):
81
  # """
82
+ # Transcribe audio using Whisper AI (async function).
83
  # """
84
  # print("CAME IN THE transcribe audio function")
85
  # try:
86
+ # with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
87
+ # audio_file.save(temp_audio_file.name)
88
+ # print(f"Temporary audio file saved: {temp_audio_file.name}")
89
+
90
+ # # Run Whisper transcription asynchronously
91
+ # loop = asyncio.get_event_loop()
92
+ # result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
93
+ # print("THE RESULTS ARE", result)
94
+
95
  # return result.get("text", "").strip()
96
 
97
  # except Exception as e:
 
99
  # return None
100
 
101
 
102
+ # async def query_gemini_api(transcription):
103
  # """
104
+ # Send transcription text to Gemini API and fetch structured recipe information (async function).
105
  # """
106
  # try:
107
  # # Define the structured prompt
 
131
  # }
132
  # headers = {"Content-Type": "application/json"}
133
 
134
+ # # Send request to Gemini API asynchronously
135
+ # async with aiohttp.ClientSession() as session:
136
+ # async with session.post(
137
+ # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
138
+ # json=payload,
139
+ # headers=headers,
140
+ # timeout=60 # 60 seconds timeout for the request
141
+ # ) as response:
142
+ # response.raise_for_status() # Raise error if response code is not 200
143
+ # data = await response.json()
144
 
 
 
145
  # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
146
 
147
+ # except aiohttp.ClientError as e:
148
  # print(f"Error querying Gemini API: {e}")
149
  # return {"error": str(e)}
150
 
 
156
 
157
 
158
 
159
+ # Above code is without polling and sleep
160
+ import os
161
+ import whisper
162
+ import requests
163
+ from flask import Flask, request, jsonify, render_template
164
+ import tempfile
165
+ import warnings
166
+ warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
167
+
168
+ app = Flask(__name__)
169
+ print("APP IS RUNNING, ANIKET")
170
+
171
+ # Gemini API settings
172
+ from dotenv import load_dotenv
173
+ # Load the .env file
174
+ load_dotenv()
175
+
176
+ print("ENV LOADED, ANIKET")
177
+
178
+ # Fetch the API key from the .env file
179
+ API_KEY = os.getenv("FIRST_API_KEY")
180
+
181
+ # Ensure the API key is loaded correctly
182
+ if not API_KEY:
183
+ raise ValueError("API Key not found. Make sure it is set in the .env file.")
184
+
185
+ GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
186
+ GEMINI_API_KEY = API_KEY
187
+
188
+
189
+ # Load Whisper AI model at startup
190
+ print("Loading Whisper AI model..., ANIKET")
191
+ whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
192
+ print("Whisper AI model loaded successfully, ANIKET")
193
+
194
+
195
+ # Define the "/" endpoint for health check
196
+ @app.route("/", methods=["GET"])
197
+ def health_check():
198
+ return jsonify({"status": "success", "message": "API is running successfully!"}), 200
199
+
200
+ @app.route("/mbsa")
201
+ def mbsa():
202
+ return render_template("mbsa.html")
203
+
204
+ @app.route('/process-audio', methods=['POST'])
205
+ def process_audio():
206
+ print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
207
+ """
208
+ Flask endpoint to process audio:
209
+ 1. Transcribe provided audio file using Whisper AI.
210
+ 2. Send transcription to Gemini API for recipe information extraction.
211
+ 3. Return structured data in the response.
212
+ """
213
+
214
+ if 'audio' not in request.files:
215
+ return jsonify({"error": "No audio file provided"}), 400
216
+
217
+ audio_file = request.files['audio']
218
+ print("AUDIO FILE NAME: ", audio_file)
219
+
220
+ try:
221
+ print("STARTING TRANSCRIPTION, ANIKET")
222
+ # Step 1: Transcribe the uploaded audio file directly
223
+ audio_file = request.files['audio']
224
+ transcription = transcribe_audio(audio_file)
225
+
226
+ print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
227
+
228
+ if not transcription:
229
+ return jsonify({"error": "Audio transcription failed"}), 500
230
+
231
+ print("GOT THE transcription")
232
+
233
+ print("Starting the GEMINI REQUEST TO STRUCTURE IT")
234
+ # Step 2: Generate structured recipe information using Gemini API
235
+ structured_data = query_gemini_api(transcription)
236
+
237
+ print("GOT THE STRUCTURED DATA", structured_data)
238
+ # Step 3: Return the structured data
239
+ return jsonify(structured_data)
240
+
241
+ except Exception as e:
242
+ return jsonify({"error": str(e)}), 500
243
+
244
+ def transcribe_audio(audio_path):
245
+ """
246
+ Transcribe audio using Whisper AI.
247
+ """
248
+ print("CAME IN THE transcribe audio function")
249
+ try:
250
+ # Transcribe audio using Whisper AI
251
+ print("Transcribing audio...")
252
+ result = whisper_model.transcribe(audio_path)
253
+ print("THE RESULTS ARE", result)
254
+
255
+ return result.get("text", "").strip()
256
+
257
+ except Exception as e:
258
+ print(f"Error in transcription: {e}")
259
+ return None
260
+
261
+
262
+ def query_gemini_api(transcription):
263
+ """
264
+ Send transcription text to Gemini API and fetch structured recipe information.
265
+ """
266
+ try:
267
+ # Define the structured prompt
268
+ prompt = (
269
+ "Analyze the provided cooking video transcription and extract the following structured information:\n"
270
+ "1. Recipe Name: Identify the name of the dish being prepared.\n"
271
+ "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
272
+ "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
273
+ "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
274
+ "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
275
+ "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
276
+ "7. Serving size: In count of people or portion size.\n"
277
+ "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
278
+ "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
279
+ f"Text: {transcription}\n"
280
+ )
281
+
282
+ # Prepare the payload and headers
283
+ payload = {
284
+ "contents": [
285
+ {
286
+ "parts": [
287
+ {"text": prompt}
288
+ ]
289
+ }
290
+ ]
291
+ }
292
+ headers = {"Content-Type": "application/json"}
293
+
294
+ # Send request to Gemini API and wait for the response
295
+ print("Querying Gemini API...")
296
+ response = requests.post(
297
+ f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
298
+ json=payload,
299
+ headers=headers,
300
+ timeout=60 # 60 seconds timeout for the request
301
+ )
302
+ response.raise_for_status()
303
+
304
+ # Extract and return the structured data
305
+ data = response.json()
306
+ return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
307
+
308
+ except requests.exceptions.RequestException as e:
309
+ print(f"Error querying Gemini API: {e}")
310
+ return {"error": str(e)}
311
+
312
+
313
+ if __name__ == '__main__':
314
+ app.run(debug=True)
315
+
316
+
317
+
318
+
319
+
320
 
321
 
322
  # import os