GoodML committed
Commit 58e0366 · verified · 1 Parent(s): 44f1555

changed flask to quart

Files changed (1)
  1. app.py +321 -157
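
For reviewers: the substance of this change is that Quart keeps Flask's API surface but runs on an asyncio event loop, so route handlers become async def, request data and render_template are awaited, and the blocking Whisper call is pushed to an executor. Below is a minimal, hypothetical sketch of that pattern (an /echo route that is not part of app.py), just to illustrate the migration shape:

import asyncio
from quart import Quart, request, jsonify

app = Quart(__name__)

def blocking_work(data: bytes) -> int:
    # Stand-in for a CPU-bound call such as whisper_model.transcribe(...)
    return len(data)

@app.route("/echo", methods=["POST"])
async def echo():
    files = await request.files  # in Quart, request.files must be awaited
    if "audio" not in files:
        return jsonify({"error": "No audio file provided"}), 400
    data = files["audio"].read()
    loop = asyncio.get_running_loop()
    size = await loop.run_in_executor(None, blocking_work, data)  # keep the event loop free
    return jsonify({"bytes_received": size})

if __name__ == "__main__":
    app.run(debug=True)
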
app.py CHANGED
@@ -1,3 +1,324 @@
  # import os
  # import subprocess
  # import whisper
@@ -149,160 +470,3 @@
  # if __name__ == '__main__':
  # app.run(debug=True)
 
-
- # Above code is without polling and sleep
- import os
- import whisper
- import requests
- from flask import Flask, request, jsonify, render_template
- import tempfile
- import warnings
- warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
-
- app = Flask(__name__)
- print("APP IS RUNNING, ANIKET")
-
- # Gemini API settings
- from dotenv import load_dotenv
- # Load the .env file
- load_dotenv()
-
- print("ENV LOADED, ANIKET")
-
- # Fetch the API key from the .env file
- API_KEY = os.getenv("FIRST_API_KEY")
-
- # Ensure the API key is loaded correctly
- if not API_KEY:
-     raise ValueError("API Key not found. Make sure it is set in the .env file.")
-
- GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
- GEMINI_API_KEY = API_KEY
-
-
- # Load Whisper AI model at startup
- print("Loading Whisper AI model..., ANIKET")
- whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
- print("Whisper AI model loaded successfully, ANIKET")
-
-
- # Define the "/" endpoint for health check
- @app.route("/", methods=["GET"])
- def health_check():
-     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
-
- @app.route("/mbsa")
- def mbsa():
-     return render_template("mbsa.html")
-
- @app.route('/process-audio', methods=['POST'])
- def process_audio():
-     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
-     """
-     Flask endpoint to process audio:
-     1. Transcribe provided audio file using Whisper AI.
-     2. Send transcription to Gemini API for recipe information extraction.
-     3. Return structured data in the response.
-     """
-
-     if 'audio' not in request.files:
-         return jsonify({"error": "No audio file provided"}), 400
-
-     audio_file = request.files['audio']
-     print("AUDIO FILE NAME: ", audio_file)
-
-     try:
-         print("STARTING TRANSCRIPTION, ANIKET")
-         # Step 1: Transcribe the uploaded audio file directly
-         audio_file = request.files['audio']
-         transcription = transcribe_audio(audio_file)
-
-         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
-
-         if not transcription:
-             return jsonify({"error": "Audio transcription failed"}), 500
-
-         print("GOT THE transcription")
-
-         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-         # Step 2: Generate structured recipe information using Gemini API
-         structured_data = query_gemini_api(transcription)
-
-         print("GOT THE STRUCTURED DATA", structured_data)
-         # Step 3: Return the structured data
-         return jsonify(structured_data)
-
-     except Exception as e:
-         return jsonify({"error": str(e)}), 500
-
- def transcribe_audio(audio_path):
-     """
-     Transcribe audio using Whisper AI.
-     """
-     print("CAME IN THE transcribe audio function")
-     try:
-         # Transcribe audio using Whisper AI
-         print("Transcribing audio...")
-         result = whisper_model.transcribe(audio_path)
-         print("THE RESULTS ARE", result)
-
-         return result.get("text", "").strip()
-
-     except Exception as e:
-         print(f"Error in transcription: {e}")
-         return None
-
-
- def query_gemini_api(transcription):
-     """
-     Send transcription text to Gemini API and fetch structured recipe information.
-     """
-     try:
-         # Define the structured prompt
-         prompt = (
-             "Analyze the provided cooking video transcription and extract the following structured information:\n"
-             "1. Recipe Name: Identify the name of the dish being prepared.\n"
-             "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
-             "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
-             "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
-             "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
-             "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
-             "7. Serving size: In count of people or portion size.\n"
-             "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
-             "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
-             f"Text: {transcription}\n"
-         )
-
-         # Prepare the payload and headers
-         payload = {
-             "contents": [
-                 {
-                     "parts": [
-                         {"text": prompt}
-                     ]
-                 }
-             ]
-         }
-         headers = {"Content-Type": "application/json"}
-
-         # Send request to Gemini API and wait for the response
-         print("Querying Gemini API...")
-         response = requests.post(
-             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
-             json=payload,
-             headers=headers,
-             timeout=60 # 60 seconds timeout for the request
-         )
-         response.raise_for_status()
-
-         # Extract and return the structured data
-         data = response.json()
-         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
-
-     except requests.exceptions.RequestException as e:
-         print(f"Error querying Gemini API: {e}")
-         return {"error": str(e)}
-
-
- if __name__ == '__main__':
-     app.run(debug=True)
 
+ import os
+ import whisper
+ import requests
+ import tempfile
+ import asyncio
+ import aiohttp  # For making async HTTP requests
+ from quart import Quart, request, jsonify, render_template
+ from dotenv import load_dotenv
+ import warnings
+ warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+
+ app = Quart(__name__)
+ print("APP IS RUNNING, ANIKET")
+
+ # Load the .env file
+ load_dotenv()
+
+ print("ENV LOADED, ANIKET")
+
+ # Fetch the API key from the .env file
+ API_KEY = os.getenv("FIRST_API_KEY")
+
+ # Ensure the API key is loaded correctly
+ if not API_KEY:
+     raise ValueError("API Key not found. Make sure it is set in the .env file.")
+
+ GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
+ GEMINI_API_KEY = API_KEY
+
+ # Load Whisper AI model at startup
+ print("Loading Whisper AI model..., ANIKET")
+ whisper_model = whisper.load_model("base")  # Choose model size: tiny, base, small, medium, large
+ print("Whisper AI model loaded successfully, ANIKET")
+
+
+ @app.route("/", methods=["GET"])
+ async def health_check():
+     return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+
+
+ @app.route("/mbsa")
+ async def mbsa():
+     return await render_template("mbsa.html")
+
+
+ @app.route('/process-audio', methods=['POST'])
+ async def process_audio():
+     print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
+
+     # In Quart, request.files is a coroutine and must be awaited
+     files = await request.files
+     if 'audio' not in files:
+         return jsonify({"error": "No audio file provided"}), 400
+
+     audio_file = files['audio']
+     print("AUDIO FILE NAME: ", audio_file)
+
+     try:
+         print("STARTING TRANSCRIPTION, ANIKET")
+
+         # Step 1: Transcribe the uploaded audio file asynchronously
+         transcription = await transcribe_audio(audio_file)
+
+         print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
+
+         if not transcription:
+             return jsonify({"error": "Audio transcription failed"}), 500
+
+         print("GOT THE transcription")
+
+         print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+         # Step 2: Generate structured recipe information using Gemini API asynchronously
+         structured_data = await query_gemini_api(transcription)
+
+         print("GOT THE STRUCTURED DATA", structured_data)
+         # Step 3: Return the structured data
+         return jsonify(structured_data)
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+ async def transcribe_audio(audio_file):
+     """
+     Transcribe audio using Whisper AI (async function).
+     """
+     print("CAME IN THE transcribe audio function")
+     try:
+         # Save the upload to a temporary file so Whisper can read it from disk
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
+             audio_file.save(temp_audio_file.name)
+             print(f"Temporary audio file saved: {temp_audio_file.name}")
+
+         # Run the blocking Whisper transcription in a thread executor
+         loop = asyncio.get_running_loop()
+         result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
+         print("THE RESULTS ARE", result)
+
+         return result.get("text", "").strip()
+
+     except Exception as e:
+         print(f"Error in transcription: {e}")
+         return None
+
+
+ async def query_gemini_api(transcription):
+     """
+     Send transcription text to Gemini API and fetch structured recipe information (async function).
+     """
+     try:
+         # Define the structured prompt
+         prompt = (
+             "Analyze the provided cooking video transcription and extract the following structured information:\n"
+             "1. Recipe Name: Identify the name of the dish being prepared.\n"
+             "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
+             "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
+             "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
+             "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
+             "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
+             "7. Serving size: In count of people or portion size.\n"
+             "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
+             "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
+             f"Text: {transcription}\n"
+         )
+
+         # Prepare the payload and headers
+         payload = {
+             "contents": [
+                 {
+                     "parts": [
+                         {"text": prompt}
+                     ]
+                 }
+             ]
+         }
+         headers = {"Content-Type": "application/json"}
+
+         # Send request to Gemini API asynchronously
+         async with aiohttp.ClientSession() as session:
+             async with session.post(
+                 f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+                 json=payload,
+                 headers=headers,
+                 timeout=aiohttp.ClientTimeout(total=60)  # 60 seconds timeout for the request
+             ) as response:
+                 response.raise_for_status()  # Raise error if response code is not 200
+                 data = await response.json()
+
+         return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+
+     except aiohttp.ClientError as e:
+         print(f"Error querying Gemini API: {e}")
+         return {"error": str(e)}
+
+
+ if __name__ == '__main__':
+     app.run(debug=True)
+
+
+ # # Above code is without polling and sleep
+ # import os
+ # import whisper
+ # import requests
+ # from flask import Flask, request, jsonify, render_template
+ # import tempfile
+ # import warnings
+ # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
+
+ # app = Flask(__name__)
+ # print("APP IS RUNNING, ANIKET")
+
+ # # Gemini API settings
+ # from dotenv import load_dotenv
+ # # Load the .env file
+ # load_dotenv()
+
+ # print("ENV LOADED, ANIKET")
+
+ # # Fetch the API key from the .env file
+ # API_KEY = os.getenv("FIRST_API_KEY")
+
+ # # Ensure the API key is loaded correctly
+ # if not API_KEY:
+ # raise ValueError("API Key not found. Make sure it is set in the .env file.")
+
+ # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
+ # GEMINI_API_KEY = API_KEY
+
+
+ # # Load Whisper AI model at startup
+ # print("Loading Whisper AI model..., ANIKET")
+ # whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
+ # print("Whisper AI model loaded successfully, ANIKET")
+
+
+ # # Define the "/" endpoint for health check
+ # @app.route("/", methods=["GET"])
+ # def health_check():
+ # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
+
+ # @app.route("/mbsa")
+ # def mbsa():
+ # return render_template("mbsa.html")
+
+ # @app.route('/process-audio', methods=['POST'])
+ # def process_audio():
+ # print("GOT THE PROCESS AUDIO REQUEST, ANIKET")
+ # """
+ # Flask endpoint to process audio:
+ # 1. Transcribe provided audio file using Whisper AI.
+ # 2. Send transcription to Gemini API for recipe information extraction.
+ # 3. Return structured data in the response.
+ # """
+
+ # if 'audio' not in request.files:
+ # return jsonify({"error": "No audio file provided"}), 400
+
+ # audio_file = request.files['audio']
+ # print("AUDIO FILE NAME: ", audio_file)
+
+ # try:
+ # print("STARTING TRANSCRIPTION, ANIKET")
+ # # Step 1: Transcribe the uploaded audio file directly
+ # audio_file = request.files['audio']
+ # transcription = transcribe_audio(audio_file)
+
+ # print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
+
+ # if not transcription:
+ # return jsonify({"error": "Audio transcription failed"}), 500
+
+ # print("GOT THE transcription")
+
+ # print("Starting the GEMINI REQUEST TO STRUCTURE IT")
+ # # Step 2: Generate structured recipe information using Gemini API
+ # structured_data = query_gemini_api(transcription)
+
+ # print("GOT THE STRUCTURED DATA", structured_data)
+ # # Step 3: Return the structured data
+ # return jsonify(structured_data)
+
+ # except Exception as e:
+ # return jsonify({"error": str(e)}), 500
+
+ # def transcribe_audio(audio_path):
+ # """
+ # Transcribe audio using Whisper AI.
+ # """
+ # print("CAME IN THE transcribe audio function")
+ # try:
+ # # Transcribe audio using Whisper AI
+ # print("Transcribing audio...")
+ # result = whisper_model.transcribe(audio_path)
+ # print("THE RESULTS ARE", result)
+
+ # return result.get("text", "").strip()
+
+ # except Exception as e:
+ # print(f"Error in transcription: {e}")
+ # return None
+
+
+ # def query_gemini_api(transcription):
+ # """
+ # Send transcription text to Gemini API and fetch structured recipe information.
+ # """
+ # try:
+ # # Define the structured prompt
+ # prompt = (
+ # "Analyze the provided cooking video transcription and extract the following structured information:\n"
+ # "1. Recipe Name: Identify the name of the dish being prepared.\n"
+ # "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
+ # "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
+ # "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
+ # "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
+ # "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
+ # "7. Serving size: In count of people or portion size.\n"
+ # "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
+ # "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
+ # f"Text: {transcription}\n"
+ # )
+
+ # # Prepare the payload and headers
+ # payload = {
+ # "contents": [
+ # {
+ # "parts": [
+ # {"text": prompt}
+ # ]
+ # }
+ # ]
+ # }
+ # headers = {"Content-Type": "application/json"}
+
+ # # Send request to Gemini API and wait for the response
+ # print("Querying Gemini API...")
+ # response = requests.post(
+ # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+ # json=payload,
+ # headers=headers,
+ # timeout=60 # 60 seconds timeout for the request
+ # )
+ # response.raise_for_status()
+
+ # # Extract and return the structured data
+ # data = response.json()
+ # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
+
+ # except requests.exceptions.RequestException as e:
+ # print(f"Error querying Gemini API: {e}")
+ # return {"error": str(e)}
+
+
+ # if __name__ == '__main__':
+ # app.run(debug=True)
+
+
+
+
+
+
+
  # import os
  # import subprocess
  # import whisper

  # if __name__ == '__main__':
  # app.run(debug=True)
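
Usage note: the new file still ends with app.run(debug=True), which starts Quart's development server (by default on 127.0.0.1:5000, mirroring Flask); a production deployment would more likely use an ASGI server such as Hypercorn (hypercorn app:app). Assuming the development server and a local sample.wav, a client call against /process-audio might look like this sketch (URL, port, and file name are assumptions):

import requests

# Hypothetical client for the /process-audio endpoint; the multipart field must be named "audio".
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:5000/process-audio",
        files={"audio": f},
        timeout=300,  # transcription plus the Gemini call can take a while
    )
print(resp.status_code)
print(resp.json())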