Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -152,7 +152,7 @@ def get_information_from_video_using_OCR(video_path, interval=1):
|
|
152 |
cleaned_text = clean_ocr_text(text)
|
153 |
if cleaned_text:
|
154 |
extracted_text += cleaned_text + "\n\n"
|
155 |
-
print(f"Text found at frame {frame_count}: {cleaned_text[:50]}...")
|
156 |
|
157 |
frame_count += 1
|
158 |
|
@@ -223,6 +223,7 @@ def query_gemini_api(video_transcription, audio_transcription):
|
|
223 |
"""
|
224 |
Send transcription text to Gemini API and fetch structured recipe information synchronously.
|
225 |
"""
|
|
|
226 |
try:
|
227 |
# Define the structured prompt
|
228 |
prompt = (
|
@@ -236,8 +237,9 @@ def query_gemini_api(video_transcription, audio_transcription):
|
|
236 |
"7. Serving size: In count of people or portion size.\n"
|
237 |
"8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
|
238 |
"9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
|
239 |
-
|
240 |
-
|
|
|
241 |
)
|
242 |
|
243 |
# Prepare the payload and headers
|
|
|
152 |
cleaned_text = clean_ocr_text(text)
|
153 |
if cleaned_text:
|
154 |
extracted_text += cleaned_text + "\n\n"
|
155 |
+
# print(f"Text found at frame {frame_count}: {cleaned_text[:50]}...")
|
156 |
|
157 |
frame_count += 1
|
158 |
|
|
|
223 |
"""
|
224 |
Send transcription text to Gemini API and fetch structured recipe information synchronously.
|
225 |
"""
|
226 |
+
transcription = f"audio transcription: {audio_transcription} and video transcription: {video_transcription}"
|
227 |
try:
|
228 |
# Define the structured prompt
|
229 |
prompt = (
|
|
|
237 |
"7. Serving size: In count of people or portion size.\n"
|
238 |
"8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
|
239 |
"9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
|
240 |
+
"There are errors and missing parts in the video transcription part, if something is not able to interpret from the video information use the audio information\n"
|
241 |
+
"If you are not able to get required information, return empty texts for the fields that I asked above instead of giving any other text response."
|
242 |
+
f"Text: {transcription}\n"
|
243 |
)
|
244 |
|
245 |
# Prepare the payload and headers
|