noumanjavaid committed on
Commit
d3a14d0
·
verified ·
1 Parent(s): 0d5b319

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +225 -157
app.py CHANGED
@@ -5,36 +5,36 @@ import os
5
  import time
6
  import numpy as np
7
  import tempfile
8
- from io import BytesIO, StringIO
9
  from md2pdf.core import md2pdf
10
  from dotenv import load_dotenv
11
  from datetime import datetime
12
- import threading
 
 
 
13
  from download import download_video_audio, delete_download
 
14
 
15
- # Override the max file size (40MB in bytes)
16
- MAX_FILE_SIZE = 41943040 # 40MB in bytes
17
  FILE_TOO_LARGE_MESSAGE = "File too large. Maximum size is 40MB."
18
 
19
- # Load environment variables in a secure way
20
  load_dotenv()
21
 
22
  # Initialize session states
23
  if 'api_key' not in st.session_state:
24
- # Try to get API key from environment variable first
25
  st.session_state.api_key = os.environ.get("GROQ_API_KEY", "")
26
 
27
- if 'recording' not in st.session_state:
28
- st.session_state.recording = False
29
-
30
- if 'audio_data' not in st.session_state:
31
- st.session_state.audio_data = None
32
-
33
  if 'transcript' not in st.session_state:
34
  st.session_state.transcript = ""
35
 
36
  if 'groq_client' not in st.session_state:
37
  st.session_state.groq_client = None
 
 
 
38
 
39
  # Set page configuration
40
  st.set_page_config(
@@ -48,17 +48,41 @@ st.set_page_config(
48
  LLM_MODEL = "deepseek-r1-distill-llama-70b"
49
  TRANSCRIPTION_MODEL = "distil-whisper-large-v3-en"
50
 
51
- # Initialize Groq client function
52
- def initialize_groq_client(api_key):
 
 
 
 
 
53
  """Initialize Groq client with the provided API key"""
54
  if not api_key:
55
  return None
 
56
  try:
57
- return Groq(api_key=api_key)
 
 
 
58
  except Exception as e:
59
- st.error(f"Failed to initialize Groq client: {e}")
 
 
 
 
 
 
60
  return None
61
 
 
 
 
 
 
 
 
 
 
62
  class GenerationStatistics:
63
  def __init__(self, input_time=0, output_time=0, input_tokens=0, output_tokens=0, total_time=0, model_name=LLM_MODEL):
64
  self.input_time = input_time
@@ -168,67 +192,88 @@ class NoteSection:
168
  markdown_content += self.get_markdown_content(content, level + 1)
169
  return markdown_content
170
 
171
- # Audio recorder functionality
172
- class AudioRecorder:
173
- def __init__(self, sample_rate=44100):
174
- self.sample_rate = sample_rate
175
- self.recording = False
176
- self.audio_data = []
177
- self.thread = None
178
-
179
- def start_recording(self):
180
- self.recording = True
181
- self.audio_data = []
182
- self.thread = threading.Thread(target=self._record_audio)
183
- self.thread.start()
184
-
185
- def _record_audio(self):
186
- import sounddevice as sd
187
- with sd.InputStream(callback=self._audio_callback, channels=1, samplerate=self.sample_rate):
188
- while self.recording:
189
- time.sleep(0.1)
190
-
191
- def _audio_callback(self, indata, frames, time, status):
192
- if status:
193
- print(f"Status: {status}")
194
- self.audio_data.append(indata.copy())
195
 
196
- def stop_recording(self):
197
- self.recording = False
198
- if self.thread:
199
- self.thread.join()
200
 
201
- if not self.audio_data:
202
- return None
203
-
204
- # Concatenate all audio chunks
205
- import numpy as np
206
- import soundfile as sf
207
- audio = np.concatenate(self.audio_data, axis=0)
208
-
209
- # Save to a temporary file
210
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".m4a")
211
- sf.write(temp_file.name, audio, self.sample_rate)
212
 
213
- return temp_file.name
214
-
215
- def transcribe_audio_with_groq(audio_file_path):
216
- """Transcribe audio file using Groq's transcription API"""
217
  if not st.session_state.groq_client:
218
- st.error("Groq client is not initialized. Please check your API key.")
219
- return ""
220
 
221
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  with open(audio_file_path, "rb") as file:
 
 
 
 
223
  transcription = st.session_state.groq_client.audio.transcriptions.create(
224
  file=(audio_file_path, file.read()),
225
  model=TRANSCRIPTION_MODEL,
226
  response_format="verbose_json"
227
  )
 
 
 
 
 
 
 
 
 
 
 
228
  return transcription.text
 
229
  except Exception as e:
230
- st.error(f"Error transcribing audio with Groq: {e}")
231
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
  def process_transcript(transcript):
234
  """Process transcript with Groq's DeepSeek model for highly structured notes"""
@@ -472,71 +517,58 @@ def main():
472
  # Input methods tabs
473
  input_method = st.radio("Choose input method:", ["Live Recording", "Upload Audio", "YouTube URL", "Text Input"])
474
 
475
- audio_recorder = AudioRecorder()
476
-
477
  if input_method == "Live Recording":
478
- col1, col2 = st.columns(2)
 
479
 
480
- with col1:
481
- if not st.session_state.recording:
482
- if st.button("Start Recording 🎤", key="start_rec"):
483
- st.session_state.recording = True
484
- audio_recorder.start_recording()
485
- st.rerun()
486
- else:
487
- if st.button("Stop Recording ⏹️", key="stop_rec"):
488
- audio_file = audio_recorder.stop_recording()
489
- st.session_state.recording = False
490
-
491
- if audio_file:
492
- st.session_state.audio_data = audio_file
493
- st.success("Recording saved!")
494
-
495
- # Auto-transcribe using Groq
496
- with st.spinner("Transcribing audio with Groq..."):
497
- transcript = transcribe_audio_with_groq(audio_file)
498
- if transcript:
499
- st.session_state.transcript = transcript
500
- st.success("Transcription complete!")
501
- st.rerun()
502
 
503
- with col2:
504
- if st.session_state.recording:
505
- st.markdown("#### 🔴 Recording in progress...")
506
-
507
- # Animated recording indicator
508
- progress_bar = st.progress(0)
509
- for i in range(100):
510
- time.sleep(0.05)
511
- progress_bar.progress((i + 1) % 101)
512
-
513
- # Break if recording stopped
514
- if not st.session_state.recording:
515
- break
516
- st.rerun()
517
 
518
- if st.session_state.audio_data:
519
- st.audio(st.session_state.audio_data)
520
 
521
- if st.session_state.transcript:
522
- if st.button("Generate Structured Notes", key="generate_live"):
523
- with st.spinner("Creating highly structured notes..."):
524
- notes = process_transcript(st.session_state.transcript)
525
-
526
- if notes:
527
- st.success("Notes generated successfully!")
 
528
 
529
- # Export options
530
- col1, col2 = st.columns(2)
531
- with col1:
532
- if st.button("Export as Markdown", key="md_live"):
533
- export_notes(notes, "markdown")
534
- with col2:
535
- if st.button("Export as PDF", key="pdf_live"):
536
- export_notes(notes, "pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
537
 
538
  elif input_method == "Upload Audio":
539
- uploaded_file = st.file_uploader("Upload an audio file (max 40MB)", type=["mp3", "wav", "m4a", "ogg"])
 
 
 
 
540
 
541
  if uploaded_file:
542
  file_size = uploaded_file.size
@@ -551,32 +583,56 @@ def main():
551
  st.audio(uploaded_file)
552
 
553
  if st.button("Transcribe and Generate Notes", key="transcribe_upload"):
 
 
 
554
  with st.spinner("Transcribing audio with Groq..."):
555
- transcript = transcribe_audio_with_groq(audio_file_path)
556
-
557
- if transcript:
558
- st.session_state.transcript = transcript
559
-
560
- with st.spinner("Creating highly structured notes..."):
561
- notes = process_transcript(transcript)
562
 
563
- if notes:
564
- st.success("Notes generated successfully!")
 
 
 
565
 
566
- # Export options
567
- col1, col2 = st.columns(2)
568
- with col1:
569
- if st.button("Export as Markdown", key="md_upload"):
570
- export_notes(notes, "markdown")
571
- with col2:
572
- if st.button("Export as PDF", key="pdf_upload"):
573
- export_notes(notes, "pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
 
575
  elif input_method == "YouTube URL":
576
- youtube_url = st.text_input("Enter YouTube URL:")
 
 
 
577
 
578
  if youtube_url:
579
  if st.button("Process YouTube Content", key="process_yt"):
 
 
 
580
  with st.spinner("Downloading YouTube content..."):
581
  try:
582
  audio_path = download_video_audio(youtube_url)
@@ -586,26 +642,34 @@ def main():
586
  st.audio(audio_path)
587
 
588
  with st.spinner("Transcribing audio with Groq..."):
589
- transcript = transcribe_audio_with_groq(audio_path)
590
-
591
- if transcript:
592
- st.session_state.transcript = transcript
593
-
594
- with st.spinner("Creating highly structured notes..."):
595
- notes = process_transcript(transcript)
 
596
 
597
- if notes:
598
- st.success("Notes generated successfully!")
599
 
600
- # Export options
601
- col1, col2 = st.columns(2)
602
- with col1:
603
- if st.button("Export as Markdown", key="md_yt"):
604
- export_notes(notes, "markdown")
605
- with col2:
606
- if st.button("Export as PDF", key="pdf_yt"):
607
- export_notes(notes, "pdf")
608
-
 
 
 
 
 
 
 
609
  # Clean up downloaded files
610
  delete_download(audio_path)
611
 
@@ -616,7 +680,11 @@ def main():
616
  st.error(f"Error processing YouTube video: {e}")
617
 
618
  else: # Text Input
619
- transcript = st.text_area("Enter transcript text:", height=300)
 
 
 
 
620
 
621
  if transcript:
622
  st.session_state.transcript = transcript
 
5
  import time
6
  import numpy as np
7
  import tempfile
8
+ from io import BytesIO
9
  from md2pdf.core import md2pdf
10
  from dotenv import load_dotenv
11
  from datetime import datetime
12
+ import backoff
13
+ import requests
14
+ from requests.exceptions import RequestException
15
+ from typing import Optional, Dict, Any
16
  from download import download_video_audio, delete_download
17
+ from st_audiorec import st_audiorec # Import the audio recorder component
18
 
19
+ # Set max file size for audio uploads (40MB)
20
+ MAX_FILE_SIZE = 41943040 # 40MB
21
  FILE_TOO_LARGE_MESSAGE = "File too large. Maximum size is 40MB."
22
 
23
+ # Load environment variables
24
  load_dotenv()
25
 
26
  # Initialize session states
27
  if 'api_key' not in st.session_state:
 
28
  st.session_state.api_key = os.environ.get("GROQ_API_KEY", "")
29
 
 
 
 
 
 
 
30
  if 'transcript' not in st.session_state:
31
  st.session_state.transcript = ""
32
 
33
  if 'groq_client' not in st.session_state:
34
  st.session_state.groq_client = None
35
+
36
+ if 'transcription_error' not in st.session_state:
37
+ st.session_state.transcription_error = None
38
 
39
  # Set page configuration
40
  st.set_page_config(
 
48
  LLM_MODEL = "deepseek-r1-distill-llama-70b"
49
  TRANSCRIPTION_MODEL = "distil-whisper-large-v3-en"
50
 
51
+ # Configure exponential backoff for API retries
52
+ MAX_RETRIES = 5
53
+ INITIAL_WAIT = 0.5
54
+ MAX_WAIT = 30
55
+
56
+ # Initialize Groq client with improved error handling
57
+ def initialize_groq_client(api_key: str) -> Optional[Groq]:
58
  """Initialize Groq client with the provided API key"""
59
  if not api_key:
60
  return None
61
+
62
  try:
63
+ client = Groq(api_key=api_key)
64
+ # Perform a simple test call to validate the API key
65
+ client.models.list()
66
+ return client
67
  except Exception as e:
68
+ error_msg = str(e)
69
+ if "401" in error_msg:
70
+ st.error("❌ Invalid API key: Authentication failed")
71
+ elif "403" in error_msg:
72
+ st.error("❌ API key doesn't have permission to access Groq API")
73
+ else:
74
+ st.error(f"❌ Failed to initialize Groq client: {error_msg}")
75
  return None
76
 
77
+ # Define custom exception for Groq API errors
78
+ class GroqAPIError(Exception):
79
+ """Custom exception for Groq API errors"""
80
+ def __init__(self, message, status_code=None, response=None):
81
+ self.message = message
82
+ self.status_code = status_code
83
+ self.response = response
84
+ super().__init__(self.message)
85
+
86
  class GenerationStatistics:
87
  def __init__(self, input_time=0, output_time=0, input_tokens=0, output_tokens=0, total_time=0, model_name=LLM_MODEL):
88
  self.input_time = input_time
 
192
  markdown_content += self.get_markdown_content(content, level + 1)
193
  return markdown_content
194
 
195
+ # Add backoff decorator for retrying transcription
196
+ @backoff.on_exception(
197
+ backoff.expo,
198
+ (RequestException, GroqAPIError),
199
+ max_tries=MAX_RETRIES,
200
+ factor=INITIAL_WAIT,
201
+ max_value=MAX_WAIT,
202
+ jitter=backoff.full_jitter,
203
+ on_backoff=lambda details: st.info(f"Retrying transcription... (Attempt {details['tries']}/{MAX_RETRIES})")
204
+ )
205
+ def transcribe_audio_with_groq(audio_data) -> str:
206
+ """
207
+ Transcribe audio file using Groq's transcription API with retry capability
 
 
 
 
 
 
 
 
 
 
 
208
 
209
+ Args:
210
+ audio_data: Either file path string or binary audio data
 
 
211
 
212
+ Returns:
213
+ Transcribed text
 
 
 
 
 
 
 
 
 
214
 
215
+ Raises:
216
+ GroqAPIError: For API-related errors
217
+ ValueError: For invalid input
218
+ """
219
  if not st.session_state.groq_client:
220
+ raise ValueError("Groq client is not initialized. Please check your API key.")
 
221
 
222
  try:
223
+ # Save audio data to a temporary file if it's binary data
224
+ if isinstance(audio_data, bytes):
225
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
226
+ tmp_file.write(audio_data)
227
+ audio_file_path = tmp_file.name
228
+ else:
229
+ # Assume it's a file path
230
+ audio_file_path = audio_data
231
+ if not os.path.exists(audio_file_path):
232
+ raise ValueError(f"Audio file not found: {audio_file_path}")
233
+
234
+ # Check file size before sending
235
+ file_size = os.path.getsize(audio_file_path)
236
+ if file_size > MAX_FILE_SIZE:
237
+ raise ValueError(f"Audio file size ({file_size/1048576:.2f}MB) exceeds 40MB limit")
238
+
239
  with open(audio_file_path, "rb") as file:
240
+ # Display a progress message since transcription can take time
241
+ progress_placeholder = st.empty()
242
+ progress_placeholder.info("Processing audio... This may take a minute.")
243
+
244
  transcription = st.session_state.groq_client.audio.transcriptions.create(
245
  file=(audio_file_path, file.read()),
246
  model=TRANSCRIPTION_MODEL,
247
  response_format="verbose_json"
248
  )
249
+
250
+ # Clear the progress message when done
251
+ progress_placeholder.empty()
252
+
253
+ if not hasattr(transcription, 'text') or not transcription.text:
254
+ raise GroqAPIError("Empty transcription result returned")
255
+
256
+ # Delete temp file if we created one
257
+ if isinstance(audio_data, bytes) and os.path.exists(audio_file_path):
258
+ os.unlink(audio_file_path)
259
+
260
  return transcription.text
261
+
262
  except Exception as e:
263
+ error_msg = str(e)
264
+
265
+ # Handle specific error cases
266
+ if "401" in error_msg:
267
+ raise GroqAPIError("Authentication failed. Please check your API key.", 401)
268
+ elif "429" in error_msg:
269
+ raise GroqAPIError("Rate limit exceeded. Please try again later.", 429)
270
+ elif "413" in error_msg:
271
+ raise GroqAPIError("Audio file too large for processing.", 413)
272
+ elif "500" in error_msg or "502" in error_msg or "503" in error_msg or "504" in error_msg:
273
+ raise GroqAPIError("Groq server error. Please try again later.", int(error_msg[:3]))
274
+ else:
275
+ # Re-raise as a GroqAPIError for consistent handling
276
+ raise GroqAPIError(f"Error transcribing audio: {error_msg}")
277
 
278
  def process_transcript(transcript):
279
  """Process transcript with Groq's DeepSeek model for highly structured notes"""
 
517
  # Input methods tabs
518
  input_method = st.radio("Choose input method:", ["Live Recording", "Upload Audio", "YouTube URL", "Text Input"])
519
 
 
 
520
  if input_method == "Live Recording":
521
+ st.markdown("### Record Audio")
522
+ st.markdown("Click the microphone button below to start recording. Click it again to stop.")
523
 
524
+ # Use the streamlit-audiorec component for recording
525
+ wav_audio_data = st_audiorec()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
+ # If audio data is returned, display and process it
528
+ if wav_audio_data is not None:
529
+ # Reset any previous transcription errors
530
+ st.session_state.transcription_error = None
 
 
 
 
 
 
 
 
 
 
531
 
532
+ # Display the audio
533
+ st.audio(wav_audio_data, format='audio/wav')
534
 
535
+ # Add a button to transcribe the recorded audio
536
+ if st.button("Transcribe Recording", key="transcribe_rec"):
537
+ with st.spinner("Transcribing audio with Groq..."):
538
+ try:
539
+ transcript = transcribe_audio_with_groq(wav_audio_data)
540
+ if transcript:
541
+ st.session_state.transcript = transcript
542
+ st.success("✅ Transcription complete!")
543
 
544
+ with st.expander("View Transcript", expanded=True):
545
+ st.markdown(transcript)
546
+
547
+ if st.button("Generate Structured Notes", key="generate_live"):
548
+ with st.spinner("Creating highly structured notes..."):
549
+ notes = process_transcript(transcript)
550
+
551
+ if notes:
552
+ st.success("Notes generated successfully!")
553
+
554
+ # Export options
555
+ col1, col2 = st.columns(2)
556
+ with col1:
557
+ if st.button("Export as Markdown", key="md_live"):
558
+ export_notes(notes, "markdown")
559
+ with col2:
560
+ if st.button("Export as PDF", key="pdf_live"):
561
+ export_notes(notes, "pdf")
562
+ except (ValueError, GroqAPIError) as e:
563
+ st.session_state.transcription_error = str(e)
564
+ st.error(f"❌ Transcription failed: {str(e)}")
565
 
566
  elif input_method == "Upload Audio":
567
+ uploaded_file = st.file_uploader(
568
+ "Upload an audio file (max 40MB)",
569
+ type=["mp3", "wav", "m4a", "ogg"],
570
+ help="Supported formats: MP3, WAV, M4A, OGG. Maximum size: 40MB"
571
+ )
572
 
573
  if uploaded_file:
574
  file_size = uploaded_file.size
 
583
  st.audio(uploaded_file)
584
 
585
  if st.button("Transcribe and Generate Notes", key="transcribe_upload"):
586
+ # Reset any previous transcription errors
587
+ st.session_state.transcription_error = None
588
+
589
  with st.spinner("Transcribing audio with Groq..."):
590
+ try:
591
+ transcript = transcribe_audio_with_groq(audio_file_path)
592
+ if transcript:
593
+ st.session_state.transcript = transcript
594
+ st.success("✅ Transcription complete!")
 
 
595
 
596
+ with st.expander("View Transcript", expanded=True):
597
+ st.markdown(transcript)
598
+
599
+ with st.spinner("Creating highly structured notes..."):
600
+ notes = process_transcript(transcript)
601
 
602
+ if notes:
603
+ st.success("Notes generated successfully!")
604
+
605
+ # Export options
606
+ col1, col2 = st.columns(2)
607
+ with col1:
608
+ if st.button("Export as Markdown", key="md_upload"):
609
+ export_notes(notes, "markdown")
610
+ with col2:
611
+ if st.button("Export as PDF", key="pdf_upload"):
612
+ export_notes(notes, "pdf")
613
+ except (ValueError, GroqAPIError) as e:
614
+ st.session_state.transcription_error = str(e)
615
+ st.error(f"❌ Transcription failed: {str(e)}")
616
+
617
+ # Provide helpful suggestions based on error type
618
+ if "Audio file too large" in str(e) or "exceeds" in str(e):
619
+ st.info("💡 Try trimming your audio file or uploading a shorter segment.")
620
+ elif "API key" in str(e) or "Authentication" in str(e):
621
+ st.info("💡 Check that your Groq API key is correct and has access to the transcription API.")
622
+ elif "Rate limit" in str(e):
623
+ st.info("💡 You've hit Groq's rate limits. Please wait a few minutes before trying again.")
624
 
625
  elif input_method == "YouTube URL":
626
+ youtube_url = st.text_input(
627
+ "Enter YouTube URL:",
628
+ help="Enter the full URL of a YouTube video (e.g., https://www.youtube.com/watch?v=example)"
629
+ )
630
 
631
  if youtube_url:
632
  if st.button("Process YouTube Content", key="process_yt"):
633
+ # Reset any previous errors
634
+ st.session_state.transcription_error = None
635
+
636
  with st.spinner("Downloading YouTube content..."):
637
  try:
638
  audio_path = download_video_audio(youtube_url)
 
642
  st.audio(audio_path)
643
 
644
  with st.spinner("Transcribing audio with Groq..."):
645
+ try:
646
+ transcript = transcribe_audio_with_groq(audio_path)
647
+ if transcript:
648
+ st.session_state.transcript = transcript
649
+ st.success("✅ Transcription complete!")
650
+
651
+ with st.expander("View Transcript", expanded=True):
652
+ st.markdown(transcript)
653
 
654
+ with st.spinner("Creating highly structured notes..."):
655
+ notes = process_transcript(transcript)
656
 
657
+ if notes:
658
+ st.success("Notes generated successfully!")
659
+
660
+ # Export options
661
+ col1, col2 = st.columns(2)
662
+ with col1:
663
+ if st.button("Export as Markdown", key="md_yt"):
664
+ export_notes(notes, "markdown")
665
+ with col2:
666
+ if st.button("Export as PDF", key="pdf_yt"):
667
+ export_notes(notes, "pdf")
668
+
669
+ except (ValueError, GroqAPIError) as e:
670
+ st.session_state.transcription_error = str(e)
671
+ st.error(f"❌ Transcription failed: {str(e)}")
672
+
673
  # Clean up downloaded files
674
  delete_download(audio_path)
675
 
 
680
  st.error(f"Error processing YouTube video: {e}")
681
 
682
  else: # Text Input
683
+ transcript = st.text_area(
684
+ "Enter transcript text:",
685
+ height=300,
686
+ help="Paste or type your transcript text here for generating structured notes"
687
+ )
688
 
689
  if transcript:
690
  st.session_state.transcript = transcript