Pijush2023 committed on
Commit
3595ee8
·
verified ·
1 Parent(s): ebe2251

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -4
app.py CHANGED
@@ -58,21 +58,31 @@ pipe_asr = pipeline(
58
  return_timestamps=True
59
  )
60
 
61
# Function to determine if a pause occurred
def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
    """Take in the stream, determine if a pause happened.

    Args:
        audio: Raw audio samples accumulated so far.
        sampling_rate: Sample rate of ``audio`` in Hz.
        state: Mutable app state; ``state.started_talking`` is read and set here.

    Returns:
        True when a pause is detected after speech has started, else False.
    """
    temp_audio = audio
    dur_vad = len(temp_audio) / sampling_rate  # Simulating VAD duration for this example
    duration = len(audio) / sampling_rate

    # A voiced stretch longer than 0.5 s marks the start of speech.
    if dur_vad > 0.5 and not state.started_talking:
        print("Started talking")
        state.started_talking = True
        return False

    print(f"Duration after VAD: {dur_vad:.3f} s")

    # BUG FIX: only report a pause once speech has actually started.
    # Previously this returned True for leading silence as well, because the
    # (duration - dur_vad) > 1 check ignored state.started_talking.
    return state.started_talking and (duration - dur_vad) > 1  # Adjust the threshold for pause duration as needed
76
 
77
  # Function to process audio input, detect pauses, and handle state
78
  def process_audio(audio: tuple, state: AppState):
@@ -96,10 +106,20 @@ def process_audio(audio: tuple, state: AppState):
96
  _, transcription, _ = transcribe_function(state.stream, (state.sampling_rate, state.stream))
97
  print(f"Transcription: {transcription}")
98
 
 
 
 
 
 
99
  # Retrieve hybrid response using Neo4j and other methods
100
  response_text = retriever(transcription)
101
  print(f"Response: {response_text}")
102
 
 
 
 
 
 
103
  # Generate audio from the response text
104
  audio_path = generate_audio_elevenlabs(response_text)
105
 
@@ -112,6 +132,7 @@ def process_audio(audio: tuple, state: AppState):
112
 
113
  return None, state
114
 
 
115
  # Function to process audio input and transcribe it
116
  def transcribe_function(stream, new_chunk):
117
  try:
 
58
  return_timestamps=True
59
  )
60
 
61
# Adjusted function to determine if a pause occurred
def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
    """Take in the stream, determine if a pause happened."""
    # NOTE: VAD is simulated here — the "voiced" duration is just the full
    # clip length, so dur_vad always equals duration in this implementation.
    dur_vad = len(audio) / sampling_rate
    duration = len(audio) / sampling_rate

    # Trace both durations for debugging.
    print(f"Duration after VAD: {dur_vad:.3f} s, Total Duration: {duration:.3f} s")

    # The first voiced stretch longer than half a second marks speech onset.
    if not state.started_talking and dur_vad > 0.5:
        print("Started talking")
        state.started_talking = True
        return False

    # After speech has started, a sizable gap between total and voiced
    # duration is treated as a pause.
    pause_threshold = 0.5  # seconds; lower to make pause detection more sensitive
    if state.started_talking and (duration - dur_vad) > pause_threshold:
        print("Pause detected")
        return True

    return False
85
 
 
86
 
87
  # Function to process audio input, detect pauses, and handle state
88
  def process_audio(audio: tuple, state: AppState):
 
106
  _, transcription, _ = transcribe_function(state.stream, (state.sampling_rate, state.stream))
107
  print(f"Transcription: {transcription}")
108
 
109
+ # Check if transcription is empty
110
+ if not transcription:
111
+ print("No transcription available.")
112
+ return None, state
113
+
114
  # Retrieve hybrid response using Neo4j and other methods
115
  response_text = retriever(transcription)
116
  print(f"Response: {response_text}")
117
 
118
+ # Check if the response is empty before proceeding
119
+ if not response_text:
120
+ print("No response generated.")
121
+ return None, state
122
+
123
  # Generate audio from the response text
124
  audio_path = generate_audio_elevenlabs(response_text)
125
 
 
132
 
133
  return None, state
134
 
135
+
136
  # Function to process audio input and transcribe it
137
  def transcribe_function(stream, new_chunk):
138
  try: