dhs-st committed on
Commit
c392f9e
·
verified ·
1 Parent(s): 7fdf8ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -20
app.py CHANGED
@@ -47,7 +47,46 @@ class SpeakerVerification:
47
  probability = max(0.0, min(1.0, probability))
48
  return probability
49
 
50
- def verify_speaker(self, audio_path1: str, audio_path2: str) -> tuple[float, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  try:
52
  wav_path1 = self.convert_audio(audio_path1)
53
  wav_path2 = self.convert_audio(audio_path2)
@@ -63,11 +102,14 @@ class SpeakerVerification:
63
  probability = self.score_to_probability(score_value)
64
  decision = "Same speaker" if prediction.item() else "Different speakers"
65
 
66
- return probability, decision, score_value
 
 
 
67
 
68
  except Exception as e:
69
  print(f"Error in speaker verification: {str(e)}")
70
- return 0.0, f"Error: {str(e)}"
71
 
72
  def get_embeddings(self, audio_path: str):
73
  wav_path = self.convert_audio(audio_path)
@@ -109,24 +151,30 @@ class SpeakerVerification:
109
  def create_gradio_interface():
110
  speaker_verifier = SpeakerVerification()
111
 
112
- def process_audio(audio1, audio2):
113
- try:
114
- if audio1 is None or audio2 is None:
115
- return "Error: Please provide both audio samples", None
116
-
117
- probability, decision, score = speaker_verifier.verify_speaker(audio1, audio2)
118
- emb1 = speaker_verifier.get_embeddings(audio1)
119
- emb2 = speaker_verifier.get_embeddings(audio2)
120
-
121
- embeddings_plot = speaker_verifier.plot_embeddings_comparison(emb1, emb2)
122
-
123
- #result_text = f"Probability: {probability:.2%}\nCosine similarity: {score}\nDecision: {decision}"
124
- result_text = f"Cosine similarity (threshold for the model=0.25): {score}\nDecision: {decision}"
125
-
126
- return result_text, embeddings_plot
127
 
128
- except Exception as e:
129
- return f"Error processing audio: {str(e)}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  interface = gr.Interface(
132
  fn=process_audio,
 
47
  probability = max(0.0, min(1.0, probability))
48
  return probability
49
 
50
+ def calculate_confidence_metrics(self, score_value: float) -> dict:
51
+ """Calculate various confidence metrics."""
52
+ try:
53
+ # Distance from threshold
54
+ threshold_distance = abs(score_value - self.threshold)
55
+
56
+ # Normalized confidence score (0-1 scale)
57
+ normalized_confidence = (score_value + 1) / 2
58
+
59
+ # Certainty score based on distance from decision boundary
60
+ certainty = 1 - (1 / (1 + np.exp(5 * threshold_distance)))
61
+
62
+ # Decision strength (how far from ambiguous region)
63
+ ambiguous_region = 0.1
64
+ if abs(score_value - self.threshold) < ambiguous_region:
65
+ decision_strength = "Low"
66
+ elif abs(score_value - self.threshold) < ambiguous_region * 2:
67
+ decision_strength = "Medium"
68
+ else:
69
+ decision_strength = "High"
70
+
71
+ # Confidence level categories
72
+ if certainty < 0.6:
73
+ confidence_level = "Low"
74
+ elif certainty < 0.8:
75
+ confidence_level = "Medium"
76
+ else:
77
+ confidence_level = "High"
78
+
79
+ return {
80
+ "certainty_score": certainty,
81
+ "threshold_distance": threshold_distance,
82
+ "decision_strength": decision_strength,
83
+ "confidence_level": confidence_level
84
+ }
85
+ except Exception as e:
86
+ print(f"Error calculating confidence metrics: {str(e)}")
87
+ return {}
88
+
89
+ def verify_speaker(self, audio_path1: str, audio_path2: str) -> tuple:
90
  try:
91
  wav_path1 = self.convert_audio(audio_path1)
92
  wav_path2 = self.convert_audio(audio_path2)
 
102
  probability = self.score_to_probability(score_value)
103
  decision = "Same speaker" if prediction.item() else "Different speakers"
104
 
105
+ # Calculate confidence metrics
106
+ confidence_metrics = self.calculate_confidence_metrics(score_value)
107
+
108
+ return probability, decision, score_value, confidence_metrics
109
 
110
  except Exception as e:
111
  print(f"Error in speaker verification: {str(e)}")
112
+ return 0.0, f"Error: {str(e)}", 0.0, {}
113
 
114
  def get_embeddings(self, audio_path: str):
115
  wav_path = self.convert_audio(audio_path)
 
151
  def create_gradio_interface():
152
  speaker_verifier = SpeakerVerification()
153
 
154
+ def process_audio(audio1, audio2):
155
+ try:
156
+ if audio1 is None or audio2 is None:
157
+ return "Error: Please provide both audio samples", None
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ probability, decision, score, confidence_metrics = speaker_verifier.verify_speaker(audio1, audio2)
160
+ emb1 = speaker_verifier.get_embeddings(audio1)
161
+ emb2 = speaker_verifier.get_embeddings(audio2)
162
+
163
+ embeddings_plot = speaker_verifier.plot_embeddings_comparison(emb1, emb2)
164
+
165
+ result_text = (
166
+ f"Cosine similarity (threshold=0.25): {score:.3f}\n"
167
+ f"Decision: {decision}\n"
168
+ f"Certainty Score: {confidence_metrics['certainty_score']:.2f}\n"
169
+ f"Threshold Distance: {confidence_metrics['threshold_distance']:.3f}\n"
170
+ f"Decision Strength: {confidence_metrics['decision_strength']}\n"
171
+ f"Confidence Level: {confidence_metrics['confidence_level']}"
172
+ )
173
+
174
+ return result_text, embeddings_plot
175
+
176
+ except Exception as e:
177
+ return f"Error processing audio: {str(e)}", None
178
 
179
  interface = gr.Interface(
180
  fn=process_audio,