DavidCombei committed (verified)
Commit d2fc68f · 1 Parent(s): 0436843

Update app.py

Files changed (1):
  1. app.py +27 -31
app.py CHANGED
@@ -2,23 +2,19 @@ import joblib
 from transformers import AutoFeatureExtractor, Wav2Vec2Model
 import torch
 import librosa
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-import gradio as gr
-import os
-import torch.nn.functional as F
 from scipy.special import expit
 import json
-
+import os
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
+
+
 class CustomWav2Vec2Model(Wav2Vec2Model):
     def __init__(self, config):
         super().__init__(config)
         self.encoder.layers = self.encoder.layers[:9]
 
-truncated_model = CustomWav2Vec2Model.from_pretrained("facebook/wav2vec2-xls-r-2b")
 
 class HuggingFaceFeatureExtractor:
     def __init__(self, model, feature_extractor_name):
@@ -38,10 +34,14 @@ class HuggingFaceFeatureExtractor:
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
         with torch.no_grad():
             outputs = self.model(**inputs, output_hidden_states=True)
-        return outputs.hidden_states[9]
+        return outputs.hidden_states[9]
 
+
+truncated_model = CustomWav2Vec2Model.from_pretrained(r"C:\Users\david\PycharmProjects\David2\model\wav2vec2-xls-r-2b_truncated")
 FEATURE_EXTRACTOR = HuggingFaceFeatureExtractor(truncated_model, "facebook/wav2vec2-xls-r-2b")
-classifier,scaler, thresh = joblib.load('logreg_margin_pruning_ALL_with_scaler+threshold.joblib')
+classifier, scaler, thresh = joblib.load(r'C:\Users\david\PycharmProjects\David2\model\logreg_margin_pruning_ALL_with_scaler_threshold.joblib')
+
+
 
 def segment_audio(audio, sr, segment_duration):
     segment_samples = int(segment_duration * sr)
@@ -60,37 +60,33 @@ def process_audio(input_data, segment_duration=10):
     audio = audio[0]
     segments = segment_audio(audio, sr, segment_duration)
     segment_predictions = []
-    confidence_scores = []
-    eer_threshold = thresh - 5e-3  # small margin of error
-
+    confidence_scores_fake_sum = 0
+    fake_segments = 0
+    confidence_scores_real_sum = 0
+    real_segments = 0
+    eer_threshold = thresh - 5e-3  # small margin error due to feature extractor space differences
+    #print(eer_threshold)
     for idx, segment in enumerate(segments):
         features = FEATURE_EXTRACTOR(segment, sr)
-        features_avg = torch.mean(features, dim=1).cpu().numpy().reshape(1, -1)
+        features_avg = torch.mean(features, dim=1).cpu().numpy()
+        features_avg = features_avg.reshape(1, -1)
         decision_score = classifier.decision_function(features_avg)
         decision_score_scaled = scaler.transform(decision_score.reshape(-1, 1)).flatten()
         decision_value = decision_score_scaled[0]
         pred = 1 if decision_value >= eer_threshold else 0
-
-        if pred == 1:
-            confidence_percentage = expit(decision_score).item()
-        else:
+        if pred == 0:
             confidence_percentage = 1 - expit(decision_score).item()
-
+            confidence_scores_fake_sum += confidence_percentage
+            fake_segments += 1
+        else:
+            confidence_percentage = expit(decision_score).item()
+            confidence_scores_real_sum += confidence_percentage
+            real_segments += 1
         segment_predictions.append(pred)
-        confidence_scores.append(confidence_percentage)
-
     output_dict = {
-        "label": "real" if sum(segment_predictions) > (len(segment_predictions) / 2) else "fake",
-        "segments": [
-            {
-                "segment": idx + 1,
-                "prediction": "real" if pred == 1 else "fake",
-                "confidence": round(conf * 100, 2)
-            }
-            for idx, (pred, conf) in enumerate(zip(segment_predictions, confidence_scores))
-        ]
-    }
-
+        "label": "real" if sum(segment_predictions) > (len(segment_predictions) / 2) else "fake",
+        "confidence score:": f'{confidence_scores_real_sum/real_segments:.2f}' if sum(segment_predictions) > (len(segment_predictions) / 2) else f'{confidence_scores_fake_sum/fake_segments:.2f}'
    }
     json_output = json.dumps(output_dict, indent=4)
     print(json_output)
     return json_output
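
The commit swaps the startup-time truncation of the hub checkpoint for a pre-truncated copy loaded from a local path. A minimal sketch of how such a checkpoint could be produced once and saved for reuse (the output directory name is an assumption based on the path in the diff; the slicing mirrors CustomWav2Vec2Model above):

from transformers import Wav2Vec2Model

class CustomWav2Vec2Model(Wav2Vec2Model):
    def __init__(self, config):
        super().__init__(config)
        # keep only the first 9 transformer encoder layers
        self.encoder.layers = self.encoder.layers[:9]

# download the full checkpoint once, then persist the truncated copy;
# from_pretrained will warn about the unused weights of the dropped layers
model = CustomWav2Vec2Model.from_pretrained("facebook/wav2vec2-xls-r-2b")
model.save_pretrained("wav2vec2-xls-r-2b_truncated")  # assumed output dir

Loading the saved copy afterwards avoids re-downloading and re-slicing the full 2B-parameter model on every app start.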
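The rewritten process_audio drops the per-segment JSON listing in favor of a clip-level majority vote plus the mean confidence of the winning class. A standalone sketch of that aggregation with made-up decision scores (placeholder values; the app thresholds the scaler-transformed score but feeds the raw score to expit, which this sketch collapses into a single score for brevity):

from scipy.special import expit

decision_scores = [1.7, 0.9, -0.4, 2.1, 1.2]  # placeholder per-segment raw scores
eer_threshold = 0.0  # placeholder; the app loads thresh and subtracts 5e-3

preds, real_conf, fake_conf = [], [], []
for score in decision_scores:
    if score >= eer_threshold:              # predicted real (label 1)
        preds.append(1)
        real_conf.append(expit(score))      # sigmoid as pseudo-probability of "real"
    else:                                   # predicted fake (label 0)
        preds.append(0)
        fake_conf.append(1 - expit(score))

is_real = sum(preds) > len(preds) / 2       # simple majority vote over segments
winning = real_conf if is_real else fake_conf
print("real" if is_real else "fake", f"{sum(winning) / len(winning):.2f}")

Averaging only the winning class's confidences mirrors the new confidence_scores_real_sum / confidence_scores_fake_sum accounting in the diff; note that a tied vote resolves to "fake" because the majority comparison is strict.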