Antonio committed
Commit adad62e · 1 Parent(s): 374f948
Files changed (1): app.py +6 -4
app.py CHANGED
@@ -25,6 +25,8 @@ def get_emotion_from_filename(filename):
 
 def separate_video_audio(file_path):
     output_dir = './temp/'
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
     video_path = os.path.join(output_dir, os.path.basename(file_path).replace('.mp4', '_video.mp4'))
     audio_path = os.path.join(output_dir, os.path.basename(file_path).replace('.mp4', '_audio.wav'))
 
@@ -81,7 +83,7 @@ def video_label_to_emotion(label):
 def predict_video(file_path, video_model, image_processor):
     video = process_video(file_path)
     inputs = image_processor(list(video), return_tensors="pt")
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    device = torch.device("cpu")
     inputs = inputs.to(device)
 
     with torch.no_grad():
@@ -100,7 +102,7 @@ def audio_label_to_emotion(label):
 def preprocess_and_predict_audio(file_path, model, processor):
     audio_array, _ = librosa.load(file_path, sr=16000)
     inputs = processor(audio_array, sampling_rate=16000, return_tensors="pt", padding=True, max_length=75275)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    device = torch.device("cpu")
     model = model.to(device)
     inputs = {k: v.to(device) for k, v in inputs.items()}
 
@@ -182,13 +184,13 @@ decision_frameworks = {
 def predict(video_file, video_model_name, audio_model_name, framework_name):
 
     image_processor = VivitImageProcessor.from_pretrained("google/vivit-b-16x2-kinetics400")
-    video_model = torch.load('./' + video_model_name)
+    video_model = torch.load('./' + video_model_name, map_location=torch.device('cpu'))
 
     model_id = "facebook/wav2vec2-large"
     config = AutoConfig.from_pretrained(model_id, num_labels=6)
     audio_processor = AutoFeatureExtractor.from_pretrained(model_id)
     audio_model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id, config=config)
-    audio_model.load_state_dict(torch.load('./' + audio_model_name))
+    audio_model.load_state_dict(torch.load('./' + audio_model_name, map_location=torch.device('cpu')))
     audio_model.eval()
 
     delete_directory_path = "./temp/"
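
Taken together, the change pins inference to the CPU and makes both checkpoint loads device-agnostic: map_location remaps tensors that were saved on a CUDA device, which otherwise makes torch.load fail on a GPU-less host. A minimal, self-contained sketch of the same pattern (the nn.Linear stand-in model and the demo_model.pt filename are placeholders for illustration, not from app.py):

import os

import torch
import torch.nn as nn

# Create the scratch directory up front, as the commit now does in
# separate_video_audio (exist_ok=True collapses the explicit exists check).
output_dir = './temp/'
os.makedirs(output_dir, exist_ok=True)

# Pin inference to CPU. Without map_location, torch.load raises
# "Attempting to deserialize object on a CUDA device" when the
# checkpoint was saved on a GPU machine but no GPU is present here.
device = torch.device('cpu')

# Stand-in checkpoint (placeholder) so the round trip actually runs.
ckpt_path = os.path.join(output_dir, 'demo_model.pt')
torch.save(nn.Linear(4, 2).state_dict(), ckpt_path)

model = nn.Linear(4, 2)
model.load_state_dict(torch.load(ckpt_path, map_location=device))
model.to(device).eval()

with torch.no_grad():
    out = model(torch.randn(1, 4, device=device))
print(out.shape)  # torch.Size([1, 2])

map_location also accepts the string 'cpu' directly. Hardcoding torch.device("cpu") instead of the previous cuda-if-available branch trades GPU support for predictable behavior on CPU-only hosts.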