Antonio committed
Commit: adad62e
Parent(s): 374f948
Change

app.py CHANGED
@@ -25,6 +25,8 @@ def get_emotion_from_filename(filename):
 
 def separate_video_audio(file_path):
     output_dir = './temp/'
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
     video_path = os.path.join(output_dir, os.path.basename(file_path).replace('.mp4', '_video.mp4'))
     audio_path = os.path.join(output_dir, os.path.basename(file_path).replace('.mp4', '_audio.wav'))
 
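Note: the added existence check prevents a FileExistsError when ./temp/ survives a previous run. A minimal equivalent sketch using the standard library's exist_ok flag collapses it to one call:

    import os

    output_dir = './temp/'
    # exist_ok=True makes the call a no-op if the directory already exists
    os.makedirs(output_dir, exist_ok=True)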
@@ -81,7 +83,7 @@ def video_label_to_emotion(label):
 def predict_video(file_path, video_model, image_processor):
     video = process_video(file_path)
     inputs = image_processor(list(video), return_tensors="pt")
-    device = torch.device("
+    device = torch.device("cpu")
     inputs = inputs.to(device)
 
     with torch.no_grad():
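The removed line is truncated in this view, but it evidently constructed a device unavailable on CPU-only hosts; the commit pins inference to "cpu". If automatic GPU fallback were wanted instead, the usual availability check is a small sketch:

    import torch

    # use CUDA when present, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")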
@@ -100,7 +102,7 @@ def audio_label_to_emotion(label):
 def preprocess_and_predict_audio(file_path, model, processor):
     audio_array, _ = librosa.load(file_path, sr=16000)
     inputs = processor(audio_array, sampling_rate=16000, return_tensors="pt", padding=True, max_length=75275)
-    device = torch.device("
+    device = torch.device("cpu")
     model = model.to(device)
     inputs = {k: v.to(device) for k, v in inputs.items()}
 
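One caveat on the unchanged processor call: for Hugging Face feature extractors, padding=True pads to the longest item in the batch, and max_length is generally only honored together with padding="max_length" or truncation=True. If a fixed 75275-sample window was the intent, a hedged sketch would be:

    # assumption: a fixed-length window of 75275 samples was intended
    inputs = processor(
        audio_array,
        sampling_rate=16000,
        return_tensors="pt",
        padding="max_length",  # pad short clips up to max_length
        truncation=True,       # clip long ones down to it
        max_length=75275,
    )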
@@ -182,13 +184,13 @@ decision_frameworks = {
 def predict(video_file, video_model_name, audio_model_name, framework_name):
 
     image_processor = VivitImageProcessor.from_pretrained("google/vivit-b-16x2-kinetics400")
-    video_model = torch.load('./' + video_model_name)
+    video_model = torch.load('./' + video_model_name, map_location=torch.device('cpu'))
 
     model_id = "facebook/wav2vec2-large"
     config = AutoConfig.from_pretrained(model_id, num_labels=6)
     audio_processor = AutoFeatureExtractor.from_pretrained(model_id)
     audio_model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id, config=config)
-    audio_model.load_state_dict(torch.load('./' + audio_model_name))
+    audio_model.load_state_dict(torch.load('./' + audio_model_name, map_location=torch.device('cpu')))
     audio_model.eval()
 
     delete_directory_path = "./temp/"
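map_location remaps tensors that were saved from CUDA onto the CPU at load time, so GPU-trained checkpoints open on CPU-only machines. Note the two loading styles above: torch.load of a whole pickled model (video_model) requires the original class to be importable, while the state_dict route used for the audio model is the more portable pattern. A minimal sketch of it (the plain string 'cpu' is also accepted as map_location):

    import torch

    state_dict = torch.load('./' + audio_model_name, map_location='cpu')
    audio_model.load_state_dict(state_dict)
    audio_model.eval()  # disable dropout for deterministic inference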