Pradheep1647 committed
Commit 1be32a3
1 Parent(s): 909f75a

made some changes to app.py(oauth2)

Files changed (1)
  1. app.py +21 -2
app.py CHANGED
@@ -11,10 +11,15 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import BlipProcessor, BlipForConditionalGeneration
 import cv2
 
+def authenticate_youtube():
+    os.system('yt-dlp --username oauth2 --password ""')
+
 def download_youtube_video(video_url, output_path):
     ydl_opts = {
         'format': 'bestvideo+bestaudio',
         'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
+        'username': 'oauth2',
+        'password': ''
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([video_url])
@@ -92,21 +97,32 @@ emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model
 def analyze_video(video_url):
     global output_path
     output_path = './'
+
+    authenticate_youtube()
+
     video_path = download_youtube_video(video_url, output_path)
+
     mp4_path = convert_to_mp4(video_path, output_path)
+
     audio_path = extract_audio_from_video(mp4_path)
+
     audio_wav_path = convert_mp3_to_wav(audio_path)
+
     model_whisper = whisper.load_model("base")
 
     result_whisper = model_whisper.transcribe(audio_wav_path)
 
     transcript = result_whisper['text']
+
     emotion_dict_text, predicted_emotion_text = process_text(transcript)
 
     n_frame_interval = 60
     emotion_vectors_video = []
+
     video_capture = cv2.VideoCapture(mp4_path)
+
     total_frames_video = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
+
     frame_count_video = 0
 
     while video_capture.isOpened():
@@ -118,7 +134,7 @@ def analyze_video(video_url):
         if frame_count_video % n_frame_interval == 0:
             pixel_values_video = preprocess_frame(frame_video)
             caption_video = generate_caption(pixel_values_video)
-            predicted_emotions_video, _ = predict_emotions(caption_video)
+            predicted_emotions_video = predict_emotions(caption_video)
             emotion_vectors_video.append(np.array(list(predicted_emotions_video.values())))
 
         frame_count_video += 1
@@ -126,8 +142,11 @@ def analyze_video(video_url):
     video_capture.release()
 
     average_emotion_vector_video = np.mean(emotion_vectors_video, axis=0)
+
     combined_emotion_vector_final = np.concatenate((np.array(list(emotion_dict_text.values())), average_emotion_vector_video))
+
     final_most_predicted_index = np.argmax(combined_emotion_vector_final)
+
     final_most_predicted_emotion = list(emotion_dict_text.keys())[final_most_predicted_index]
 
     return transcript, predicted_emotion_text, final_most_predicted_emotion
@@ -139,4 +158,4 @@ iface = gr.Interface(fn=analyze_video,
                      description="Enter a YouTube Video URL to analyze emotions from both audio and visual content.")
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
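
The net effect of the commit is that every download is meant to go through YouTube's OAuth2 login rather than anonymous access. Below is a minimal sketch of that download path, assuming a yt-dlp YouTube OAuth2 plugin (such as yt-dlp-youtube-oauth2) is installed in the Space; the 'oauth2' username with an empty password is that plugin's convention, not a built-in yt-dlp feature, and download_with_oauth2 is a hypothetical wrapper used here for illustration, not a function in app.py.

import os
import yt_dlp

def download_with_oauth2(video_url, output_path='./'):
    # Same options the commit adds to download_youtube_video. The 'oauth2'
    # username is assumed to be intercepted by the OAuth2 plugin, which prompts
    # for a one-time device login on first use and reuses the cached token afterwards.
    ydl_opts = {
        'format': 'bestvideo+bestaudio',
        'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
        'username': 'oauth2',
        'password': '',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

download_with_oauth2('https://www.youtube.com/watch?v=VIDEO_ID')  # placeholder URL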