Pradheep1647 committed
Commit c3c9064 · Parent: 1be32a3

access browser cookies instead of prompting the user

Files changed (2)
  1. app.py +43 -34
  2. requirements.txt +2 -1
app.py CHANGED
@@ -10,32 +10,42 @@ from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import BlipProcessor, BlipForConditionalGeneration
 import cv2
-
-def authenticate_youtube():
-    os.system('yt-dlp --username oauth2 --password ""')
-
-def download_youtube_video(video_url, output_path):
+import browser_cookie3
+
+def get_youtube_cookies(browser):
+    if browser == 'Chrome':
+        return browser_cookie3.chrome()
+    elif browser == 'Firefox':
+        return browser_cookie3.firefox()
+    elif browser == 'Edge':
+        return browser_cookie3.edge()
+    elif browser == 'Brave':
+        return browser_cookie3.brave()
+    else:
+        raise ValueError("Unsupported browser")
+
+def download_youtube_video(video_url, browser):
+    cookies = get_youtube_cookies(browser)
     ydl_opts = {
+        'cookiefile': cookies,
         'format': 'bestvideo+bestaudio',
-        'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
-        'username': 'oauth2',
-        'password': ''
+        'outtmpl': os.path.join('./', '%(title)s.%(ext)s'),
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         ydl.download([video_url])
         video_info = ydl.extract_info(video_url, download=False)
         video_title = video_info.get('title', 'video')
-    return os.path.join(output_path, f"{video_title}.webm")
+    return os.path.join('./', f"{video_title}.webm")
 
-def convert_to_mp4(input_path, output_path):
-    output_file = os.path.join(output_path, 'video.mp4')
+def convert_to_mp4(input_path):
+    output_file = os.path.join('./', 'video.mp4')
     command = ['ffmpeg', '-i', input_path, '-c', 'copy', output_file]
     subprocess.run(command, check=True)
     return output_file
 
 def extract_audio_from_video(video_path):
     video_clip = VideoFileClip(video_path)
-    audio_output = os.path.join(output_path, 'audio.mp3')
+    audio_output = os.path.join('./', 'audio.mp3')
     audio_clip = video_clip.audio
     audio_clip.write_audiofile(audio_output)
     return audio_output
@@ -43,7 +53,7 @@ def extract_audio_from_video(video_path):
 def convert_mp3_to_wav(mp3_path):
     from pydub import AudioSegment
     audio = AudioSegment.from_mp3(mp3_path)
-    wav_output = os.path.join(output_path, 'audio.wav')
+    wav_output = os.path.join('./', 'audio.wav')
     audio.export(wav_output, format="wav")
     return wav_output
 
@@ -94,35 +104,33 @@ emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
 emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
 emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
 
-def analyze_video(video_url):
+def analyze_video(video_url, browser):
     global output_path
     output_path = './'
 
-    authenticate_youtube()
-
-    video_path = download_youtube_video(video_url, output_path)
+    video_path = download_youtube_video(video_url, browser)
 
-    mp4_path = convert_to_mp4(video_path, output_path)
+    mp4_path = convert_to_mp4(video_path)
 
     audio_path = extract_audio_from_video(mp4_path)
 
     audio_wav_path = convert_mp3_to_wav(audio_path)
 
     model_whisper = whisper.load_model("base")
 
     result_whisper = model_whisper.transcribe(audio_wav_path)
 
     transcript = result_whisper['text']
 
     emotion_dict_text, predicted_emotion_text = process_text(transcript)
 
     n_frame_interval = 60
     emotion_vectors_video = []
 
     video_capture = cv2.VideoCapture(mp4_path)
 
     total_frames_video = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
 
     frame_count_video = 0
 
     while video_capture.isOpened():
@@ -134,7 +142,7 @@ def analyze_video(video_url):
         if frame_count_video % n_frame_interval == 0:
             pixel_values_video = preprocess_frame(frame_video)
             caption_video = generate_caption(pixel_values_video)
-            predicted_emotions_video = predict_emotions(caption_video)
+            predicted_emotions_video, _ = predict_emotions(caption_video)
             emotion_vectors_video.append(np.array(list(predicted_emotions_video.values())))
 
         frame_count_video += 1
@@ -152,10 +160,11 @@ def analyze_video(video_url):
     return transcript, predicted_emotion_text, final_most_predicted_emotion
 
 iface = gr.Interface(fn=analyze_video,
-                     inputs=gr.Textbox(label="YouTube Video URL"),
-                     outputs=["text", "text", "text"],
-                     title="Multimodal Emotion Recognition",
-                     description="Enter a YouTube Video URL to analyze emotions from both audio and visual content.")
+                     inputs=[gr.Textbox(label="YouTube Video URL"),
+                             gr.Dropdown(label="Select Browser", choices=["Chrome", "Firefox", "Edge", "Brave"])],
+                     outputs=["text", "text", "text"],
+                     title="Multimodal Emotion Recognition",
+                     description="Enter a YouTube Video URL and select your browser to analyze emotions from both audio and visual content.")
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
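
One caveat with the new code path: browser_cookie3's loaders (chrome(), firefox(), and so on) return an http.cookiejar.CookieJar object, while yt-dlp's 'cookiefile' option expects the path of a Netscape-format cookies.txt file, so passing the jar straight through as 'cookiefile': cookies will most likely fail at download time. A minimal sketch of the missing conversion step, assuming the hypothetical helper name save_cookies_to_file and a writable working directory:

import http.cookiejar

def save_cookies_to_file(cookie_jar, path='./cookies.txt'):
    # Hypothetical helper, untested: MozillaCookieJar serializes to the
    # Netscape cookies.txt format that yt-dlp's 'cookiefile' option reads.
    mozilla_jar = http.cookiejar.MozillaCookieJar(path)
    for cookie in cookie_jar:
        mozilla_jar.set_cookie(cookie)
    # Keep session cookies as well; browsers often hold auth state in them.
    mozilla_jar.save(ignore_discard=True, ignore_expires=True)
    return path

download_youtube_video would then set 'cookiefile': save_cookies_to_file(cookies) instead of passing the jar itself.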
requirements.txt CHANGED
@@ -8,4 +8,5 @@ moviepy
 openai-whisper
 yt-dlp
 torch
-opencv-python
+opencv-python
+browser-cookie3
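
An alternative worth noting: yt-dlp can read browser cookies itself through the cookiesfrombrowser option (the Python-API counterpart of the --cookies-from-browser CLI flag), which would make both get_youtube_cookies and the browser-cookie3 dependency unnecessary. A sketch under the assumption that the installed yt-dlp is recent enough to support the option; download_with_browser_cookies is a hypothetical name, and the dropdown values need lowercasing to match yt-dlp's browser identifiers:

import yt_dlp

def download_with_browser_cookies(video_url, browser='Chrome'):
    ydl_opts = {
        # The tuple is (browser, profile, keyring, container); only the
        # browser name is required, and yt-dlp expects it in lowercase.
        'cookiesfrombrowser': (browser.lower(),),
        'format': 'bestvideo+bestaudio',
        'outtmpl': './%(title)s.%(ext)s',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

Either way, reading cookies from a local browser profile only works where such a profile exists; a hosted Space's container has no user browser installed, so this approach is best suited to running the app locally.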