Dani committed on
Commit
fb05f67
·
1 Parent(s): 93fc4c5

transcription

Browse files
Files changed (2) hide show
  1. app.py +28 -20
  2. requirements.txt +4 -1
app.py CHANGED
@@ -21,34 +21,42 @@ from transformers import pipeline
21
  from moviepy.editor import VideoFileClip
22
  import tempfile
23
  import os
 
 
 
 
 
 
 
 
24
 
25
  pipe = pipeline(model="dacavi/whisper-small-hi")
26
 
27
  def transcribe_video(video_url):
28
  # Download video and extract audio
29
- with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
30
- os.system(f"youtube-dl -o {temp_video.name} {video_url}")
31
- video_clip = VideoFileClip(temp_video.name)
32
- audio_clip = video_clip.audio
33
- temp_audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
34
- audio_clip.write_audiofile(temp_audio_path, codec="wav")
35
 
36
  # Transcribe audio
37
- text = pipe(temp_audio_path)["text"]
38
 
39
  # Clean up temporary files
40
- os.remove(temp_video.name)
41
- os.remove(temp_audio_path)
42
-
43
- return text
44
 
45
- iface = gr.Interface(
46
- fn=transcribe_video,
47
- inputs="text",
48
- outputs="text",
49
- live=True,
50
- title="Video Transcription",
51
- description="Paste the URL of a video to transcribe the spoken content.",
52
- )
53
 
54
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
21
  from moviepy.editor import VideoFileClip
22
  import tempfile
23
  import os
24
+ from pydub import AudioSegment
25
+ from huggingface_hub import login
26
+
27
+ with open("../../token.txt", "r") as file:
28
+ token = file.readline().strip()
29
+
30
+
31
+ login(token=token, add_to_git_credential=True)
32
 
33
  pipe = pipeline(model="dacavi/whisper-small-hi")
34
 
35
  def transcribe_video(video_url):
36
  # Download video and extract audio
37
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
38
+ # os.system(f"yt-dlp -o {temp_audio.name} -x --audio-format wav {video_url}")
39
+ os.system(f"yt-dlp -o audioSample.wav -x --audio-format wav {video_url}")
40
+
41
+ print("Downloaded audio:", temp_audio.name)
42
+
43
 
44
  # Transcribe audio
45
+ text = pipe("audioSample.wav")["text"]
46
 
47
  # Clean up temporary files
48
+ os.remove("audioSample.wav")
 
 
 
49
 
 
 
 
 
 
 
 
 
50
 
51
+ return text
52
+ print(transcribe_video("https://www.youtube.com/watch?v=8FkLRUJj-o0"))
53
+ # iface = gr.Interface(
54
+ # fn=transcribe_video,
55
+ # inputs="text",
56
+ # outputs="text",
57
+ # live=True,
58
+ # title="Video Transcription",
59
+ # description="Paste the URL of a video to transcribe the spoken content.",
60
+ # )
61
+ #
62
+ # iface.launch()
requirements.txt CHANGED
@@ -2,4 +2,7 @@ transformers
2
  torch
3
  tensorflow
4
  moviepy==1.0.3
5
- ffmpeg
 
 
 
 
2
  torch
3
  tensorflow
4
  moviepy==1.0.3
5
+ ffmpeg
6
+ ffprobe
7
+ yt-dlp
8
+ pydub