Commit 02b8bbc
Parent(s): 76f91a0

Upload 6 files

Files changed:
- LICENSE +21 -0
- README.md +28 -13
- app.py +33 -0
- helpers.py +171 -0
- requirements.txt +4 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
```
MIT License

Copyright (c) 2023 Otman Heddouch

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
README.md
CHANGED
@@ -1,13 +1,28 @@
```markdown
# Edit Video like Sheet
[](https://huggingface.co/spaces/otmanheddouch/edit-video-like-sheet/)

With the help of [WhisperModel](https://huggingface.co/docs/transformers/model_doc/whisper), this Gradio demo app lets you edit a video the way you would edit a spreadsheet. You can easily edit the script generated from the uploaded video and remove unwanted words, like cursing or anything else; the app then generates an edited version of the video that you can download.

To view the demo, check [Hugging face](https://huggingface.co/spaces/otmanheddouch/edit-video-like-sheet).


## How it works?
To install and run the app, execute the following commands.
Clone the code:

`git clone https://github.com/otman-ai/edit-video-like-sheet.git`

`pip install -r requirements.txt`

`python app.py`

A link will be displayed in the logs; enter it in your browser and that is it!
If you encounter issues, just open an issue and I will get back to you.

## Have questions?
If you have any questions or suggestions, reach out to me.


## License

This repo is under the [MIT](https://github.com/otman-ai/edit-video-like-sheet/blob/main/LICENSE) License.
```
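The core mechanism the README describes, word-level timestamps from faster-whisper, can be sketched in a few lines. This is a minimal standalone example, not part of the commit; `sample.mp4` is a hypothetical local file, and the calls mirror the ones helpers.py makes:

```python
# Minimal sketch of the transcription step (assumes faster-whisper and
# moviepy are installed; "sample.mp4" is a hypothetical local video).
import moviepy.editor as mp
from faster_whisper import WhisperModel

video = mp.VideoFileClip("sample.mp4")
video.audio.write_audiofile("audio.wav")  # extract the soundtrack

model = WhisperModel("medium")
segments, info = model.transcribe("audio.wav", word_timestamps=True)

for segment in segments:  # the transcription actually runs lazily here
    for word in segment.words:
        print(f"{word.start:.2f}-{word.end:.2f}: {word.word.strip()}")
```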
app.py
ADDED
@@ -0,0 +1,33 @@
```python
import gradio as gr
from helpers import *

with gr.Blocks() as demo:

    with gr.Row():
        heading = gr.HTML("<h2> Edit your videos like a Spreadsheet using <a href='https://huggingface.co/docs/transformers/model_doc/whisper'> WhisperModel</a></h2>")

    with gr.Row():
        gr.Markdown('Please upload a video first, then click `Transcribe` to get the full script of the video; after that you can easily edit the Script field by removing the unwanted words.')
    with gr.Row():
        gr.Markdown('Make sure you click on `Cut` to generate your edited video.')

    with gr.Row():
        video_file = gr.Video(label="Upload Video")
        script = gr.Textbox(label='Script')
        results = gr.Video(label='Result')

    with gr.Row():
        transcribe = gr.Button('Transcribe')
        cut_button = gr.Button('Cut')

    # Wire the buttons directly. (The original guarded these with
    # `if video_file:` / `if script:`, but at build time Gradio components
    # are always truthy, so the conditionals were no-ops.)
    transcribe.click(process_video, inputs=[video_file], outputs=[script])
    cut_button.click(edit_video, inputs=[script, video_file], outputs=[results])

    with gr.Row():
        gr.Markdown('Made by **Otman Heddouch**')

demo.launch(share=True, debug=True)
```
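Because the click handlers are plain functions, the same pipeline can be exercised without the UI. A minimal headless sketch, assuming a hypothetical local `sample.mp4` and a toy `badword` edit (note that importing helpers.py loads the Whisper model):

```python
# Headless use of the pipeline defined in helpers.py.
# "sample.mp4" and "badword" are hypothetical placeholders.
from helpers import process_video, edit_video

script = process_video("sample.mp4")        # full transcript as one string
script = script.replace(" badword", "")    # edit it like a sheet (toy edit)
output_path = edit_video(script, "sample.mp4")
print(output_path)                          # "output.mp4" if anything was cut
```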
helpers.py
ADDED
@@ -0,0 +1,171 @@
```python
from faster_whisper import WhisperModel  # WhisperModel for transcription
import moviepy.editor as mp  # moviepy for editing the video
import re
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.video.compositing.concatenate import concatenate_videoclips


## Helper functions

def load_model(model_size="medium"):
    """
    Load the model.
    """
    model = WhisperModel(model_size)
    return model

def transcribe(video_path, model, audio_path='audio.wav'):
    """
    Transcribe the video into segments with word timestamps.

    Parameters
    ----------
    video_path : path of the video to be transcribed
    model : the model used to extract the script from the video
    audio_path : path the audio is exported to
    """
    # Load the video
    video = mp.VideoFileClip(video_path)

    # Extract the audio from the video
    audio_file = video.audio
    audio_file.write_audiofile(audio_path)

    # Run the transcription
    segments, info = model.transcribe(audio_path, word_timestamps=True)
    segments = list(segments)  # The transcription actually runs here.
    return segments

def mapping_segments(segments):
    """
    Map the subtitles: each word with its corresponding start and end time.

    Parameters
    ----------
    segments : the segments resulting from running the model

    Returns a dictionary mapping each word to its own start and end time,
    as well as the entire script as a list of words.
    """
    # Dictionary mapping "start-end" time ranges to cleaned words
    subtitles_word = {}
    # List of all the words
    transcript = []
    # Loop over every segment
    for segment in segments:
        for word in segment.words:
            # Clean the word of surrounding space and punctuation.
            text_without_punctuation = re.sub(r'[^\w\s]', '', word.word.strip())
            # Store the cleaned word in the dict...
            subtitles_word[f"{word.start}-{word.end}"] = text_without_punctuation
            # ...as well as in the list
            transcript.append(text_without_punctuation)

    return subtitles_word, transcript


def find_time_range_cutted(subtitles_word, edited_script_list_word):
    """
    Return the time ranges that correspond to cut words.

    Parameters
    ----------
    subtitles_word : words mapped to their own time range (start and end)
    edited_script_list_word : list of words, free of punctuation and extra
        spaces, coming from the user's submission
    """
    # tracked_index tracks the position in the edited script while we walk
    # through the original script
    tracked_index = 0
    # time ranges to cut
    time_range_to_cut = []
    # loop through all the original words
    for i, (range_, sub) in enumerate(subtitles_word.items()):
        # the corresponding word of the new script (None once the edited
        # script is exhausted, i.e. the user deleted all remaining words)
        compared_value = (edited_script_list_word[tracked_index]
                          if tracked_index < len(edited_script_list_word) else None)
        print(f"Comparing '{compared_value}' of index {tracked_index} with '{sub}' of index {i}")
        # If the original word matches the edited one, it was not cut:
        # advance to the next edited word.
        if sub == compared_value:
            tracked_index += 1
        # Otherwise the user removed it: record its time range and keep
        # tracked_index where it is, so the edited script does not shift
        # until its next word is matched in the original.
        else:
            time_range_to_cut.append(range_)

    return time_range_to_cut


def process_video(video_file):
    """
    Process the video and return the text to be edited.
    """
    print(video_file)
    print("Transcribing.....")
    segments = transcribe(video_file, model)
    print('Mapping the segments....')
    subtitles_word, list_words = mapping_segments(segments)
    # Plain string to be edited like a sheet
    text_to_edited = ' '.join(list_words)
    return text_to_edited


def cut_video(input_video, output_video, cut_ranges):
    """
    Keep only the given (start, end) ranges and concatenate them.
    """
    print(cut_ranges)
    # Load the video clip
    video_clip = VideoFileClip(input_video)
    # Cut and concatenate the specified ranges
    cut_clips = [video_clip.subclip(start, end) for start, end in cut_ranges]
    final_clip = concatenate_videoclips(cut_clips)
    # Write the result to a new video file
    final_clip.write_videofile(output_video, codec="libx264", audio_codec="aac")

def edit_video(script, video_file):
    segments = transcribe(video_file, model)
    subtitles_word_text, list_words = mapping_segments(segments)
    print("subtitles word mapped: ", subtitles_word_text)
    # Clean the edited script the same way the transcript was cleaned
    file_content = re.sub(r'[^\w\s]', '', script)
    # After the text has been edited, turn it into a list of words
    edited_script_list_word = [i for i in file_content.split(' ') if i != '']
    time_range_to_cut = find_time_range_cutted(subtitles_word_text, edited_script_list_word)
    # Flatten the "start-end" strings into an ordered list of float boundaries
    sorted_range = []
    time_range_to_cut_cleaned = [(float(i.split('-')[0]), float(i.split('-')[1]))
                                 for i in time_range_to_cut]
    print("Cleaned range ", time_range_to_cut_cleaned)
    for range_time in time_range_to_cut_cleaned:
        for r in range_time:
            sorted_range.append(r)
    if sorted_range != []:
        # Invert the cut ranges into the ranges to keep: from the start of
        # the video to the first cut, between consecutive cuts, and from
        # the last cut to the end of the video.
        started_range = (0, sorted_range[0])
        video_clip = VideoFileClip(video_file)
        video_duration = video_clip.duration
        ended_range = (sorted_range[-1], video_duration)

        complete_range = []
        complete_range.append(started_range)
        print('sorted range ', sorted_range)
        if len(sorted_range) > 2:
            new_X = sorted_range[1:-1]
            for i in range(0, len(new_X), 2):
                pair_of_items = new_X[i:i + 2]
                complete_range.append((pair_of_items[0], pair_of_items[1]))

        complete_range.append(ended_range)
        print("Time range : ", complete_range)
        output_video_path = "output.mp4"
        cut_video(video_file, output_video_path, complete_range)
        return output_video_path
    return video_file

model = load_model()
```
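To see how the alignment works, here is a small worked example on toy data (the words and timestamps are made up), assuming `find_time_range_cutted` from helpers.py is in scope; note that importing helpers.py also loads the Whisper model:

```python
# Toy data: three transcribed words, of which the user deleted "bad".
subtitles_word = {"0.0-0.5": "hello", "0.5-1.0": "bad", "1.0-1.5": "world"}
edited_words = ["hello", "world"]

print(find_time_range_cutted(subtitles_word, edited_words))
# -> ['0.5-1.0']  (the time range of the deleted word)
#
# edit_video then inverts this into the ranges to KEEP and stitches them:
# (0, 0.5) and (1.0, video_duration).
```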
requirements.txt
ADDED
@@ -0,0 +1,4 @@
```
SpeechRecognition
moviepy
faster-whisper==0.7.0
gradio
```