JaganathC commited on
Commit
2f88fda
·
verified ·
1 Parent(s): 94823fe

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -35
  2. README.md +12 -13
  3. app.py +228 -0
  4. requirements.txt +13 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,12 @@
1
- ---
2
- title: Video Summ
3
- emoji: 🐢
4
- colorFrom: gray
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 5.21.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Video Transcription Smart Summary
3
+ emoji:
4
+ colorFrom: indigo
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 4.37.2
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import yt_dlp
4
+ import os
5
+ import subprocess
6
+ import json
7
+ from threading import Thread
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ import spaces
10
+ import moviepy.editor as mp
11
+ import time
12
+ import langdetect
13
+ import uuid
14
+
15
+ HF_TOKEN = os.environ.get("HF_TOKEN")
16
+ print("Starting the program...")
17
+
18
+ model_path = "Qwen/Qwen2.5-7B-Instruct"
19
+ print(f"Loading model {model_path}...")
20
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
21
+ model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
22
+ model = model.eval()
23
+ print("Model successfully loaded.")
24
+
25
+ def generate_unique_filename(extension):
26
+ return f"{uuid.uuid4()}{extension}"
27
+
28
+ def cleanup_files(*files):
29
+ for file in files:
30
+ if file and os.path.exists(file):
31
+ os.remove(file)
32
+ print(f"Removed file: {file}")
33
+
34
+ def download_youtube_audio(url):
35
+ print(f"Downloading audio from YouTube: {url}")
36
+ output_path = generate_unique_filename(".wav")
37
+ ydl_opts = {
38
+ 'format': 'bestaudio/best',
39
+ 'postprocessors': [{
40
+ 'key': 'FFmpegExtractAudio',
41
+ 'preferredcodec': 'wav',
42
+ }],
43
+ 'outtmpl': output_path,
44
+ 'keepvideo': True,
45
+ }
46
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
47
+ ydl.download([url])
48
+
49
+ # Check if the file was renamed to .wav.wav
50
+ if os.path.exists(output_path + ".wav"):
51
+ os.rename(output_path + ".wav", output_path)
52
+
53
+ if os.path.exists(output_path):
54
+ print(f"Audio download completed. File saved at: {output_path}")
55
+ print(f"File size: {os.path.getsize(output_path)} bytes")
56
+ else:
57
+ print(f"Error: File {output_path} not found after download.")
58
+
59
+ return output_path
60
+
61
+ @spaces.GPU(duration=90)
62
+ def transcribe_audio(file_path):
63
+ print(f"Starting transcription of file: {file_path}")
64
+ temp_audio = None
65
+ if file_path.endswith(('.mp4', '.avi', '.mov', '.flv')):
66
+ print("Video file detected. Extracting audio...")
67
+ try:
68
+ video = mp.VideoFileClip(file_path)
69
+ temp_audio = generate_unique_filename(".wav")
70
+ video.audio.write_audiofile(temp_audio)
71
+ file_path = temp_audio
72
+ except Exception as e:
73
+ print(f"Error extracting audio from video: {e}")
74
+ raise
75
+
76
+ print(f"Does the file exist? {os.path.exists(file_path)}")
77
+ print(f"File size: {os.path.getsize(file_path) if os.path.exists(file_path) else 'N/A'} bytes")
78
+
79
+ output_file = generate_unique_filename(".json")
80
+ command = [
81
+ "insanely-fast-whisper",
82
+ "--file-name", file_path,
83
+ "--device-id", "0",
84
+ "--model-name", "openai/whisper-large-v3",
85
+ "--task", "transcribe",
86
+ "--timestamp", "chunk",
87
+ "--transcript-path", output_file
88
+ ]
89
+ print(f"Executing command: {' '.join(command)}")
90
+ try:
91
+ result = subprocess.run(command, check=True, capture_output=True, text=True)
92
+ print(f"Standard output: {result.stdout}")
93
+ print(f"Error output: {result.stderr}")
94
+ except subprocess.CalledProcessError as e:
95
+ print(f"Error running insanely-fast-whisper: {e}")
96
+ print(f"Standard output: {e.stdout}")
97
+ print(f"Error output: {e.stderr}")
98
+ raise
99
+
100
+ print(f"Reading transcription file: {output_file}")
101
+ try:
102
+ with open(output_file, "r") as f:
103
+ transcription = json.load(f)
104
+ except json.JSONDecodeError as e:
105
+ print(f"Error decoding JSON: {e}")
106
+ print(f"File content: {open(output_file, 'r').read()}")
107
+ raise
108
+
109
+ if "text" in transcription:
110
+ result = transcription["text"]
111
+ else:
112
+ result = " ".join([chunk["text"] for chunk in transcription.get("chunks", [])])
113
+
114
+ print("Transcription completed.")
115
+
116
+ # Cleanup
117
+ cleanup_files(output_file)
118
+ if temp_audio:
119
+ cleanup_files(temp_audio)
120
+
121
+ return result
122
+
123
+ @spaces.GPU(duration=90)
124
+ def generate_summary_stream(transcription):
125
+ print("Starting summary generation...")
126
+ print(f"Transcription length: {len(transcription)} characters")
127
+
128
+ detected_language = langdetect.detect(transcription)
129
+
130
+ prompt = f"""Summarize the following video transcription in 150-300 words.
131
+ The summary should be in the same language as the transcription, which is detected as {detected_language}.
132
+ Please ensure that the summary captures the main points and key ideas of the transcription:
133
+
134
+ {transcription[:300000]}..."""
135
+
136
+ response, history = model.chat(tokenizer, prompt, history=[])
137
+ print(f"Final summary generated: {response[:100]}...")
138
+ print("Summary generation completed.")
139
+ return response
140
+
141
+ def process_youtube(url):
142
+ if not url:
143
+ print("YouTube URL not provided.")
144
+ return "Please enter a YouTube URL.", None
145
+ print(f"Processing YouTube URL: {url}")
146
+
147
+ audio_file = None
148
+ try:
149
+ audio_file = download_youtube_audio(url)
150
+ if not os.path.exists(audio_file):
151
+ raise FileNotFoundError(f"File {audio_file} does not exist after download.")
152
+
153
+ print(f"Audio file found: {audio_file}")
154
+ print("Starting transcription...")
155
+ transcription = transcribe_audio(audio_file)
156
+ print(f"Transcription completed. Length: {len(transcription)} characters")
157
+ return transcription, None
158
+ except Exception as e:
159
+ print(f"Error processing YouTube: {e}")
160
+ return f"Processing error: {str(e)}", None
161
+ finally:
162
+ if audio_file and os.path.exists(audio_file):
163
+ cleanup_files(audio_file)
164
+ print(f"Directory content after processing: {os.listdir('.')}")
165
+
166
+ def process_uploaded_video(video_path):
167
+ print(f"Processing uploaded video: {video_path}")
168
+ try:
169
+ print("Starting transcription...")
170
+ transcription = transcribe_audio(video_path)
171
+ print(f"Transcription completed. Length: {len(transcription)} characters")
172
+ return transcription, None
173
+ except Exception as e:
174
+ print(f"Error processing video: {e}")
175
+ return f"Processing error: {str(e)}", None
176
+
177
+ print("Setting up Gradio interface...")
178
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
179
+ gr.Markdown(
180
+ """
181
+ # 🎥 Video Transcription and Smart Summary
182
+
183
+ Upload a video or provide a YouTube link to get a transcription and AI-generated summary. HF Zero GPU has a usage time limit. So if you want to run longer videos I recommend you clone the space. Remove @Spaces.gpu from the code and run it locally on your GPU!
184
+ """
185
+ )
186
+
187
+ with gr.Tabs():
188
+ with gr.TabItem("📤 Video Upload"):
189
+ video_input = gr.Video(label="Drag and drop or click to upload")
190
+ video_button = gr.Button("🚀 Process Video", variant="primary")
191
+
192
+ with gr.TabItem("🔗 YouTube Link"):
193
+ url_input = gr.Textbox(label="Paste YouTube URL here", placeholder="https://www.youtube.com/watch?v=...")
194
+ url_button = gr.Button("🚀 Process URL", variant="primary")
195
+
196
+ with gr.Row():
197
+ with gr.Column():
198
+ transcription_output = gr.Textbox(label="📝 Transcription", lines=10, show_copy_button=True)
199
+ with gr.Column():
200
+ summary_output = gr.Textbox(label="📊 Summary", lines=10, show_copy_button=True)
201
+
202
+ summary_button = gr.Button("📝 Generate Summary", variant="secondary")
203
+
204
+ gr.Markdown(
205
+ """
206
+ ### How to use:
207
+ 1. Upload a video or paste a YouTube link.
208
+ 2. Click 'Process' to get the transcription.
209
+ 3. Click 'Generate Summary' to get a summary of the content.
210
+
211
+ *Note: Processing may take a few minutes depending on the video length.*
212
+ """
213
+ )
214
+
215
+ def process_video_and_update(video):
216
+ if video is None:
217
+ return "No video uploaded.", "Please upload a video."
218
+ print(f"Video received: {video}")
219
+ transcription, _ = process_uploaded_video(video)
220
+ print(f"Returned transcription: {transcription[:100] if transcription else 'No transcription generated'}...")
221
+ return transcription or "Transcription error", ""
222
+
223
+ video_button.click(process_video_and_update, inputs=[video_input], outputs=[transcription_output, summary_output])
224
+ url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
225
+ summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
226
+
227
+ print("Launching Gradio interface...")
228
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ insanely-fast-whisper
2
+ accelerate
3
+ bitsandbytes
4
+ scipy
5
+ sentencepiece
6
+ spaces
7
+ tiktoken
8
+ pytest
9
+ torch
10
+ transformers
11
+ yt-dlp
12
+ moviepy
13
+ langdetect