update youtube section
Browse files
src/03_low_code/video_transcripts/get_videos_for_youtube_channels.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
src/03_low_code/video_transcripts/youtube-transcript-extraction.ipynb
CHANGED
@@ -28,7 +28,7 @@
|
|
28 |
},
|
29 |
{
|
30 |
"cell_type": "code",
|
31 |
-
"execution_count":
|
32 |
"metadata": {
|
33 |
"execution": {
|
34 |
"iopub.execute_input": "2024-12-08T19:21:41.981395Z",
|
@@ -39,9 +39,19 @@
|
|
39 |
},
|
40 |
"trusted": true
|
41 |
},
|
42 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
"source": [
|
44 |
-
"
|
45 |
]
|
46 |
},
|
47 |
{
|
@@ -54,7 +64,7 @@
|
|
54 |
},
|
55 |
{
|
56 |
"cell_type": "code",
|
57 |
-
"execution_count":
|
58 |
"metadata": {
|
59 |
"execution": {
|
60 |
"iopub.execute_input": "2024-12-08T19:21:56.471492Z",
|
@@ -85,7 +95,7 @@
|
|
85 |
},
|
86 |
{
|
87 |
"cell_type": "code",
|
88 |
-
"execution_count":
|
89 |
"metadata": {
|
90 |
"execution": {
|
91 |
"iopub.execute_input": "2024-12-08T19:21:56.660152Z",
|
@@ -164,7 +174,7 @@
|
|
164 |
},
|
165 |
{
|
166 |
"cell_type": "code",
|
167 |
-
"execution_count":
|
168 |
"metadata": {},
|
169 |
"outputs": [],
|
170 |
"source": [
|
@@ -231,7 +241,7 @@
|
|
231 |
},
|
232 |
{
|
233 |
"cell_type": "code",
|
234 |
-
"execution_count":
|
235 |
"metadata": {
|
236 |
"execution": {
|
237 |
"iopub.execute_input": "2024-12-08T19:32:38.175946Z",
|
@@ -242,10 +252,27 @@
|
|
242 |
},
|
243 |
"trusted": true
|
244 |
},
|
245 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
"source": [
|
247 |
"# Eingabe der URLs\n",
|
248 |
-
"urls = \"https://www.youtube.com/playlist?list=
|
249 |
"\n",
|
250 |
"pdf_filenames = [] # Liste zur Speicherung der PDF-Dateinamen\n",
|
251 |
"\n",
|
@@ -263,7 +290,7 @@
|
|
263 |
" else:\n",
|
264 |
" video_urls = [url]\n",
|
265 |
"\n",
|
266 |
-
" for video_url in video_urls:\n",
|
267 |
" try:\n",
|
268 |
" yt = YouTube(video_url)\n",
|
269 |
" video_id = yt.video_id\n",
|
@@ -291,9 +318,17 @@
|
|
291 |
},
|
292 |
{
|
293 |
"cell_type": "code",
|
294 |
-
"execution_count":
|
295 |
"metadata": {},
|
296 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
"source": [
|
298 |
"\n",
|
299 |
"# Erstellen eines ZIP-Archivs\n",
|
@@ -316,7 +351,7 @@
|
|
316 |
},
|
317 |
{
|
318 |
"cell_type": "code",
|
319 |
-
"execution_count":
|
320 |
"metadata": {
|
321 |
"execution": {
|
322 |
"iopub.execute_input": "2024-12-08T19:30:12.194584Z",
|
@@ -366,7 +401,7 @@
|
|
366 |
"name": "python",
|
367 |
"nbconvert_exporter": "python",
|
368 |
"pygments_lexer": "ipython3",
|
369 |
-
"version": "3.
|
370 |
}
|
371 |
},
|
372 |
"nbformat": 4,
|
|
|
28 |
},
|
29 |
{
|
30 |
"cell_type": "code",
|
31 |
+
"execution_count": 18,
|
32 |
"metadata": {
|
33 |
"execution": {
|
34 |
"iopub.execute_input": "2024-12-08T19:21:41.981395Z",
|
|
|
39 |
},
|
40 |
"trusted": true
|
41 |
},
|
42 |
+
"outputs": [
|
43 |
+
{
|
44 |
+
"name": "stdout",
|
45 |
+
"output_type": "stream",
|
46 |
+
"text": [
|
47 |
+
"\n",
|
48 |
+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
|
49 |
+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n"
|
50 |
+
]
|
51 |
+
}
|
52 |
+
],
|
53 |
"source": [
|
54 |
+
"! pip install pytube youtube-transcript-api reportlab -q"
|
55 |
]
|
56 |
},
|
57 |
{
|
|
|
64 |
},
|
65 |
{
|
66 |
"cell_type": "code",
|
67 |
+
"execution_count": 19,
|
68 |
"metadata": {
|
69 |
"execution": {
|
70 |
"iopub.execute_input": "2024-12-08T19:21:56.471492Z",
|
|
|
95 |
},
|
96 |
{
|
97 |
"cell_type": "code",
|
98 |
+
"execution_count": 20,
|
99 |
"metadata": {
|
100 |
"execution": {
|
101 |
"iopub.execute_input": "2024-12-08T19:21:56.660152Z",
|
|
|
174 |
},
|
175 |
{
|
176 |
"cell_type": "code",
|
177 |
+
"execution_count": 21,
|
178 |
"metadata": {},
|
179 |
"outputs": [],
|
180 |
"source": [
|
|
|
241 |
},
|
242 |
{
|
243 |
"cell_type": "code",
|
244 |
+
"execution_count": 22,
|
245 |
"metadata": {
|
246 |
"execution": {
|
247 |
"iopub.execute_input": "2024-12-08T19:32:38.175946Z",
|
|
|
252 |
},
|
253 |
"trusted": true
|
254 |
},
|
255 |
+
"outputs": [
|
256 |
+
{
|
257 |
+
"name": "stdout",
|
258 |
+
"output_type": "stream",
|
259 |
+
"text": [
|
260 |
+
"Verarbeite Video: Jq7iHVGevRQ\n",
|
261 |
+
"Transkript für 'Jq7iHVGevRQ' gespeichert als Jq7iHVGevRQ.pdf.\n",
|
262 |
+
"Verarbeite Video: q9eWAtZxrW8\n",
|
263 |
+
"Transkript für 'q9eWAtZxrW8' gespeichert als q9eWAtZxrW8.pdf.\n",
|
264 |
+
"Verarbeite Video: NmjX3mkVTM4\n",
|
265 |
+
"Transkript für 'NmjX3mkVTM4' gespeichert als NmjX3mkVTM4.pdf.\n",
|
266 |
+
"Verarbeite Video: gELlAym0eJM\n",
|
267 |
+
"Transkript für 'gELlAym0eJM' gespeichert als gELlAym0eJM.pdf.\n",
|
268 |
+
"Verarbeite Video: qT2pbTlsNyk\n",
|
269 |
+
"Transkript für 'qT2pbTlsNyk' gespeichert als qT2pbTlsNyk.pdf.\n"
|
270 |
+
]
|
271 |
+
}
|
272 |
+
],
|
273 |
"source": [
|
274 |
"# Eingabe der URLs\n",
|
275 |
+
"urls = \"https://www.youtube.com/playlist?list=PLfRDp3S7rLduqUTa6oXe_Zlv7bEeD06t6\"\n",
|
276 |
"\n",
|
277 |
"pdf_filenames = [] # Liste zur Speicherung der PDF-Dateinamen\n",
|
278 |
"\n",
|
|
|
290 |
" else:\n",
|
291 |
" video_urls = [url]\n",
|
292 |
"\n",
|
293 |
+
" for video_url in video_urls[:5]:\n",
|
294 |
" try:\n",
|
295 |
" yt = YouTube(video_url)\n",
|
296 |
" video_id = yt.video_id\n",
|
|
|
318 |
},
|
319 |
{
|
320 |
"cell_type": "code",
|
321 |
+
"execution_count": 23,
|
322 |
"metadata": {},
|
323 |
+
"outputs": [
|
324 |
+
{
|
325 |
+
"name": "stdout",
|
326 |
+
"output_type": "stream",
|
327 |
+
"text": [
|
328 |
+
"ZIP-Archiv 'transcripts.zip' wurde erstellt.\n"
|
329 |
+
]
|
330 |
+
}
|
331 |
+
],
|
332 |
"source": [
|
333 |
"\n",
|
334 |
"# Erstellen eines ZIP-Archivs\n",
|
|
|
351 |
},
|
352 |
{
|
353 |
"cell_type": "code",
|
354 |
+
"execution_count": 24,
|
355 |
"metadata": {
|
356 |
"execution": {
|
357 |
"iopub.execute_input": "2024-12-08T19:30:12.194584Z",
|
|
|
401 |
"name": "python",
|
402 |
"nbconvert_exporter": "python",
|
403 |
"pygments_lexer": "ipython3",
|
404 |
+
"version": "3.12.1"
|
405 |
}
|
406 |
},
|
407 |
"nbformat": 4,
|