bsenst committed on
Commit
c171b42
·
1 Parent(s): e392f4c

update youtube section

Browse files
src/03_low_code/video_transcripts/get_videos_for_youtube_channels.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
src/03_low_code/video_transcripts/youtube-transcript-extraction.ipynb CHANGED
@@ -28,7 +28,7 @@
28
  },
29
  {
30
  "cell_type": "code",
31
- "execution_count": 1,
32
  "metadata": {
33
  "execution": {
34
  "iopub.execute_input": "2024-12-08T19:21:41.981395Z",
@@ -39,9 +39,19 @@
39
  },
40
  "trusted": true
41
  },
42
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
43
  "source": [
44
- "# ! pip install pytube youtube-transcript-api reportlab -q"
45
  ]
46
  },
47
  {
@@ -54,7 +64,7 @@
54
  },
55
  {
56
  "cell_type": "code",
57
- "execution_count": 2,
58
  "metadata": {
59
  "execution": {
60
  "iopub.execute_input": "2024-12-08T19:21:56.471492Z",
@@ -85,7 +95,7 @@
85
  },
86
  {
87
  "cell_type": "code",
88
- "execution_count": 3,
89
  "metadata": {
90
  "execution": {
91
  "iopub.execute_input": "2024-12-08T19:21:56.660152Z",
@@ -164,7 +174,7 @@
164
  },
165
  {
166
  "cell_type": "code",
167
- "execution_count": null,
168
  "metadata": {},
169
  "outputs": [],
170
  "source": [
@@ -231,7 +241,7 @@
231
  },
232
  {
233
  "cell_type": "code",
234
- "execution_count": null,
235
  "metadata": {
236
  "execution": {
237
  "iopub.execute_input": "2024-12-08T19:32:38.175946Z",
@@ -242,10 +252,27 @@
242
  },
243
  "trusted": true
244
  },
245
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  "source": [
247
  "# Eingabe der URLs\n",
248
- "urls = \"https://www.youtube.com/playlist?list=PLf8HAovJg47MN7bswKf73pffom98Fx8Q8\"\n",
249
  "\n",
250
  "pdf_filenames = [] # Liste zur Speicherung der PDF-Dateinamen\n",
251
  "\n",
@@ -263,7 +290,7 @@
263
  " else:\n",
264
  " video_urls = [url]\n",
265
  "\n",
266
- " for video_url in video_urls:\n",
267
  " try:\n",
268
  " yt = YouTube(video_url)\n",
269
  " video_id = yt.video_id\n",
@@ -291,9 +318,17 @@
291
  },
292
  {
293
  "cell_type": "code",
294
- "execution_count": null,
295
  "metadata": {},
296
- "outputs": [],
 
 
 
 
 
 
 
 
297
  "source": [
298
  "\n",
299
  "# Erstellen eines ZIP-Archivs\n",
@@ -316,7 +351,7 @@
316
  },
317
  {
318
  "cell_type": "code",
319
- "execution_count": 13,
320
  "metadata": {
321
  "execution": {
322
  "iopub.execute_input": "2024-12-08T19:30:12.194584Z",
@@ -366,7 +401,7 @@
366
  "name": "python",
367
  "nbconvert_exporter": "python",
368
  "pygments_lexer": "ipython3",
369
- "version": "3.10.14"
370
  }
371
  },
372
  "nbformat": 4,
 
28
  },
29
  {
30
  "cell_type": "code",
31
+ "execution_count": 18,
32
  "metadata": {
33
  "execution": {
34
  "iopub.execute_input": "2024-12-08T19:21:41.981395Z",
 
39
  },
40
  "trusted": true
41
  },
42
+ "outputs": [
43
+ {
44
+ "name": "stdout",
45
+ "output_type": "stream",
46
+ "text": [
47
+ "\n",
48
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
49
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n"
50
+ ]
51
+ }
52
+ ],
53
  "source": [
54
+ "! pip install pytube youtube-transcript-api reportlab -q"
55
  ]
56
  },
57
  {
 
64
  },
65
  {
66
  "cell_type": "code",
67
+ "execution_count": 19,
68
  "metadata": {
69
  "execution": {
70
  "iopub.execute_input": "2024-12-08T19:21:56.471492Z",
 
95
  },
96
  {
97
  "cell_type": "code",
98
+ "execution_count": 20,
99
  "metadata": {
100
  "execution": {
101
  "iopub.execute_input": "2024-12-08T19:21:56.660152Z",
 
174
  },
175
  {
176
  "cell_type": "code",
177
+ "execution_count": 21,
178
  "metadata": {},
179
  "outputs": [],
180
  "source": [
 
241
  },
242
  {
243
  "cell_type": "code",
244
+ "execution_count": 22,
245
  "metadata": {
246
  "execution": {
247
  "iopub.execute_input": "2024-12-08T19:32:38.175946Z",
 
252
  },
253
  "trusted": true
254
  },
255
+ "outputs": [
256
+ {
257
+ "name": "stdout",
258
+ "output_type": "stream",
259
+ "text": [
260
+ "Verarbeite Video: Jq7iHVGevRQ\n",
261
+ "Transkript für 'Jq7iHVGevRQ' gespeichert als Jq7iHVGevRQ.pdf.\n",
262
+ "Verarbeite Video: q9eWAtZxrW8\n",
263
+ "Transkript für 'q9eWAtZxrW8' gespeichert als q9eWAtZxrW8.pdf.\n",
264
+ "Verarbeite Video: NmjX3mkVTM4\n",
265
+ "Transkript für 'NmjX3mkVTM4' gespeichert als NmjX3mkVTM4.pdf.\n",
266
+ "Verarbeite Video: gELlAym0eJM\n",
267
+ "Transkript für 'gELlAym0eJM' gespeichert als gELlAym0eJM.pdf.\n",
268
+ "Verarbeite Video: qT2pbTlsNyk\n",
269
+ "Transkript für 'qT2pbTlsNyk' gespeichert als qT2pbTlsNyk.pdf.\n"
270
+ ]
271
+ }
272
+ ],
273
  "source": [
274
  "# Eingabe der URLs\n",
275
+ "urls = \"https://www.youtube.com/playlist?list=PLfRDp3S7rLduqUTa6oXe_Zlv7bEeD06t6\"\n",
276
  "\n",
277
  "pdf_filenames = [] # Liste zur Speicherung der PDF-Dateinamen\n",
278
  "\n",
 
290
  " else:\n",
291
  " video_urls = [url]\n",
292
  "\n",
293
+ " for video_url in video_urls[:5]:\n",
294
  " try:\n",
295
  " yt = YouTube(video_url)\n",
296
  " video_id = yt.video_id\n",
 
318
  },
319
  {
320
  "cell_type": "code",
321
+ "execution_count": 23,
322
  "metadata": {},
323
+ "outputs": [
324
+ {
325
+ "name": "stdout",
326
+ "output_type": "stream",
327
+ "text": [
328
+ "ZIP-Archiv 'transcripts.zip' wurde erstellt.\n"
329
+ ]
330
+ }
331
+ ],
332
  "source": [
333
  "\n",
334
  "# Erstellen eines ZIP-Archivs\n",
 
351
  },
352
  {
353
  "cell_type": "code",
354
+ "execution_count": 24,
355
  "metadata": {
356
  "execution": {
357
  "iopub.execute_input": "2024-12-08T19:30:12.194584Z",
 
401
  "name": "python",
402
  "nbconvert_exporter": "python",
403
  "pygments_lexer": "ipython3",
404
+ "version": "3.12.1"
405
  }
406
  },
407
  "nbformat": 4,