deepak191z committed on
Commit
564251d
·
verified ·
1 Parent(s): e03131a

Update index.js

Browse files
Files changed (1) hide show
  1. index.js +264 -45
index.js CHANGED
@@ -8,72 +8,191 @@ app.use(bodyParser.urlencoded({ extended: true }));
8
  app.use(bodyParser.json());
9
  app.use(cors());
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  const html = `
12
  <!DOCTYPE html>
13
  <html lang="en">
14
  <head>
15
  <meta charset="UTF-8">
16
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
17
- <title>YouTube Transcript Generator (Playwright)</title>
18
  <style>
19
  body {
20
  font-family: Arial, sans-serif;
21
  max-width: 800px;
22
  margin: 0 auto;
23
  padding: 20px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  form {
26
  display: flex;
27
  flex-direction: column;
 
28
  }
29
  input, button {
30
- margin: 10px 0;
31
- padding: 5px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  #result {
34
  white-space: pre-wrap;
35
- background-color: #f0f0f0;
36
- padding: 10px;
37
- border-radius: 5px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
  </style>
40
  </head>
41
  <body>
42
- <h1>YouTube Transcript Generator (Playwright)</h1>
43
- <form id="transcriptForm">
44
- <input type="text" id="videoUrl" name="videoUrl" placeholder="YouTube Video URL" required>
45
-
46
- <button type="submit">Generate Transcript</button>
47
- </form>
48
- <div id="result"></div>
49
-
 
 
 
 
 
50
  <script>
51
  document.getElementById('transcriptForm').addEventListener('submit', async (e) => {
52
  e.preventDefault();
53
  const videoUrl = document.getElementById('videoUrl').value;
54
-
55
  const resultDiv = document.getElementById('result');
56
-
57
- resultDiv.textContent = 'Generating transcript...';
58
-
 
 
 
 
 
 
 
 
 
 
 
59
  try {
60
  const response = await fetch('/extract-transcript', {
61
  method: 'POST',
62
  headers: {
63
  'Content-Type': 'application/json',
64
  },
65
- body: JSON.stringify({ videoUrl}),
66
  });
67
-
 
 
 
68
  if (response.ok) {
69
  const data = await response.json();
70
  resultDiv.textContent = data.transcript;
 
71
  } else {
72
- resultDiv.textContent = 'Error generating transcript. Please try again.';
 
 
73
  }
74
  } catch (error) {
75
  console.error('Error:', error);
76
- resultDiv.textContent = 'An error occurred. Please try again.';
 
 
 
 
 
 
77
  }
78
  });
79
  </script>
@@ -92,44 +211,144 @@ app.post('/extract-transcript', async (req, res) => {
92
  return res.status(400).send('videoUrl is required');
93
  }
94
 
95
- const browser = await chromium.launch();
96
- const context = await browser.newContext();
97
- const page = await context.newPage();
 
 
 
 
 
98
 
99
  try {
100
- await page.goto(videoUrl, { waitUntil: 'networkidle' });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
- // Set viewport size
103
- await page.setViewportSize({ width: 1920, height: 1080 });
104
 
105
- // Try to expand description if button is present (optional)
106
- const expandButton = await page.$('tp-yt-paper-button#expand');
107
- if (expandButton) {
108
- await expandButton.click();
 
 
 
 
 
 
 
 
 
 
 
 
109
  }
110
 
111
- // Click the "Show transcript" button
112
- await page.click('button[aria-label="Show transcript"]');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- // Wait for the transcript container to appear
115
- await page.waitForSelector('ytd-transcript-segment-list-renderer');
 
116
 
117
- // Extract the transcript text
118
  const transcript = await page.evaluate(() => {
119
- const elements = Array.from(document.querySelectorAll('ytd-transcript-segment-renderer .segment-text'));
120
- return elements.map(element => element.innerText).join('\n');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  });
122
 
 
 
 
 
123
  res.json({ transcript });
124
 
125
  } catch (error) {
126
  console.error('Error extracting transcript:', error);
127
- res.status(500).send('Error extracting transcript');
 
 
 
 
 
 
 
128
  } finally {
129
- await browser.close();
 
 
 
130
  }
131
  });
132
- const PORT = 7860;
133
- app.listen(PORT, () => {
134
- console.log(`Server is running on port ${PORT}`);
135
- });
 
 
 
 
 
 
 
8
  app.use(bodyParser.json());
9
  app.use(cors());
10
 
11
// Shared Chromium instance reused across requests. It is null until the first
// successful launch and is reset to null when the browser disconnects.
let globalBrowser = null;

// Launch currently in flight (if any) so concurrent callers share one launch.
let browserLaunchPromise = null;

// Ensures the process signal handlers are registered only once.
let shutdownHooksInstalled = false;

/**
 * Close the shared browser (if one is running) and terminate the process.
 * Registered as the handler for termination signals.
 */
async function shutdownBrowserAndExit() {
  const browser = globalBrowser;
  globalBrowser = null;
  try {
    if (browser) {
      await browser.close();
    }
  } catch (error) {
    console.error('Error closing browser during shutdown:', error);
  } finally {
    process.exit();
  }
}

/**
 * Register one-time SIGINT/SIGTERM handlers that close the shared browser.
 *
 * Cleanup is attached to signals rather than the 'exit' event because 'exit'
 * listeners must be synchronous: an awaited close() there never completes.
 */
function installShutdownHooks() {
  if (shutdownHooksInstalled) {
    return;
  }
  shutdownHooksInstalled = true;
  process.once('SIGINT', shutdownBrowserAndExit);
  process.once('SIGTERM', shutdownBrowserAndExit);
}

/**
 * Launch Chromium with the server's flags and publish it as the shared
 * instance.
 *
 * @returns {Promise<object>} The launched browser.
 */
async function launchSharedBrowser() {
  const browser = await chromium.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-background-timer-throttling',
      '--disable-backgrounding-occluded-windows',
      '--disable-renderer-backgrounding',
      '--disable-features=TranslateUI',
      '--disable-ipc-flooding-protection',
    ],
  });

  // Forget a browser that crashed or was closed so the next call relaunches
  // instead of handing out a dead instance.
  browser.on('disconnected', () => {
    if (globalBrowser === browser) {
      globalBrowser = null;
    }
  });

  globalBrowser = browser;
  installShutdownHooks();
  return browser;
}

/**
 * Return the shared browser, launching it on first use.
 *
 * Concurrent calls made while a launch is in progress all receive the same
 * browser. A failed launch is not cached, so a later call retries.
 *
 * @returns {Promise<object>} The shared browser instance.
 */
async function initBrowser() {
  if (globalBrowser) {
    return globalBrowser;
  }

  if (!browserLaunchPromise) {
    browserLaunchPromise = launchSharedBrowser();
  }

  const launch = browserLaunchPromise;
  try {
    return await launch;
  } finally {
    // Clear only our own launch so a newer in-flight launch is not discarded.
    if (browserLaunchPromise === launch) {
      browserLaunchPromise = null;
    }
  }
}
42
+
43
  const html = `
44
  <!DOCTYPE html>
45
  <html lang="en">
46
  <head>
47
  <meta charset="UTF-8">
48
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
49
+ <title>Fast YouTube Transcript Generator</title>
50
  <style>
51
  body {
52
  font-family: Arial, sans-serif;
53
  max-width: 800px;
54
  margin: 0 auto;
55
  padding: 20px;
56
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
57
+ min-height: 100vh;
58
+ color: white;
59
+ }
60
+ .container {
61
+ background: rgba(255, 255, 255, 0.1);
62
+ backdrop-filter: blur(10px);
63
+ border-radius: 15px;
64
+ padding: 30px;
65
+ box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
66
+ }
67
+ h1 {
68
+ text-align: center;
69
+ margin-bottom: 30px;
70
+ font-size: 2.5em;
71
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
72
  }
73
  form {
74
  display: flex;
75
  flex-direction: column;
76
+ gap: 15px;
77
  }
78
  input, button {
79
+ padding: 15px;
80
+ border: none;
81
+ border-radius: 10px;
82
+ font-size: 16px;
83
+ }
84
+ input {
85
+ background: rgba(255, 255, 255, 0.9);
86
+ color: #333;
87
+ }
88
+ button {
89
+ background: linear-gradient(45deg, #ff6b6b, #ee5a24);
90
+ color: white;
91
+ cursor: pointer;
92
+ font-weight: bold;
93
+ transition: transform 0.2s, box-shadow 0.2s;
94
+ }
95
+ button:hover {
96
+ transform: translateY(-2px);
97
+ box-shadow: 0 5px 15px rgba(0,0,0,0.3);
98
+ }
99
+ button:disabled {
100
+ opacity: 0.6;
101
+ cursor: not-allowed;
102
+ transform: none;
103
  }
104
  #result {
105
  white-space: pre-wrap;
106
+ background: rgba(255, 255, 255, 0.1);
107
+ padding: 20px;
108
+ border-radius: 10px;
109
+ margin-top: 20px;
110
+ max-height: 400px;
111
+ overflow-y: auto;
112
+ border: 1px solid rgba(255, 255, 255, 0.2);
113
+ }
114
+ .loading {
115
+ display: inline-block;
116
+ width: 20px;
117
+ height: 20px;
118
+ border: 3px solid rgba(255,255,255,.3);
119
+ border-radius: 50%;
120
+ border-top-color: #fff;
121
+ animation: spin 1s ease-in-out infinite;
122
+ }
123
+ @keyframes spin {
124
+ to { transform: rotate(360deg); }
125
+ }
126
+ .status {
127
+ margin-top: 10px;
128
+ font-weight: bold;
129
+ text-align: center;
130
  }
131
  </style>
132
  </head>
133
  <body>
134
+ <div class="container">
135
+ <h1>⚡ Fast YouTube Transcript</h1>
136
+ <form id="transcriptForm">
137
+ <input type="text" id="videoUrl" name="videoUrl" placeholder="Enter YouTube Video URL..." required>
138
+ <button type="submit" id="submitBtn">
139
+ <span id="btnText">Generate Transcript</span>
140
+ <span id="loader" class="loading" style="display: none;"></span>
141
+ </button>
142
+ </form>
143
+ <div id="status" class="status"></div>
144
+ <div id="result"></div>
145
+ </div>
146
+
147
  <script>
148
  document.getElementById('transcriptForm').addEventListener('submit', async (e) => {
149
  e.preventDefault();
150
  const videoUrl = document.getElementById('videoUrl').value;
 
151
  const resultDiv = document.getElementById('result');
152
+ const statusDiv = document.getElementById('status');
153
+ const submitBtn = document.getElementById('submitBtn');
154
+ const btnText = document.getElementById('btnText');
155
+ const loader = document.getElementById('loader');
156
+
157
+ // Update UI for loading state
158
+ submitBtn.disabled = true;
159
+ btnText.style.display = 'none';
160
+ loader.style.display = 'inline-block';
161
+ statusDiv.textContent = 'Extracting transcript...';
162
+ resultDiv.textContent = '';
163
+
164
+ const startTime = Date.now();
165
+
166
  try {
167
  const response = await fetch('/extract-transcript', {
168
  method: 'POST',
169
  headers: {
170
  'Content-Type': 'application/json',
171
  },
172
+ body: JSON.stringify({ videoUrl }),
173
  });
174
+
175
+ const endTime = Date.now();
176
+ const duration = ((endTime - startTime) / 1000).toFixed(1);
177
+
178
  if (response.ok) {
179
  const data = await response.json();
180
  resultDiv.textContent = data.transcript;
181
+ statusDiv.textContent = `✅ Transcript extracted in ${duration}s`;
182
  } else {
183
+ const error = await response.text();
184
+ resultDiv.textContent = `Error: ${error}`;
185
+ statusDiv.textContent = '❌ Failed to extract transcript';
186
  }
187
  } catch (error) {
188
  console.error('Error:', error);
189
+ resultDiv.textContent = 'Network error occurred. Please try again.';
190
+ statusDiv.textContent = '❌ Network error';
191
+ } finally {
192
+ // Reset UI
193
+ submitBtn.disabled = false;
194
+ btnText.style.display = 'inline';
195
+ loader.style.display = 'none';
196
  }
197
  });
198
  </script>
 
211
  return res.status(400).send('videoUrl is required');
212
  }
213
 
214
+ // Validate YouTube URL
215
+ if (!videoUrl.includes('youtube.com/watch') && !videoUrl.includes('youtu.be/')) {
216
+ return res.status(400).send('Please provide a valid YouTube URL');
217
+ }
218
+
219
+ const browser = await initBrowser();
220
+ let context = null;
221
+ let page = null;
222
 
223
  try {
224
+ // Create a new context for this request
225
+ context = await browser.newContext({
226
+ userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
227
+ });
228
+
229
+ page = await context.newPage();
230
+
231
+ // Block unnecessary resources to speed up loading
232
+ await page.route('**/*', (route) => {
233
+ const resourceType = route.request().resourceType();
234
+ if (['image', 'stylesheet', 'font', 'media'].includes(resourceType)) {
235
+ route.abort();
236
+ } else {
237
+ route.continue();
238
+ }
239
+ });
240
+
241
+ // Navigate to the video with shorter timeout
242
+ await page.goto(videoUrl, {
243
+ waitUntil: 'domcontentloaded',
244
+ timeout: 30000
245
+ });
246
 
247
+ // Wait for the page to be ready with shorter timeout
248
+ await page.waitForLoadState('networkidle', { timeout: 10000 });
249
 
250
+ // Try multiple selectors for the transcript button
251
+ const transcriptSelectors = [
252
+ 'button[aria-label="Show transcript"]',
253
+ 'button[aria-label*="transcript"]',
254
+ '[data-text="Show transcript"]',
255
+ 'ytd-button-renderer:has-text("Show transcript")'
256
+ ];
257
+
258
+ let transcriptButton = null;
259
+ for (const selector of transcriptSelectors) {
260
+ try {
261
+ transcriptButton = await page.waitForSelector(selector, { timeout: 5000 });
262
+ if (transcriptButton) break;
263
+ } catch (e) {
264
+ continue;
265
+ }
266
  }
267
 
268
+ if (!transcriptButton) {
269
+ return res.status(404).send('Transcript not available for this video');
270
+ }
271
+
272
+ // Click the transcript button
273
+ await transcriptButton.click();
274
+
275
+ // Wait for transcript to load with multiple possible selectors
276
+ const transcriptContainerSelectors = [
277
+ 'ytd-transcript-segment-list-renderer',
278
+ '[data-testid="transcript-segment-list"]',
279
+ '.ytd-transcript-segment-list-renderer'
280
+ ];
281
+
282
+ let transcriptContainer = null;
283
+ for (const selector of transcriptContainerSelectors) {
284
+ try {
285
+ transcriptContainer = await page.waitForSelector(selector, { timeout: 10000 });
286
+ if (transcriptContainer) break;
287
+ } catch (e) {
288
+ continue;
289
+ }
290
+ }
291
 
292
+ if (!transcriptContainer) {
293
+ return res.status(404).send('Could not load transcript content');
294
+ }
295
 
296
+ // Extract transcript with multiple selector attempts
297
  const transcript = await page.evaluate(() => {
298
+ // Try multiple selectors for transcript segments
299
+ const selectors = [
300
+ 'ytd-transcript-segment-renderer .segment-text',
301
+ '.transcript-segment .segment-text',
302
+ '[data-testid="transcript-segment"] .segment-text',
303
+ 'ytd-transcript-segment-renderer yt-formatted-string'
304
+ ];
305
+
306
+ for (const selector of selectors) {
307
+ const elements = Array.from(document.querySelectorAll(selector));
308
+ if (elements.length > 0) {
309
+ return elements.map(element => element.innerText.trim()).join('\n');
310
+ }
311
+ }
312
+
313
+ // Fallback: try to get any text from transcript container
314
+ const container = document.querySelector('ytd-transcript-segment-list-renderer');
315
+ if (container) {
316
+ return container.innerText.replace(/\n+/g, '\n').trim();
317
+ }
318
+
319
+ return 'No transcript text found';
320
  });
321
 
322
+ if (!transcript || transcript === 'No transcript text found') {
323
+ return res.status(404).send('Could not extract transcript text');
324
+ }
325
+
326
  res.json({ transcript });
327
 
328
  } catch (error) {
329
  console.error('Error extracting transcript:', error);
330
+
331
+ if (error.message.includes('timeout')) {
332
+ res.status(408).send('Request timeout - video may be loading slowly');
333
+ } else if (error.message.includes('net::ERR_')) {
334
+ res.status(502).send('Network error accessing YouTube');
335
+ } else {
336
+ res.status(500).send('Error extracting transcript');
337
+ }
338
  } finally {
339
+ // Always close the context, but keep browser running
340
+ if (context) {
341
+ await context.close();
342
+ }
343
  }
344
  });
345
+
346
// Port to listen on; falls back to 7860 when PORT is unset or empty.
const PORT = process.env.PORT || 7860;

// Launch the shared browser before listening so the server only accepts
// requests once it is able to serve them.
initBrowser()
  .then(() => {
    app.listen(PORT, () => {
      console.log(`🚀 Fast YouTube Transcript Server running on port ${PORT}`);
      console.log(`📝 Access the web interface at http://localhost:${PORT}`);
    });
  })
  .catch((error) => {
    // Exit non-zero so a supervisor or container runtime sees the failed
    // startup instead of a silent success or a hung process.
    console.error('Failed to start server:', error);
    process.exit(1);
  });