deepak191z committed on
Commit
68adc7a
·
verified ·
1 Parent(s): 6846b53

Update index.js

Browse files
Files changed (1) hide show
  1. index.js +81 -333
index.js CHANGED
@@ -4,349 +4,97 @@ const bodyParser = require('body-parser');
4
  const cors = require('cors');
5
 
6
  const app = express();
7
- app.use(bodyParser.urlencoded({ extended: true }));
8
  app.use(bodyParser.json());
9
  app.use(cors());
10
 
11
- // Global browser instance for reuse
12
- let globalBrowser = null;
 
 
 
13
 
14
- // Initialize browser once on startup
15
- async function initBrowser() {
16
- if (!globalBrowser) {
17
- globalBrowser = await chromium.launch({
18
- headless: true,
19
- args: [
20
- '--no-sandbox',
21
- '--disable-setuid-sandbox',
22
- '--disable-dev-shm-usage',
23
- '--disable-background-timer-throttling',
24
- '--disable-backgrounding-occluded-windows',
25
- '--disable-renderer-backgrounding',
26
- '--disable-features=TranslateUI',
27
- '--disable-ipc-flooding-protection'
28
- ]
29
- });
30
-
31
- // Handle browser close on process exit
32
- process.on('exit', async () => {
33
- if (globalBrowser) await globalBrowser.close();
34
- });
35
- process.on('SIGINT', async () => {
36
- if (globalBrowser) await globalBrowser.close();
37
- process.exit();
38
- });
39
- }
40
- return globalBrowser;
41
- }
42
-
43
- const html = `<!DOCTYPE html>
44
  <html lang="en">
45
  <head>
46
- <meta charset="UTF-8">
47
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
48
- <title>Fast YouTube Transcript Generator</title>
49
- <style>
50
- body {
51
- font-family: Arial, sans-serif;
52
- max-width: 800px;
53
- margin: 0 auto;
54
- padding: 20px;
55
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
56
- min-height: 100vh;
57
- color: white;
58
- }
59
- .container {
60
- background: rgba(255, 255, 255, 0.1);
61
- backdrop-filter: blur(10px);
62
- border-radius: 15px;
63
- padding: 30px;
64
- box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
65
- }
66
- h1 {
67
- text-align: center;
68
- margin-bottom: 30px;
69
- font-size: 2.5em;
70
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
71
- }
72
- form {
73
- display: flex;
74
- flex-direction: column;
75
- gap: 15px;
76
- }
77
- input, button {
78
- padding: 15px;
79
- border: none;
80
- border-radius: 10px;
81
- font-size: 16px;
82
- }
83
- input {
84
- background: rgba(255, 255, 255, 0.9);
85
- color: #333;
86
- }
87
- button {
88
- background: linear-gradient(45deg, #ff6b6b, #ee5a24);
89
- color: white;
90
- cursor: pointer;
91
- font-weight: bold;
92
- transition: transform 0.2s, box-shadow 0.2s;
93
- }
94
- button:hover {
95
- transform: translateY(-2px);
96
- box-shadow: 0 5px 15px rgba(0,0,0,0.3);
97
- }
98
- button:disabled {
99
- opacity: 0.6;
100
- cursor: not-allowed;
101
- transform: none;
102
- }
103
- #result {
104
- white-space: pre-wrap;
105
- background: rgba(255, 255, 255, 0.1);
106
- padding: 20px;
107
- border-radius: 10px;
108
- margin-top: 20px;
109
- max-height: 400px;
110
- overflow-y: auto;
111
- border: 1px solid rgba(255, 255, 255, 0.2);
112
- }
113
- .loading {
114
- display: inline-block;
115
- width: 20px;
116
- height: 20px;
117
- border: 3px solid rgba(255,255,255,.3);
118
- border-radius: 50%;
119
- border-top-color: #fff;
120
- animation: spin 1s ease-in-out infinite;
121
- }
122
- @keyframes spin {
123
- to { transform: rotate(360deg); }
124
- }
125
- .status {
126
- margin-top: 10px;
127
- font-weight: bold;
128
- text-align: center;
129
- }
130
- </style>
131
  </head>
132
  <body>
133
- <div class="container">
134
- <h1>Fast YouTube Transcript</h1>
135
- <form id="transcriptForm">
136
- <input type="text" id="videoUrl" name="videoUrl" placeholder="Enter YouTube Video URL..." required>
137
- <button type="submit" id="submitBtn">
138
- <span id="btnText">Generate Transcript</span>
139
- <span id="loader" class="loading" style="display: none;"></span>
140
- </button>
141
- </form>
142
- <div id="status" class="status"></div>
143
- <div id="result"></div>
144
- </div>
145
-
146
- <script>
147
- document.getElementById('transcriptForm').addEventListener('submit', async (e) => {
148
- e.preventDefault();
149
- const videoUrl = document.getElementById('videoUrl').value;
150
- const resultDiv = document.getElementById('result');
151
- const statusDiv = document.getElementById('status');
152
- const submitBtn = document.getElementById('submitBtn');
153
- const btnText = document.getElementById('btnText');
154
- const loader = document.getElementById('loader');
155
-
156
- // Update UI for loading state
157
- submitBtn.disabled = true;
158
- btnText.style.display = 'none';
159
- loader.style.display = 'inline-block';
160
- statusDiv.textContent = 'Extracting transcript...';
161
- resultDiv.textContent = '';
162
-
163
- const startTime = Date.now();
164
-
165
- try {
166
- const response = await fetch('/extract-transcript', {
167
- method: 'POST',
168
- headers: {
169
- 'Content-Type': 'application/json',
170
- },
171
- body: JSON.stringify({ videoUrl }),
172
- });
173
-
174
- const endTime = Date.now();
175
- const duration = ((endTime - startTime) / 1000).toFixed(1);
176
-
177
- if (response.ok) {
178
- const data = await response.json();
179
- resultDiv.textContent = data.transcript;
180
- statusDiv.textContent = 'SUCCESS: Transcript extracted in ' + duration + 's';
181
- } else {
182
- const error = await response.text();
183
- resultDiv.textContent = 'Error: ' + error;
184
- statusDiv.textContent = 'ERROR: Failed to extract transcript';
185
- }
186
- } catch (error) {
187
- console.error('Error:', error);
188
- resultDiv.textContent = 'Network error occurred. Please try again.';
189
- statusDiv.textContent = 'ERROR: Network error';
190
- } finally {
191
- // Reset UI
192
- submitBtn.disabled = false;
193
- btnText.style.display = 'inline';
194
- loader.style.display = 'none';
195
- }
196
  });
197
- </script>
 
 
 
 
 
 
198
  </body>
199
- </html>`;
200
-
201
- app.get('/', (req, res) => {
202
- res.send(html);
203
- });
204
-
205
- app.post('/extract-transcript', async (req, res) => {
206
- const { videoUrl } = req.body;
207
-
208
- if (!videoUrl) {
209
- return res.status(400).send('videoUrl is required');
210
- }
211
-
212
- // Validate YouTube URL
213
- if (!videoUrl.includes('youtube.com/watch') && !videoUrl.includes('youtu.be/')) {
214
- return res.status(400).send('Please provide a valid YouTube URL');
215
- }
216
-
217
- const browser = await initBrowser();
218
- let context = null;
219
- let page = null;
220
-
221
- try {
222
- // Create a new context for this request
223
- context = await browser.newContext({
224
- userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
225
- });
226
-
227
- page = await context.newPage();
228
-
229
- // Block unnecessary resources to speed up loading
230
- await page.route('**/*', (route) => {
231
- const resourceType = route.request().resourceType();
232
- if (['image', 'stylesheet', 'font', 'media'].includes(resourceType)) {
233
- route.abort();
234
- } else {
235
- route.continue();
236
- }
237
- });
238
-
239
- // Navigate to the video with shorter timeout
240
- await page.goto(videoUrl, {
241
- waitUntil: 'domcontentloaded',
242
- timeout: 30000
243
- });
244
-
245
- // Wait for the page to be ready with shorter timeout
246
- await page.waitForLoadState('networkidle', { timeout: 10000 });
247
-
248
- // Try multiple selectors for the transcript button
249
- const transcriptSelectors = [
250
- 'button[aria-label="Show transcript"]',
251
- 'button[aria-label*="transcript"]',
252
- '[data-text="Show transcript"]',
253
- 'ytd-button-renderer:has-text("Show transcript")'
254
- ];
255
-
256
- let transcriptButton = null;
257
- for (const selector of transcriptSelectors) {
258
- try {
259
- transcriptButton = await page.waitForSelector(selector, { timeout: 5000 });
260
- if (transcriptButton) break;
261
- } catch (e) {
262
- continue;
263
- }
264
- }
265
-
266
- if (!transcriptButton) {
267
- return res.status(404).send('Transcript not available for this video');
268
- }
269
-
270
- // Click the transcript button
271
- await transcriptButton.click();
272
-
273
- // Wait for transcript to load with multiple possible selectors
274
- const transcriptContainerSelectors = [
275
- 'ytd-transcript-segment-list-renderer',
276
- '[data-testid="transcript-segment-list"]',
277
- '.ytd-transcript-segment-list-renderer'
278
- ];
279
-
280
- let transcriptContainer = null;
281
- for (const selector of transcriptContainerSelectors) {
282
- try {
283
- transcriptContainer = await page.waitForSelector(selector, { timeout: 10000 });
284
- if (transcriptContainer) break;
285
- } catch (e) {
286
- continue;
287
- }
288
- }
289
-
290
- if (!transcriptContainer) {
291
- return res.status(404).send('Could not load transcript content');
292
- }
293
-
294
- // Extract transcript with multiple selector attempts
295
- const transcript = await page.evaluate(() => {
296
- // Try multiple selectors for transcript segments
297
- const selectors = [
298
- 'ytd-transcript-segment-renderer .segment-text',
299
- '.transcript-segment .segment-text',
300
- '[data-testid="transcript-segment"] .segment-text',
301
- 'ytd-transcript-segment-renderer yt-formatted-string'
302
- ];
303
-
304
- for (const selector of selectors) {
305
- const elements = Array.from(document.querySelectorAll(selector));
306
- if (elements.length > 0) {
307
- return elements.map(element => element.innerText.trim()).join('\n');
308
- }
309
- }
310
-
311
- // Fallback: try to get any text from transcript container
312
- const container = document.querySelector('ytd-transcript-segment-list-renderer');
313
- if (container) {
314
- return container.innerText.replace(/\n+/g, '\n').trim();
315
- }
316
-
317
- return 'No transcript text found';
318
- });
319
-
320
- if (!transcript || transcript === 'No transcript text found') {
321
- return res.status(404).send('Could not extract transcript text');
322
- }
323
-
324
- res.json({ transcript });
325
-
326
- } catch (error) {
327
- console.error('Error extracting transcript:', error);
328
-
329
- if (error.message.includes('timeout')) {
330
- res.status(408).send('Request timeout - video may be loading slowly');
331
- } else if (error.message.includes('net::ERR_')) {
332
- res.status(502).send('Network error accessing YouTube');
333
- } else {
334
- res.status(500).send('Error extracting transcript');
335
- }
336
- } finally {
337
- // Always close the context, but keep browser running
338
- if (context) {
339
- await context.close();
340
- }
341
- }
342
  });
343
 
344
  const PORT = process.env.PORT || 7860;
345
-
346
- // Initialize browser on startup
347
- initBrowser().then(() => {
348
- app.listen(PORT, () => {
349
- console.log('Fast YouTube Transcript Server running on port ' + PORT);
350
- console.log('Access the web interface at http://localhost:' + PORT);
351
- });
352
- }).catch(console.error);
 
4
  const cors = require('cors');
5
 
6
  const app = express();
 
7
  app.use(bodyParser.json());
8
  app.use(cors());
9
 
10
// Launch one shared headless Chromium instance at startup so that each request
// only has to create its own browser context.
// NOTE: `browser` stays undefined until the launch promise resolves.
let browser;
(async () => {
  browser = await chromium.launch({ headless: true });
})().catch((err) => {
  // Without a browser the server cannot serve any transcript request, so fail
  // fast with a clear message instead of an anonymous unhandled rejection.
  console.error('Failed to launch Chromium:', err);
  process.exit(1);
});
15
 
16
// Minimal single-page UI: a form that POSTs the video URL as JSON to
// /transcript and renders the returned transcript (or error message) as text.
// The client script deliberately avoids template literals so that nothing
// inside this outer template literal is interpolated by the server.
const html = `
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>YouTube Transcript Generator</title>
  <style>
    body { font-family: Arial, sans-serif; max-width: 600px; margin: 20px auto; }
    input, button { width: 100%; padding: 8px; margin: 8px 0; }
    #result { white-space: pre-wrap; background: #f5f5f5; padding: 10px; border-radius: 4px; }
  </style>
</head>
<body>
  <h2>YouTube Transcript Generator</h2>
  <form id="form">
    <input type="text" id="videoUrl" placeholder="YouTube Video URL" required />
    <button type="submit">Generate</button>
  </form>
  <div id="result"></div>
  <script>
    document.getElementById('form').addEventListener('submit', async e => {
      e.preventDefault();
      const url = document.getElementById('videoUrl').value;
      const resDiv = document.getElementById('result');
      const btn = e.target.querySelector('button');
      resDiv.textContent = 'Loading...';
      // Prevent duplicate submissions while a request is in flight.
      btn.disabled = true;
      try {
        const r = await fetch('/transcript', {
          method: 'POST', headers: {'Content-Type':'application/json'}, body: JSON.stringify({ url })
        });
        const { transcript, error } = await r.json();
        // Never leave the result area blank: fall back to an explicit message.
        resDiv.textContent = transcript || error || 'No transcript found';
      } catch (err) {
        resDiv.textContent = 'Unexpected error';
      } finally {
        btn.disabled = false;
      }
    });
  </script>
</body>
</html>
`;
57
+
58
// Serve the single-page UI at the site root; the request object is unused.
app.get('/', (_req, res) => {
  res.send(html);
});
59
+
60
/**
 * Report whether `value` is an http(s) URL whose host is YouTube.
 *
 * The server-side browser navigates to whatever URL the client submits, so
 * anything that is not a YouTube address is rejected before navigation.
 *
 * @param {unknown} value - Candidate URL taken from the request body.
 * @returns {boolean} True for http/https URLs on youtu.be, youtube.com or a
 *   subdomain of youtube.com; false for everything else, including
 *   non-strings and unparseable input.
 */
function isYouTubeUrl(value) {
  if (typeof value !== 'string') return false;

  let parsed;
  try {
    parsed = new URL(value);
  } catch (err) {
    // Unparseable input is an expected case, not an error worth logging.
    return false;
  }

  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return false;

  const host = parsed.hostname;
  return host === 'youtu.be' || host === 'youtube.com' || host.endsWith('.youtube.com');
}

/**
 * POST /transcript
 *
 * Body: JSON `{ url }`. Opens the video page in a fresh browser context,
 * opens the transcript panel and responds with `{ transcript }`, one segment
 * per line. Failures respond with `{ error }` and a non-2xx status:
 *   400 - missing or non-YouTube URL
 *   503 - the shared browser has not finished launching yet
 *   404 - the transcript panel opened but contained no text
 *   500 - any other failure while driving the page
 */
app.post('/transcript', async (req, res) => {
  const { url } = req.body;
  if (!url) return res.status(400).json({ error: 'URL required' });
  if (!isYouTubeUrl(url)) {
    return res.status(400).json({ error: 'Please provide a valid YouTube URL' });
  }
  // The server starts listening before the browser launch has resolved.
  if (!browser) {
    return res.status(503).json({ error: 'Browser is still starting, please retry' });
  }

  let context;
  try {
    // Everything that can reject lives inside the try block so the client
    // always gets a response and the context is always cleaned up.
    context = await browser.newContext();

    // Block images, styles, fonts, media for speed
    await context.route('**/*', (route) => {
      const resourceType = route.request().resourceType();
      const isBlocked = ['image', 'stylesheet', 'font', 'media'].includes(resourceType);
      // Requests still in flight when the context closes reject their route
      // call; that is expected here, so it is deliberately ignored.
      Promise.resolve(isBlocked ? route.abort() : route.continue()).catch(() => {});
    });

    const page = await context.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded' });

    // Open transcript menu
    const moreBtn = await page.$('button[aria-label="More actions"]');
    if (moreBtn) await moreBtn.click();
    await page.click('tp-yt-paper-item[role="menuitem"]:has-text("Open transcript")');

    await page.waitForSelector('ytd-transcript-renderer', { timeout: 5000 });

    const transcript = await page.$$eval(
      'ytd-transcript-segment-renderer .segment-text',
      (els) => els.map((el) => el.textContent.trim()).join('\n')
    );

    // No matching segments yields an empty string; report that as a miss
    // rather than as a successful empty transcript.
    if (!transcript) {
      return res.status(404).json({ error: 'Transcript not available for this video' });
    }

    res.json({ transcript });
  } catch (err) {
    console.error(err);
    if (!res.headersSent) {
      res.status(500).json({ error: 'Failed to extract transcript' });
    }
  } finally {
    if (context) {
      // The response has already been sent; a failed close is only logged.
      await context.close().catch((closeErr) => console.error(closeErr));
    }
  }
});
98
 
99
// Listen on the port given by the environment, falling back to 7860.
const PORT = process.env.PORT || 7860;
app.listen(PORT, () => {
  console.log(`Server running on ${PORT}`);
});