pvanand committed on
Commit
5f07afe
·
verified ·
1 Parent(s): 503f381

Update index.js

Browse files
Files changed (1) hide show
  1. index.js +68 -0
index.js CHANGED
@@ -338,6 +338,74 @@ app.post('/process', upload.single('htmlFile'), (req, res) => {
338
  }
339
  });
340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  const PORT = process.env.PORT || 3000;
342
  app.listen(PORT, () => {
343
  console.log(`Server running on http://localhost:${PORT}`);
 
338
  }
339
  });
340
 
341
+ // Add this function to handle script execution
342
/**
 * Runs a caller-supplied extraction script against an HTML document.
 *
 * The HTML is loaded with cheerio, and the script text becomes the body of a
 * function whose single parameter is `$` (the loaded cheerio instance).
 * Whatever the script returns is reported as `data`.
 *
 * SECURITY: `new Function` is NOT a sandbox. The script is compiled and run
 * inside this process and can reach process-wide globals such as `process`.
 * Only expose this to trusted callers, or move execution into an isolated
 * process/VM before accepting scripts from untrusted clients.
 *
 * @param {string} html - Markup handed to `cheerio.load`.
 * @param {string} scriptContent - JavaScript source used as the function
 *   body; it must `return` the value it wants reported.
 * @returns {{success: boolean, data: *, error: (string|null)}} On success,
 *   `data` is the script's return value and `error` is null. On failure
 *   (parse error, syntax error in the script, or anything thrown by it),
 *   `data` is null and `error` is a message string.
 */
function executeCheerioScript(html, scriptContent) {
  try {
    const $ = cheerio.load(html, {
      decodeEntities: false,
      xmlMode: false,
      lowerCaseTags: true
    });

    // WARNING: executes arbitrary caller-provided code (see SECURITY above).
    const extractorFunction = new Function('$', `
      ${scriptContent}
    `);

    return {
      success: true,
      data: extractorFunction($),
      error: null
    };
  } catch (err) {
    // Scripts may throw non-Error values (strings, null, plain objects).
    // Reading `.message` blindly would either crash this handler (null /
    // undefined) or report `undefined` as the error text.
    const message = err && typeof err.message === 'string'
      ? err.message
      : String(err);

    return {
      success: false,
      data: null,
      error: message
    };
  }
}
370
+
371
+ // cheerio extract endpoint
372
/**
 * POST /extract
 *
 * Runs a caller-supplied extractor script against HTML taken from either an
 * uploaded `htmlFile` or the `html` body field, and reports the outcome.
 *
 * Request:
 *   - `htmlFile` (optional upload) — takes precedence over `html` when present.
 *     NOTE(review): reads `req.file.buffer`, so this presumably relies on
 *     in-memory upload storage — confirm against the `upload` configuration.
 *   - `html` (body field) — used when no file was uploaded.
 *   - `script` (body field) — source passed to `executeCheerioScript`.
 *
 * Responses:
 *   - 400 `{ error }` when the HTML is missing/blank/not a string, or the
 *     script is missing/not a string.
 *   - 200 `{ success, data, error, processingTime }` otherwise; note that a
 *     failing script is still reported with status 200 and `success: false`.
 *   - 500 `{ success: false, error, details }` on unexpected failures.
 */
app.post('/extract', upload.single('htmlFile'), (req, res) => {
  try {
    const startTime = Date.now();

    // `req.body` may be undefined when no body parser handled the request;
    // fall back to an empty object so a missing body yields a 400, not a 500.
    const body = req.body || {};

    const htmlContent = req.file
      ? req.file.buffer.toString('utf8')
      : body.html || '';

    const extractorScript = body.script;

    // Non-string values (e.g. numbers/arrays from a JSON body) are a client
    // error; without the type check `.trim()` would throw and surface as 500.
    if (typeof htmlContent !== 'string' || !htmlContent.trim()) {
      return res.status(400).json({ error: 'No HTML content provided.' });
    }

    if (typeof extractorScript !== 'string' || !extractorScript) {
      return res.status(400).json({ error: 'No extractor script provided.' });
    }

    const result = executeCheerioScript(htmlContent, extractorScript);

    return res.json({
      success: result.success,
      data: result.data,
      error: result.error,
      processingTime: `${Date.now() - startTime}ms`
    });
  } catch (err) {
    console.error('Extraction failed:', err);

    // Do not assume `err` is an Error instance with a string `message`.
    const details = String(err && err.message ? err.message : err);

    return res.status(500).json({
      success: false,
      error: 'Internal server error.',
      details: details.substring(0, 100)
    });
  }
});
407
+
408
+
409
  const PORT = process.env.PORT || 3000;
410
  app.listen(PORT, () => {
411
  console.log(`Server running on http://localhost:${PORT}`);