pvanand committed on
Commit
b22edd2
·
verified ·
1 Parent(s): 2f35e5f

Update index.js

Browse files
Files changed (1) hide show
  1. index.js +34 -17
index.js CHANGED
@@ -338,27 +338,45 @@ app.post('/process', upload.single('htmlFile'), (req, res) => {
338
  }
339
  });
340
 
341
- // Add this function to handle script execution
 
 
 
 
 
 
 
 
342
  function executeCheerioScript(html, scriptContent) {
343
  try {
344
- const $ = cheerio.load(html, {
345
- decodeEntities: false,
346
- xmlMode: false,
347
- lowerCaseTags: true
348
- });
 
 
349
 
350
- // Create a safe function from the script content
351
- const extractorFunction = new Function('$', `
352
  ${scriptContent}
 
353
  `);
354
 
355
- // Execute the script with Cheerio instance
356
- const result = extractorFunction($);
357
- return {
358
- success: true,
359
- data: result,
360
- error: null
361
- };
 
 
 
 
 
 
 
362
  } catch (err) {
363
  return {
364
  success: false,
@@ -368,7 +386,7 @@ function executeCheerioScript(html, scriptContent) {
368
  }
369
  }
370
 
371
- // cherio extract endpoint
372
  app.post('/extract', upload.single('htmlFile'), (req, res) => {
373
  try {
374
  const startTime = Date.now();
@@ -405,7 +423,6 @@ app.post('/extract', upload.single('htmlFile'), (req, res) => {
405
  }
406
  });
407
 
408
-
409
  const PORT = process.env.PORT || 3000;
410
  app.listen(PORT, () => {
411
  console.log(`Server running on http://localhost:${PORT}`);
 
338
  }
339
  });
340
 
341
+ // Helper function to validate script structure
342
+ function validateScript(scriptContent) {
343
+ // Check if script contains function declaration
344
+ if (!scriptContent.includes('function extract(')) {
345
+ throw new Error('Script must contain a function named "extract"');
346
+ }
347
+ }
348
+
349
+ // Modified function to execute the extract function
350
  function executeCheerioScript(html, scriptContent) {
351
  try {
352
+ validateScript(scriptContent);
353
+
354
+ // Create a safe context for the script
355
+ const context = {
356
+ cheerio,
357
+ input: html
358
+ };
359
 
360
+ // Create the function from script content
361
+ const extractorFunction = new Function('input', 'cheerio', `
362
  ${scriptContent}
363
+ return extract(input, cheerio);
364
  `);
365
 
366
+ // Execute the script with provided parameters
367
+ const result = extractorFunction(html, cheerio);
368
+
369
+ // Validate result structure
370
+ if (!result || typeof result !== 'object') {
371
+ throw new Error('Extract function must return an object');
372
+ }
373
+
374
+ if (!('success' in result && 'data' in result && 'error' in result)) {
375
+ throw new Error('Return object must contain success, data, and error fields');
376
+ }
377
+
378
+ return result;
379
+
380
  } catch (err) {
381
  return {
382
  success: false,
 
386
  }
387
  }
388
 
389
+ // Updated endpoint
390
  app.post('/extract', upload.single('htmlFile'), (req, res) => {
391
  try {
392
  const startTime = Date.now();
 
423
  }
424
  });
425
 
 
426
  const PORT = process.env.PORT || 3000;
427
  app.listen(PORT, () => {
428
  console.log(`Server running on http://localhost:${PORT}`);