pvanand committed on
Commit
7924068
·
verified ·
1 Parent(s): 0ce5866

Update routers/scraping_router.py

Browse files
Files changed (1) hide show
  1. routers/scraping_router.py +28 -24
routers/scraping_router.py CHANGED
@@ -49,46 +49,48 @@ async def generate_cheerio_script(
49
  try:
50
  example = """
51
  Input HTML:
 
52
  <div class="product-card">
53
  <h2 class="title">iPhone 14</h2>
54
  <span class="price">$999</span>
55
  </div>
56
-
57
  Input Request: "extract product title and price"
58
-
59
  Expected Output:
60
  <cheerio_script>
61
- let result = {};
62
- try {
63
- const productCard = $('.product-card');
64
- result = {
65
- success: true,
66
- data: {
67
- title: productCard.find('.title').text().trim() || null,
68
- price: productCard.find('.price').text().trim() || null
69
- },
70
- error: null
71
- };
72
- } catch (error) {
73
- result = {
74
  success: false,
75
  data: null,
76
- error: error.message
77
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  }
79
- return result;
80
  </cheerio_script>
81
  """
82
-
83
  system_prompt = f"""You are an expert at writing Cheerio.js web scraping scripts.
84
  Task: Generate a Cheerio.js script to extract {request.user_input} from the provided HTML.
85
 
86
  Requirements:
87
- - Return a dictionary/object with the structure: {{ success: boolean, data: object|null, error: string|null }}
 
88
  - Use modern JavaScript syntax
89
  - Include try-catch error handling
90
  - Make the script reusable and efficient
91
- - Enclose the script in <cheerio_script> tags
92
 
93
  Here's an example of the expected format:
94
  {example}
@@ -98,10 +100,12 @@ async def generate_cheerio_script(
98
 
99
  user_prompt = f"""Generate a Cheerio.js script to extract {request.user_input}.
100
  The script must:
101
- 1. Return a dictionary/object with success, data, and error fields
102
- 2. Handle missing elements gracefully
103
- 3. Use proper Cheerio selectors
104
- 4. Be enclosed in <cheerio_script> tags"""
 
 
105
 
106
  response = ""
107
  response = ai_client.chat(
 
49
  try:
50
  example = """
51
  Input HTML:
52
+ <html>
53
  <div class="product-card">
54
  <h2 class="title">iPhone 14</h2>
55
  <span class="price">$999</span>
56
  </div>
57
+ </html>
58
  Input Request: "extract product title and price"
 
59
  Expected Output:
60
  <cheerio_script>
61
+ function extract(input, cheerio) {
62
+ let result = {
 
 
 
 
 
 
 
 
 
 
 
63
  success: false,
64
  data: null,
65
+ error: null
66
  };
67
+
68
+ try {
69
+ let $ = cheerio.load(input);
70
+ result.data = {
71
+ title: $('.product-card .title').text().trim() || null,
72
+ price: $('.product-card .price').text().trim() || null
73
+ };
74
+ result.success = true;
75
+ } catch (error) {
76
+ result.error = error.message;
77
+ }
78
+
79
+ return result;
80
  }
 
81
  </cheerio_script>
82
  """
83
+
84
  system_prompt = f"""You are an expert at writing Cheerio.js web scraping scripts.
85
  Task: Generate a Cheerio.js script to extract {request.user_input} from the provided HTML.
86
 
87
  Requirements:
88
+ - Script must be wrapped in a function named 'extract' that takes (input, cheerio) parameters
89
+ - Return object must include: {{ success: boolean, data: object|null, error: string|null }}
90
  - Use modern JavaScript syntax
91
  - Include try-catch error handling
92
  - Make the script reusable and efficient
93
+ - Enclose the entire script in <cheerio_script> tags
94
 
95
  Here's an example of the expected format:
96
  {example}
 
100
 
101
  user_prompt = f"""Generate a Cheerio.js script to extract {request.user_input}.
102
  The script must:
103
+ 1. Be wrapped in a function named 'extract' that takes (input, cheerio) parameters
104
+ 2. Return an object with success, data, and error fields
105
+ 3. Handle missing elements by returning null
106
+ 4. Use proper Cheerio selectors
107
+ 5. Include error handling
108
+ 6. Be enclosed in <cheerio_script> tags"""
109
 
110
  response = ""
111
  response = ai_client.chat(