Spaces:
Sleeping
Sleeping
Update routers/scraping_router.py
Browse files
- routers/scraping_router.py +28 -24
routers/scraping_router.py
CHANGED
@@ -49,46 +49,48 @@ async def generate_cheerio_script(
|
|
49 |
try:
|
50 |
example = """
|
51 |
Input HTML:
|
|
|
52 |
<div class="product-card">
|
53 |
<h2 class="title">iPhone 14</h2>
|
54 |
<span class="price">$999</span>
|
55 |
</div>
|
56 |
-
|
57 |
Input Request: "extract product title and price"
|
58 |
-
|
59 |
Expected Output:
|
60 |
<cheerio_script>
|
61 |
-
|
62 |
-
|
63 |
-
const productCard = $('.product-card');
|
64 |
-
result = {
|
65 |
-
success: true,
|
66 |
-
data: {
|
67 |
-
title: productCard.find('.title').text().trim() || null,
|
68 |
-
price: productCard.find('.price').text().trim() || null
|
69 |
-
},
|
70 |
-
error: null
|
71 |
-
};
|
72 |
-
} catch (error) {
|
73 |
-
result = {
|
74 |
success: false,
|
75 |
data: null,
|
76 |
-
error:
|
77 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
}
|
79 |
-
return result;
|
80 |
</cheerio_script>
|
81 |
"""
|
82 |
-
|
83 |
system_prompt = f"""You are an expert at writing Cheerio.js web scraping scripts.
|
84 |
Task: Generate a Cheerio.js script to extract {request.user_input} from the provided HTML.
|
85 |
|
86 |
Requirements:
|
87 |
-
-
|
|
|
88 |
- Use modern JavaScript syntax
|
89 |
- Include try-catch error handling
|
90 |
- Make the script reusable and efficient
|
91 |
-
- Enclose the script in <cheerio_script> tags
|
92 |
|
93 |
Here's an example of the expected format:
|
94 |
{example}
|
@@ -98,10 +100,12 @@ async def generate_cheerio_script(
|
|
98 |
|
99 |
user_prompt = f"""Generate a Cheerio.js script to extract {request.user_input}.
|
100 |
The script must:
|
101 |
-
1.
|
102 |
-
2.
|
103 |
-
3.
|
104 |
-
4.
|
|
|
|
|
105 |
|
106 |
response = ""
|
107 |
response = ai_client.chat(
|
|
|
49 |
try:
|
50 |
example = """
|
51 |
Input HTML:
|
52 |
+
<html>
|
53 |
<div class="product-card">
|
54 |
<h2 class="title">iPhone 14</h2>
|
55 |
<span class="price">$999</span>
|
56 |
</div>
|
57 |
+
</html>
|
58 |
Input Request: "extract product title and price"
|
|
|
59 |
Expected Output:
|
60 |
<cheerio_script>
|
61 |
+
function extract(input, cheerio) {
|
62 |
+
let result = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
success: false,
|
64 |
data: null,
|
65 |
+
error: null
|
66 |
};
|
67 |
+
|
68 |
+
try {
|
69 |
+
let $ = cheerio.load(input);
|
70 |
+
result.data = {
|
71 |
+
title: $('.product-card .title').text().trim() || null,
|
72 |
+
price: $('.product-card .price').text().trim() || null
|
73 |
+
};
|
74 |
+
result.success = true;
|
75 |
+
} catch (error) {
|
76 |
+
result.error = error.message;
|
77 |
+
}
|
78 |
+
|
79 |
+
return result;
|
80 |
}
|
|
|
81 |
</cheerio_script>
|
82 |
"""
|
83 |
+
|
84 |
system_prompt = f"""You are an expert at writing Cheerio.js web scraping scripts.
|
85 |
Task: Generate a Cheerio.js script to extract {request.user_input} from the provided HTML.
|
86 |
|
87 |
Requirements:
|
88 |
+
- Script must be wrapped in a function named 'extract' that takes (input, cheerio) parameters
|
89 |
+
- Return object must include: {{ success: boolean, data: object|null, error: string|null }}
|
90 |
- Use modern JavaScript syntax
|
91 |
- Include try-catch error handling
|
92 |
- Make the script reusable and efficient
|
93 |
+
- Enclose the entire script in <cheerio_script> tags
|
94 |
|
95 |
Here's an example of the expected format:
|
96 |
{example}
|
|
|
100 |
|
101 |
user_prompt = f"""Generate a Cheerio.js script to extract {request.user_input}.
|
102 |
The script must:
|
103 |
+
1. Be wrapped in a function named 'extract' that takes (input, cheerio) parameters
|
104 |
+
2. Return an object with success, data, and error fields
|
105 |
+
3. Handle missing elements by returning null
|
106 |
+
4. Use proper Cheerio selectors
|
107 |
+
5. Include error handling
|
108 |
+
6. Be enclosed in <cheerio_script> tags"""
|
109 |
|
110 |
response = ""
|
111 |
response = ai_client.chat(
|