Update index.js
Browse files
index.js
CHANGED
@@ -11,11 +11,16 @@ app.use(cors());
|
|
11 |
|
12 |
// Utility function that extracts content from a URL
|
13 |
async function extractContentFromUrl(url, browser) {
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
try {
|
17 |
-
//
|
18 |
-
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
|
19 |
|
20 |
// Navigate ke URL dengan timeout
|
21 |
await page.goto(url, {
|
@@ -98,7 +103,8 @@ async function extractContentFromUrl(url, browser) {
|
|
98 |
extractedAt: new Date().toISOString()
|
99 |
};
|
100 |
} finally {
|
101 |
-
|
|
|
102 |
}
|
103 |
}
|
104 |
|
@@ -154,7 +160,6 @@ app.post('/extract-content', async (req, res) => {
|
|
154 |
try {
|
155 |
// Launch browser
|
156 |
browser = await chromium.launch({
|
157 |
-
headless: true,
|
158 |
args: [
|
159 |
'--incognito',
|
160 |
'--single-process',
|
@@ -219,7 +224,7 @@ app.get('/health', (req, res) => {
|
|
219 |
|
220 |
// Root endpoint
|
221 |
app.get('/', (req, res) => {
|
222 |
-
res.json({
|
223 |
success: true,
|
224 |
message: 'Content Extractor API',
|
225 |
endpoints: {
|
@@ -234,7 +239,7 @@ app.get('/', (req, res) => {
|
|
234 |
urls: ['https://example.com', 'https://another-site.com']
|
235 |
}
|
236 |
}
|
237 |
-
});
|
238 |
});
|
239 |
|
240 |
// Error handling middleware
|
@@ -247,7 +252,7 @@ app.use((err, req, res, next) => {
|
|
247 |
});
|
248 |
});
|
249 |
|
250 |
-
// 404 handler
|
251 |
app.use((req, res) => {
|
252 |
res.status(404).json({
|
253 |
success: false,
|
@@ -260,4 +265,4 @@ app.listen(PORT, () => {
|
|
260 |
console.log(`π Content Extractor API running on port ${PORT}`);
|
261 |
console.log(`π API Documentation: http://localhost:${PORT}`);
|
262 |
console.log(`π₯ Health Check: http://localhost:${PORT}/health`);
|
263 |
-
});
|
|
|
11 |
|
12 |
// Utility function that extracts content from a URL
|
13 |
async function extractContentFromUrl(url, browser) {
|
14 |
+
// MODIFIKASI: Buat konteks baru untuk setiap URL. Ini lebih aman dan memungkinkan isolasi.
|
15 |
+
const context = await browser.newContext({
|
16 |
+
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
17 |
+
});
|
18 |
+
|
19 |
+
// MODIFIKASI: Buat halaman dari konteks yang sudah memiliki user agent.
|
20 |
+
const page = await context.newPage();
|
21 |
|
22 |
try {
|
23 |
+
// Baris "await page.setUserAgent(...)" yang lama sudah dihapus.
|
|
|
24 |
|
25 |
// Navigate ke URL dengan timeout
|
26 |
await page.goto(url, {
|
|
|
103 |
extractedAt: new Date().toISOString()
|
104 |
};
|
105 |
} finally {
|
106 |
+
// MODIFIKASI: Tutup konteksnya. Ini akan otomatis menutup halaman yang dibuat dari konteks ini.
|
107 |
+
await context.close();
|
108 |
}
|
109 |
}
|
110 |
|
|
|
160 |
try {
|
161 |
// Launch browser
|
162 |
browser = await chromium.launch({
|
|
|
163 |
args: [
|
164 |
'--incognito',
|
165 |
'--single-process',
|
|
|
224 |
|
225 |
// Root endpoint
|
226 |
app.get('/', (req, res) => {
|
227 |
+
res.json(JSON.stringify({
|
228 |
success: true,
|
229 |
message: 'Content Extractor API',
|
230 |
endpoints: {
|
|
|
239 |
urls: ['https://example.com', 'https://another-site.com']
|
240 |
}
|
241 |
}
|
242 |
+
}), 0, 2);
|
243 |
});
|
244 |
|
245 |
// Error handling middleware
|
|
|
252 |
});
|
253 |
});
|
254 |
|
255 |
+
// 404 handler
|
256 |
app.use((req, res) => {
|
257 |
res.status(404).json({
|
258 |
success: false,
|
|
|
265 |
console.log(`π Content Extractor API running on port ${PORT}`);
|
266 |
console.log(`π API Documentation: http://localhost:${PORT}`);
|
267 |
console.log(`π₯ Health Check: http://localhost:${PORT}/health`);
|
268 |
+
});
|