Update data_integration.py
Browse files- data_integration.py +15 -0
data_integration.py
CHANGED
@@ -10,6 +10,21 @@ custom_headers = {
|
|
10 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
|
11 |
}
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def get_soup(url):
|
14 |
response = requests.get(url, headers=custom_headers)
|
15 |
|
|
|
10 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
|
11 |
}
|
12 |
|
13 |
+
|
14 |
+
import time
from itertools import cycle

import requests

# Round-robin pool of proxies so successive requests originate from
# different endpoints, reducing the chance of per-IP rate limiting.
proxy_list = ["http://proxy1.com", "http://proxy2.com", "http://proxy3.com"]
proxy_pool = cycle(proxy_list)

# The target page is invariant, so build the URL once instead of
# re-assigning it on every loop iteration.
# NOTE(review): the query string looks truncated ("&revie") — confirm the
# intended full URL; preserved byte-for-byte from the original.
_REVIEW_URL = (
    "https://www.amazon.in/Dell-Inspiron-i7-1255U-Processor-Platinum/"
    "product-reviews/B0C9F142V6/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&revie"
)
_REQUEST_DELAY_SECONDS = 2   # polite pause between consecutive requests
_REQUEST_TIMEOUT_SECONDS = 10  # fail fast instead of hanging on a dead proxy

# Smoke-test the rotation: fetch the review page through each proxy in turn
# and report the HTTP status, without letting one bad proxy abort the run.
for _ in range(10):
    proxy = next(proxy_pool)
    try:
        # requests.get without a timeout can block indefinitely; always set one.
        response = requests.get(
            _REVIEW_URL,
            proxies={"http": proxy, "https": proxy},
            headers=custom_headers,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        print(response.status_code)
    except requests.RequestException as exc:
        # A connection/timeout failure on one proxy should not stop the rotation.
        print(f"request via {proxy} failed: {exc}")
    time.sleep(_REQUEST_DELAY_SECONDS)  # Wait between requests
|
27 |
+
|
28 |
def get_soup(url):
|
29 |
response = requests.get(url, headers=custom_headers)
|
30 |
|