Shreyas094
committed on
Commit
•
b688d25
1
Parent(s):
2b6834a
Update app.py
Browse files
app.py
CHANGED
@@ -31,6 +31,7 @@ import PyPDF2
|
|
31 |
import io
|
32 |
import requests
|
33 |
from duckduckgo_search import DDGS
|
|
|
34 |
|
35 |
# Load environment variables from a .env file
|
36 |
load_dotenv()
|
@@ -54,6 +55,20 @@ client = InferenceClient(
|
|
54 |
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
|
55 |
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
def duckduckgo_search(query, num_results=10, time_range="", language="", safesearch=2):
|
58 |
try:
|
59 |
ddgs = DDGS()
|
@@ -78,13 +93,18 @@ def duckduckgo_search(query, num_results=10, time_range="", language="", safesea
|
|
78 |
else:
|
79 |
safesearch_setting = "strict"
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
results = ddgs.text(
|
82 |
query,
|
83 |
region='wt-wt',
|
84 |
safesearch=safesearch_setting,
|
85 |
timelimit=timelimit,
|
86 |
-
max_results=num_results
|
87 |
-
backend: str = "html"
|
88 |
)
|
89 |
|
90 |
return [{"url": result["href"], "title": result["title"]} for result in results]
|
|
|
31 |
import io
|
32 |
import requests
|
33 |
from duckduckgo_search import DDGS
|
34 |
+
import random
|
35 |
|
36 |
# Load environment variables from a .env file
|
37 |
load_dotenv()
|
|
|
55 |
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
|
56 |
|
57 |
|
58 |
+
# List of common user agents
|
59 |
+
USER_AGENTS = [
|
60 |
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
61 |
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
|
62 |
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
|
63 |
+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
64 |
+
"Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
|
65 |
+
"Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/91.0.4472.80 Mobile/15E148 Safari/604.1",
|
66 |
+
"Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/88.0",
|
67 |
+
]
|
68 |
+
|
69 |
+
def get_random_user_agent():
|
70 |
+
return random.choice(USER_AGENTS)
|
71 |
+
|
72 |
def duckduckgo_search(query, num_results=10, time_range="", language="", safesearch=2):
|
73 |
try:
|
74 |
ddgs = DDGS()
|
|
|
93 |
else:
|
94 |
safesearch_setting = "strict"
|
95 |
|
96 |
+
# Get a random user agent
|
97 |
+
user_agent = get_random_user_agent()
|
98 |
+
|
99 |
+
# Set the user agent for the DDGS instance
|
100 |
+
ddgs.session.headers.update({'User-Agent': user_agent})
|
101 |
+
|
102 |
results = ddgs.text(
|
103 |
query,
|
104 |
region='wt-wt',
|
105 |
safesearch=safesearch_setting,
|
106 |
timelimit=timelimit,
|
107 |
+
max_results=num_results
|
|
|
108 |
)
|
109 |
|
110 |
return [{"url": result["href"], "title": result["title"]} for result in results]
|