Spaces:
Runtime error
Runtime error
Commit
·
8d1a80c
1
Parent(s):
0750d7a
Add randomization to urls required to fetch all nearby pages
Browse files
- If the number of required nearby pages is smaller than the number of available nearby pages, randomize the URLs to avoid biasing the results in one direction. Because the small circles are stacked in a hexagonal pattern (from south to north), fetching the URLs in their original order would cluster the results in the south and leave none in the north.
- moved `fetch_url` to utils.py
- removed the frontend-side distance filtering when rendering nearby points
- backend/utils.py +27 -1
- frontend/src/components/Map.js +1 -3
- main.py +19 -15
backend/utils.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
import math
|
|
|
|
|
2 |
def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=10):
|
3 |
"""
|
4 |
Generate a list of centers of small circles (radius=10km) needed to cover a larger circle.
|
@@ -40,4 +42,28 @@ def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=1
|
|
40 |
lon = center_lon + delta_lon
|
41 |
results.append((lat, lon))
|
42 |
|
43 |
-
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import math
|
2 |
+
import httpx
|
3 |
+
|
4 |
def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=10):
|
5 |
"""
|
6 |
Generate a list of centers of small circles (radius=10km) needed to cover a larger circle.
|
|
|
42 |
lon = center_lon + delta_lon
|
43 |
results.append((lat, lon))
|
44 |
|
45 |
+
return results
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
async def fetch_url(client: httpx.AsyncClient, url: str):
|
50 |
+
"""
|
51 |
+
Fetch a URL asynchronously using httpx and return the response status and data.
|
52 |
+
This function is used asynchronously to fetch multiple URLs in parallel when the search radius > 10km.
|
53 |
+
Input:
|
54 |
+
- client: httpx.AsyncClient instance
|
55 |
+
- url: URL to fetch
|
56 |
+
Output:
|
57 |
+
- A dictionary with the URL, status code, and data if available.
|
58 |
+
- Data includes the JSON format of wiki geosearch response.
|
59 |
+
If an error occurs, return a dictionary with the URL and the error message.
|
60 |
+
"""
|
61 |
+
try:
|
62 |
+
response = await client.get(url, timeout=10.0)
|
63 |
+
return {
|
64 |
+
"url": url,
|
65 |
+
"status": response.status_code,
|
66 |
+
"data": response.json() if response.status_code == 200 else None,
|
67 |
+
}
|
68 |
+
except Exception as e:
|
69 |
+
return {"url": url, "error": str(e)}
|
frontend/src/components/Map.js
CHANGED
@@ -247,9 +247,7 @@ const Map = ( { onMapClick, searchQuery, contentType, setSearchQuery, setSubmitt
|
|
247 |
|
248 |
if (res.ok) {
|
249 |
const data = await res.json();
|
250 |
-
const markers = data.pages.
|
251 |
-
page => typeof page.dist === "number" && page.dist <= explorationRadius * 1000
|
252 |
-
).map(page => ({
|
253 |
position: [page.lat, page.lon],
|
254 |
title: page.title,
|
255 |
distance: page.dist
|
|
|
247 |
|
248 |
if (res.ok) {
|
249 |
const data = await res.json();
|
250 |
+
const markers = data.pages.map(page => ({
|
|
|
|
|
251 |
position: [page.lat, page.lon],
|
252 |
title: page.title,
|
253 |
distance: page.dist
|
main.py
CHANGED
@@ -8,7 +8,8 @@ import geopy.distance
|
|
8 |
from cachetools import TTLCache
|
9 |
import os
|
10 |
from dotenv import load_dotenv
|
11 |
-
from
|
|
|
12 |
|
13 |
load_dotenv()
|
14 |
|
@@ -175,16 +176,7 @@ def get_geodistance(payload: Geodistance):
|
|
175 |
)
|
176 |
|
177 |
|
178 |
-
|
179 |
-
try:
|
180 |
-
response = await client.get(url, timeout=10.0)
|
181 |
-
return {
|
182 |
-
"url": url,
|
183 |
-
"status": response.status_code,
|
184 |
-
"data": response.json() if response.status_code == 200 else None,
|
185 |
-
}
|
186 |
-
except Exception as e:
|
187 |
-
return {"url": url, "error": str(e)}
|
188 |
|
189 |
@app.post("/wiki/nearby")
|
190 |
async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
@@ -217,7 +209,9 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
217 |
radius = payload.radius
|
218 |
limit = payload.limit
|
219 |
|
220 |
-
|
|
|
|
|
221 |
url = ("https://en.wikipedia.org/w/api.php"+"?action=query"
|
222 |
"&list=geosearch"
|
223 |
f"&gscoord={lat_center}|{lon_center}"
|
@@ -248,11 +242,14 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
248 |
content={"error": str(e)},
|
249 |
status_code=500
|
250 |
)
|
251 |
-
|
|
|
|
|
252 |
small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
|
253 |
all_pages = []
|
254 |
base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
|
255 |
-
urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=
|
|
|
256 |
|
257 |
print("URL Counts:", len(urls))
|
258 |
try:
|
@@ -263,18 +260,25 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
263 |
# print(results)
|
264 |
for result in results:
|
265 |
for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
|
|
|
|
|
266 |
lat, lon = unit.get("lat"), unit.get("lon")
|
267 |
if lat is not None and lon is not None:
|
268 |
dist = int(geopy.distance.distance(
|
269 |
(lat_center, lon_center), (lat, lon)
|
270 |
).m)
|
271 |
-
print(dist)
|
272 |
else:
|
273 |
dist = None
|
274 |
|
|
|
|
|
|
|
275 |
unit_with_dist = {**unit, "dist": dist}
|
276 |
all_pages.append(unit_with_dist)
|
277 |
|
|
|
|
|
278 |
return JSONResponse(
|
279 |
content={
|
280 |
"pages": all_pages,
|
|
|
8 |
from cachetools import TTLCache
|
9 |
import os
|
10 |
from dotenv import load_dotenv
|
11 |
+
from random import shuffle
|
12 |
+
from backend.utils import generate_circle_centers, fetch_url
|
13 |
|
14 |
load_dotenv()
|
15 |
|
|
|
176 |
)
|
177 |
|
178 |
|
179 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
@app.post("/wiki/nearby")
|
182 |
async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
|
209 |
radius = payload.radius
|
210 |
limit = payload.limit
|
211 |
|
212 |
+
wiki_geosearch_radius_limit_meters = 10000 # Wikipedia API limit for geosearch radius in meters
|
213 |
+
|
214 |
+
if radius <= wiki_geosearch_radius_limit_meters:
|
215 |
url = ("https://en.wikipedia.org/w/api.php"+"?action=query"
|
216 |
"&list=geosearch"
|
217 |
f"&gscoord={lat_center}|{lon_center}"
|
|
|
242 |
content={"error": str(e)},
|
243 |
status_code=500
|
244 |
)
|
245 |
+
|
246 |
+
elif radius > wiki_geosearch_radius_limit_meters:
|
247 |
+
print(radius)
|
248 |
small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
|
249 |
all_pages = []
|
250 |
base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
|
251 |
+
urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=wiki_geosearch_radius_limit_meters, page_limit=100) for center in small_circle_centers]
|
252 |
+
shuffle(urls) # If # available pages > # requested pages by user, randomize the results to avoid clustering around a single direction.
|
253 |
|
254 |
print("URL Counts:", len(urls))
|
255 |
try:
|
|
|
260 |
# print(results)
|
261 |
for result in results:
|
262 |
for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
|
263 |
+
if len(all_pages) >= limit:
|
264 |
+
break
|
265 |
lat, lon = unit.get("lat"), unit.get("lon")
|
266 |
if lat is not None and lon is not None:
|
267 |
dist = int(geopy.distance.distance(
|
268 |
(lat_center, lon_center), (lat, lon)
|
269 |
).m)
|
270 |
+
# print(dist)
|
271 |
else:
|
272 |
dist = None
|
273 |
|
274 |
+
if (not dist) or (dist and dist > radius):
|
275 |
+
continue
|
276 |
+
|
277 |
unit_with_dist = {**unit, "dist": dist}
|
278 |
all_pages.append(unit_with_dist)
|
279 |
|
280 |
+
# print(all_pages)
|
281 |
+
|
282 |
return JSONResponse(
|
283 |
content={
|
284 |
"pages": all_pages,
|