DebasishDhal99 commited on
Commit
8d1a80c
·
1 Parent(s): 0750d7a

Add randomization to urls required to fetch all nearby pages

Browse files

- If the number of required nearby pages is less than the number of available nearby pages, randomize the URLs in order to avoid biasing the results in one direction. Since the small circles are stacked in a hexagonal manner (stacking from south to north), not randomizing the URLs would cluster the results in the south with none in the north whenever the number of required pages is less than the number of available pages.

- moved `fetch_url` to utils.py

- removed filtering based on distance on frontend side for nearby point rendering

Files changed (3) hide show
  1. backend/utils.py +27 -1
  2. frontend/src/components/Map.js +1 -3
  3. main.py +19 -15
backend/utils.py CHANGED
@@ -1,4 +1,6 @@
1
  import math
 
 
2
  def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=10):
3
  """
4
  Generate a list of centers of small circles (radius=10km) needed to cover a larger circle.
@@ -40,4 +42,28 @@ def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=1
40
  lon = center_lon + delta_lon
41
  results.append((lat, lon))
42
 
43
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import math
2
+ import httpx
3
+
4
  def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=10):
5
  """
6
  Generate a list of centers of small circles (radius=10km) needed to cover a larger circle.
 
42
  lon = center_lon + delta_lon
43
  results.append((lat, lon))
44
 
45
+ return results
46
+
47
+
48
+
49
+ async def fetch_url(client: httpx.AsyncClient, url: str):
50
+ """
51
+ Fetch a URL asynchronously using httpx and return the response status and data.
52
+ This function is asynchronously used to fetch multiple URLs in parallel when search radius > 10km.
53
+ Input:
54
+ - client: httpx.AsyncClient instance
55
+ - url: URL to fetch
56
+ Output:
57
+ - A dictionary with the URL, status code, and data if available.
58
+ - Data includes the JSON format of wiki geosearch response.
59
+ If an error occurs, return a dictionary with the URL and the error message.
60
+ """
61
+ try:
62
+ response = await client.get(url, timeout=10.0)
63
+ return {
64
+ "url": url,
65
+ "status": response.status_code,
66
+ "data": response.json() if response.status_code == 200 else None,
67
+ }
68
+ except Exception as e:
69
+ return {"url": url, "error": str(e)}
frontend/src/components/Map.js CHANGED
@@ -247,9 +247,7 @@ const Map = ( { onMapClick, searchQuery, contentType, setSearchQuery, setSubmitt
247
 
248
  if (res.ok) {
249
  const data = await res.json();
250
- const markers = data.pages.filter(
251
- page => typeof page.dist === "number" && page.dist <= explorationRadius * 1000
252
- ).map(page => ({
253
  position: [page.lat, page.lon],
254
  title: page.title,
255
  distance: page.dist
 
247
 
248
  if (res.ok) {
249
  const data = await res.json();
250
+ const markers = data.pages.map(page => ({
 
 
251
  position: [page.lat, page.lon],
252
  title: page.title,
253
  distance: page.dist
main.py CHANGED
@@ -8,7 +8,8 @@ import geopy.distance
8
  from cachetools import TTLCache
9
  import os
10
  from dotenv import load_dotenv
11
- from backend.utils import generate_circle_centers
 
12
 
13
  load_dotenv()
14
 
@@ -175,16 +176,7 @@ def get_geodistance(payload: Geodistance):
175
  )
176
 
177
 
178
- async def fetch_url(client: httpx.AsyncClient, url: str):
179
- try:
180
- response = await client.get(url, timeout=10.0)
181
- return {
182
- "url": url,
183
- "status": response.status_code,
184
- "data": response.json() if response.status_code == 200 else None,
185
- }
186
- except Exception as e:
187
- return {"url": url, "error": str(e)}
188
 
189
  @app.post("/wiki/nearby")
190
  async def get_nearby_wiki_pages(payload: NearbyWikiPage):
@@ -217,7 +209,9 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
217
  radius = payload.radius
218
  limit = payload.limit
219
 
220
- if radius <= 10000:
 
 
221
  url = ("https://en.wikipedia.org/w/api.php"+"?action=query"
222
  "&list=geosearch"
223
  f"&gscoord={lat_center}|{lon_center}"
@@ -248,11 +242,14 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
248
  content={"error": str(e)},
249
  status_code=500
250
  )
251
- elif radius > 10000:
 
 
252
  small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
253
  all_pages = []
254
  base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
255
- urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=10*1000, page_limit=100) for center in small_circle_centers]
 
256
 
257
  print("URL Counts:", len(urls))
258
  try:
@@ -263,18 +260,25 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
263
  # print(results)
264
  for result in results:
265
  for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
 
 
266
  lat, lon = unit.get("lat"), unit.get("lon")
267
  if lat is not None and lon is not None:
268
  dist = int(geopy.distance.distance(
269
  (lat_center, lon_center), (lat, lon)
270
  ).m)
271
- print(dist)
272
  else:
273
  dist = None
274
 
 
 
 
275
  unit_with_dist = {**unit, "dist": dist}
276
  all_pages.append(unit_with_dist)
277
 
 
 
278
  return JSONResponse(
279
  content={
280
  "pages": all_pages,
 
8
  from cachetools import TTLCache
9
  import os
10
  from dotenv import load_dotenv
11
+ from random import shuffle
12
+ from backend.utils import generate_circle_centers, fetch_url
13
 
14
  load_dotenv()
15
 
 
176
  )
177
 
178
 
179
+
 
 
 
 
 
 
 
 
 
180
 
181
  @app.post("/wiki/nearby")
182
  async def get_nearby_wiki_pages(payload: NearbyWikiPage):
 
209
  radius = payload.radius
210
  limit = payload.limit
211
 
212
+ wiki_geosearch_radius_limit_meters = 10000 # Wikipedia API limit for geosearch radius in meters
213
+
214
+ if radius <= wiki_geosearch_radius_limit_meters:
215
  url = ("https://en.wikipedia.org/w/api.php"+"?action=query"
216
  "&list=geosearch"
217
  f"&gscoord={lat_center}|{lon_center}"
 
242
  content={"error": str(e)},
243
  status_code=500
244
  )
245
+
246
+ elif radius > wiki_geosearch_radius_limit_meters:
247
+ print(radius)
248
  small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
249
  all_pages = []
250
  base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
251
+ urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=wiki_geosearch_radius_limit_meters, page_limit=100) for center in small_circle_centers]
252
+ shuffle(urls) # If # available pages > # requested pages by user, randomize the results to avoid clustering around a single direction.
253
 
254
  print("URL Counts:", len(urls))
255
  try:
 
260
  # print(results)
261
  for result in results:
262
  for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
263
+ if len(all_pages) >= limit:
264
+ break
265
  lat, lon = unit.get("lat"), unit.get("lon")
266
  if lat is not None and lon is not None:
267
  dist = int(geopy.distance.distance(
268
  (lat_center, lon_center), (lat, lon)
269
  ).m)
270
+ # print(dist)
271
  else:
272
  dist = None
273
 
274
+ if (not dist) or (dist and dist > radius):
275
+ continue
276
+
277
  unit_with_dist = {**unit, "dist": dist}
278
  all_pages.append(unit_with_dist)
279
 
280
+ # print(all_pages)
281
+
282
  return JSONResponse(
283
  content={
284
  "pages": all_pages,