Game4all committed on
Commit
a007a27
·
1 Parent(s): 8d6fbc5

Remove useless endpoint

Browse files
Files changed (3) hide show
  1. app.py +25 -41
  2. requirements.txt +0 -1
  3. search.py → serp.py +2 -8
app.py CHANGED
@@ -1,8 +1,7 @@
1
  from contextlib import asynccontextmanager
2
  from typing import Optional
3
- import expiringdict
4
  from fastapi import APIRouter, FastAPI
5
- from fastapi.routing import APIRouter as Router
6
  import httpx
7
  from pydantic import BaseModel, Field
8
  from playwright.async_api import async_playwright, Browser, BrowserContext, Page
@@ -10,7 +9,7 @@ import logging
10
  import uvicorn
11
 
12
  from scrap import scrap_patent_async, scrap_patent_bulk_async
13
- from search import APISearchResults, APIPatentResults, query_bing_search, query_brave_search, query_ddg_search, query_google_patents
14
 
15
  logging.basicConfig(
16
  level=logging.INFO,
@@ -37,42 +36,31 @@ async def api_lifespan(app: FastAPI):
37
  await pw_browser.close()
38
  await playwright.stop()
39
 
40
- app = FastAPI(lifespan=api_lifespan)
41
- backend_status = expiringdict.ExpiringDict(max_len=5, max_age_seconds=15*60)
42
 
43
  # Router for scrapping related endpoints
44
  scrap_router = APIRouter(prefix="/scrap", tags=["scrapping"])
45
- # Router for search related endpoints
46
- search_router = APIRouter(prefix="/search", tags=["search"])
47
-
48
-
49
- @app.get('/')
50
- async def status():
51
- backend_keys = [k[0] for k in backend_status.items()]
52
- backend_status_dict = {}
53
-
54
- for k in backend_keys:
55
- backend_status_dict[k] = backend_status.get(k)
56
- return {"status": "running", "backend_status": backend_status_dict}
57
 
58
  # ===================== Search endpoints =====================
59
 
60
 
61
- class APISearchParams(BaseModel):
62
  queries: list[str] = Field(...,
63
  description="The list of queries to search for")
64
  n_results: int = Field(
65
  10, description="Number of results to return for each query. Valid values are 10, 25, 50 and 100")
66
 
67
 
68
- @search_router.post("/search_scholar")
69
- async def query_google_scholar(params: APISearchParams):
70
  """Queries google scholar for the specified query"""
71
  return {"error": "Unimplemented"}
72
 
73
 
74
- @search_router.post("/search_patents")
75
- async def search_patents(params: APISearchParams) -> APIPatentResults:
76
  """Searches google patents for the specified queries and returns the found documents."""
77
  results = []
78
  for q in params.queries:
@@ -81,14 +69,13 @@ async def search_patents(params: APISearchParams) -> APIPatentResults:
81
  res = await query_google_patents(pw_browser, q, params.n_results)
82
  results.extend(res)
83
  except Exception as e:
84
- backend_status["gpatents"] = "rate-limited"
85
  logging.error(
86
  f"Failed to query Google Patents with query `{q}`: {e}")
87
- return APIPatentResults(results=results, error=None)
88
 
89
 
90
- @search_router.post("/search_brave")
91
- async def search_brave(params: APISearchParams) -> APISearchResults:
92
  """Searches brave search for the specified queries and returns the found documents."""
93
  results = []
94
  last_exception: Optional[Exception] = None
@@ -99,15 +86,14 @@ async def search_brave(params: APISearchParams) -> APISearchResults:
99
  results.extend(res)
100
  except Exception as e:
101
  last_exception = e
102
- backend_status["brave"] = "rate-limited"
103
  logging.error(
104
  f"Failed to query Brave search with query `{q}`: {e}")
105
 
106
- return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
107
 
108
 
109
- @search_router.post("/search_bing")
110
- async def search_bing(params: APISearchParams) -> APISearchResults:
111
  """Searches Bing search for the specified queries and returns the found documents."""
112
  results = []
113
  last_exception: Optional[Exception] = None
@@ -118,15 +104,14 @@ async def search_bing(params: APISearchParams) -> APISearchResults:
118
  results.extend(res)
119
  except Exception as e:
120
  last_exception = e
121
- backend_status["bing"] = "rate-limited"
122
  logging.error(
123
  f"Failed to query Bing search with query `{q}`: {e}")
124
 
125
- return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
126
 
127
 
128
- @search_router.post("/search_duck")
129
- async def search_duck(params: APISearchParams) -> APISearchResults:
130
  """Searches duckduckgo for the specified queries and returns the found documents"""
131
  results = []
132
  last_exception: Optional[Exception] = None
@@ -138,15 +123,14 @@ async def search_duck(params: APISearchParams) -> APISearchResults:
138
  results.extend(res)
139
  except Exception as e:
140
  last_exception = e
141
- backend_status["duckduckgo"] = "rate-limited"
142
  logging.error(f"Failed to query DDG with query `{q}`: {e}")
143
 
144
- return APISearchResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
145
 
146
 
147
- @search_router.post("/search")
148
  @app.post("/search")
149
- async def search(params: APISearchParams):
150
  """Attempts to search the specified queries using ALL backends"""
151
  results = []
152
 
@@ -180,9 +164,9 @@ async def search(params: APISearchParams):
180
  logging.info("Trying with next browser backend.")
181
 
182
  if len(results) == 0:
183
- return APISearchResults(results=[], error="All backends are rate-limited.")
184
 
185
- return APISearchResults(results=results, error=None)
186
 
187
  # =========================== Scrapping endpoints ===========================
188
 
@@ -209,7 +193,7 @@ async def scrap_patents(params: ScrapPatentsRequest):
209
 
210
  # ===============================================================================
211
 
212
- app.include_router(search_router)
213
  app.include_router(scrap_router)
214
 
215
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  from contextlib import asynccontextmanager
2
  from typing import Optional
 
3
  from fastapi import APIRouter, FastAPI
4
+ from fastapi.routing import APIRouter
5
  import httpx
6
  from pydantic import BaseModel, Field
7
  from playwright.async_api import async_playwright, Browser, BrowserContext, Page
 
9
  import uvicorn
10
 
11
  from scrap import scrap_patent_async, scrap_patent_bulk_async
12
+ from serp import SerpResults, query_bing_search, query_brave_search, query_ddg_search, query_google_patents
13
 
14
  logging.basicConfig(
15
  level=logging.INFO,
 
36
  await pw_browser.close()
37
  await playwright.stop()
38
 
39
+ app = FastAPI(lifespan=api_lifespan, docs_url="/")
 
40
 
41
  # Router for scrapping related endpoints
42
  scrap_router = APIRouter(prefix="/scrap", tags=["scrapping"])
43
+ # Router for SERP-scrapping related endpoints
44
+ serp_router = APIRouter(prefix="/serp", tags=["serp scrapping"])
 
 
 
 
 
 
 
 
 
 
45
 
46
  # ===================== Search endpoints =====================
47
 
48
 
49
+ class SerpQuery(BaseModel):
50
  queries: list[str] = Field(...,
51
  description="The list of queries to search for")
52
  n_results: int = Field(
53
  10, description="Number of results to return for each query. Valid values are 10, 25, 50 and 100")
54
 
55
 
56
+ @serp_router.post("/search_scholar")
57
+ async def query_google_scholar(params: SerpQuery):
58
  """Queries google scholar for the specified query"""
59
  return {"error": "Unimplemented"}
60
 
61
 
62
+ @serp_router.post("/search_patents")
63
+ async def search_patents(params: SerpQuery) -> SerpResults:
64
  """Searches google patents for the specified queries and returns the found documents."""
65
  results = []
66
  for q in params.queries:
 
69
  res = await query_google_patents(pw_browser, q, params.n_results)
70
  results.extend(res)
71
  except Exception as e:
 
72
  logging.error(
73
  f"Failed to query Google Patents with query `{q}`: {e}")
74
+ return SerpResults(results=results, error=None)
75
 
76
 
77
+ @serp_router.post("/search_brave")
78
+ async def search_brave(params: SerpQuery) -> SerpResults:
79
  """Searches brave search for the specified queries and returns the found documents."""
80
  results = []
81
  last_exception: Optional[Exception] = None
 
86
  results.extend(res)
87
  except Exception as e:
88
  last_exception = e
 
89
  logging.error(
90
  f"Failed to query Brave search with query `{q}`: {e}")
91
 
92
+ return SerpResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
93
 
94
 
95
+ @serp_router.post("/search_bing")
96
+ async def search_bing(params: SerpQuery) -> SerpResults:
97
  """Searches Bing search for the specified queries and returns the found documents."""
98
  results = []
99
  last_exception: Optional[Exception] = None
 
104
  results.extend(res)
105
  except Exception as e:
106
  last_exception = e
 
107
  logging.error(
108
  f"Failed to query Bing search with query `{q}`: {e}")
109
 
110
+ return SerpResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
111
 
112
 
113
+ @serp_router.post("/search_duck")
114
+ async def search_duck(params: SerpQuery) -> SerpResults:
115
  """Searches duckduckgo for the specified queries and returns the found documents"""
116
  results = []
117
  last_exception: Optional[Exception] = None
 
123
  results.extend(res)
124
  except Exception as e:
125
  last_exception = e
 
126
  logging.error(f"Failed to query DDG with query `{q}`: {e}")
127
 
128
+ return SerpResults(results=results, error=str(last_exception) if len(results) == 0 and last_exception else None)
129
 
130
 
131
+ @serp_router.post("/search")
132
  @app.post("/search")
133
+ async def search(params: SerpQuery):
134
  """Attempts to search the specified queries using ALL backends"""
135
  results = []
136
 
 
164
  logging.info("Trying with next browser backend.")
165
 
166
  if len(results) == 0:
167
+ return SerpResults(results=[], error="All backends are rate-limited.")
168
 
169
+ return SerpResults(results=results, error=None)
170
 
171
  # =========================== Scrapping endpoints ===========================
172
 
 
193
 
194
  # ===============================================================================
195
 
196
+ app.include_router(serp_router)
197
  app.include_router(scrap_router)
198
 
199
  uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt CHANGED
@@ -3,6 +3,5 @@ uvicorn
3
  pydantic
4
  playwright
5
  duckduckgo_search
6
- expiringdict
7
  beautifulsoup4
8
  httpx
 
3
  pydantic
4
  playwright
5
  duckduckgo_search
 
6
  beautifulsoup4
7
  httpx
search.py → serp.py RENAMED
@@ -7,14 +7,8 @@ from urllib.parse import quote_plus
7
  import logging
8
  import re
9
 
10
-
11
- class APIPatentResults(BaseModel):
12
- """Response of /search_patents endpoint"""
13
- error: Optional[str]
14
- results: Optional[list[dict]]
15
-
16
-
17
- class APISearchResults(BaseModel):
18
  error: Optional[str]
19
  results: Optional[list[dict]]
20
 
 
7
  import logging
8
  import re
9
 
10
+ class SerpResults(BaseModel):
11
+ """Model for SERP scrapping results"""
 
 
 
 
 
 
12
  error: Optional[str]
13
  results: Optional[list[dict]]
14