Lucas ARRIESSE committed on
Commit
a9a935f
·
1 Parent(s): e14c7a4

Update project doc

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. docs/docs.md +21 -0
app.py CHANGED
@@ -36,7 +36,8 @@ async def api_lifespan(app: FastAPI):
36
  await pw_browser.close()
37
  await playwright.stop()
38
 
39
- app = FastAPI(lifespan=api_lifespan, docs_url="/")
 
40
 
41
  # Router for scrapping related endpoints
42
  scrap_router = APIRouter(prefix="/scrap", tags=["scrapping"])
@@ -138,7 +139,6 @@ async def search_duck(params: SerpQuery) -> SerpResults:
138
 
139
 
140
  @serp_router.post("/search")
141
- @app.post("/search")
142
  async def search(params: SerpQuery):
143
  """Attempts to search the specified queries using ALL backends"""
144
  results = []
 
36
  await pw_browser.close()
37
  await playwright.stop()
38
 
39
+ app = FastAPI(lifespan=api_lifespan, docs_url="/",
40
+ title="SERPent", description=open("docs/docs.md").read())
41
 
42
  # Router for scrapping related endpoints
43
  scrap_router = APIRouter(prefix="/scrap", tags=["scrapping"])
 
139
 
140
 
141
  @serp_router.post("/search")
 
142
  async def search(params: SerpQuery):
143
  """Attempts to search the specified queries using ALL backends"""
144
  results = []
docs/docs.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # `SERPent`
2
+
3
+ ## SERP results scraping
4
+
5
+ SERPent exposes a unified API for querying the SERPs (Search Engine Results Pages) of a few common search engines, namely:
6
+
7
+ - DuckDuckGo
8
+ - Brave
9
+ - Bing
10
+ - Google Patents
11
+ - Google
12
+
13
+ The application uses the `playwright` library to control a headless web browser that simulates normal user activity, in order to get past the anti-bot measures often present on those sites. See the `/serp/` endpoints for search results scraping.
14
+
15
+
16
+ ## Website sources scraping
17
+
18
+ SERPent also exposes a few endpoints to scrape the contents of certain sources (patents, scholar). See the `/scrap/` endpoints for the supported website sources.
19
+
20
+
21
+