Spaces:
Running
Running
Lucas ARRIESSE
committed on
Commit
·
a9a935f
1
Parent(s):
e14c7a4
Update project doc
Browse files
- app.py +2 -2
- docs/docs.md +21 -0
app.py
CHANGED
@@ -36,7 +36,8 @@ async def api_lifespan(app: FastAPI):
|
|
36 |
await pw_browser.close()
|
37 |
await playwright.stop()
|
38 |
|
39 |
-
app = FastAPI(lifespan=api_lifespan, docs_url="/")
|
|
|
40 |
|
41 |
# Router for scrapping related endpoints
|
42 |
scrap_router = APIRouter(prefix="/scrap", tags=["scrapping"])
|
@@ -138,7 +139,6 @@ async def search_duck(params: SerpQuery) -> SerpResults:
|
|
138 |
|
139 |
|
140 |
@serp_router.post("/search")
|
141 |
-
@app.post("/search")
|
142 |
async def search(params: SerpQuery):
|
143 |
"""Attempts to search the specified queries using ALL backends"""
|
144 |
results = []
|
|
|
36 |
await pw_browser.close()
|
37 |
await playwright.stop()
|
38 |
|
39 |
+
app = FastAPI(lifespan=api_lifespan, docs_url="/",
|
40 |
+
title="SERPent", description=open("docs/docs.md").read())
|
41 |
|
42 |
# Router for scrapping related endpoints
|
43 |
scrap_router = APIRouter(prefix="/scrap", tags=["scrapping"])
|
|
|
139 |
|
140 |
|
141 |
@serp_router.post("/search")
|
|
|
142 |
async def search(params: SerpQuery):
|
143 |
"""Attempts to search the specified queries using ALL backends"""
|
144 |
results = []
|
docs/docs.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# `SERPent`
|
2 |
+
|
3 |
+
## SERP results scraping
|
4 |
+
|
5 |
+
SERPent exposes a unified API to query SERP (Search Engine Result Pages) for a few common search engines, namely:
|
6 |
+
|
7 |
+
- DuckDuckGo
|
8 |
+
- Brave
|
9 |
+
- Bing
|
10 |
+
- Google Patents
|
11 |
+
- Google
|
12 |
+
|
13 |
+
The application uses the `playwright` library to control a headless web browser, to simulate normal user activity, to fool the anti-bot measures often present on those sites. See the `/serp/` endpoints for search results scraping.
|
14 |
+
|
15 |
+
|
16 |
+
## Website sources scraping
|
17 |
+
|
18 |
+
SERPent also exposes a few endpoints to scrape the contents of certain sources (patents, scholar). See the `/scrap/` endpoints for supported website sources scraping.
|
19 |
+
|
20 |
+
|
21 |
+
|