ai-web-scraper-chat / routers /scraping_router.py
pvanand's picture
Update routers/scraping_router.py
f475beb verified
raw
history blame
1.76 kB
# routers/scraping_router.py
from fastapi import APIRouter, HTTPException, Header
from pydantic import BaseModel
import os
from typing import Optional
import logging
import re
from helpers.ai_client import AIClient
# Module-wide logging: INFO level on the root logger, plus a logger named
# after this module for the route handlers below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# All routes registered on this router live under /api/v1 and are tagged
# "Web Scraping".
router = APIRouter(prefix="/api/v1", tags=["Web Scraping"])

# Single AI client instance, created once at import time and shared by the
# handlers in this module.
ai_client = AIClient()
class CheerioScriptRequest(BaseModel):
    """Request body for the Cheerio script generation endpoint."""

    # HTML document that is embedded in the prompt sent to the model.
    html: str
    # Description of what should be extracted from the HTML.
    user_input: str
    # Optional API key carried in the body; omitted means None.
    api_key: Optional[str] = None
@router.post("/generate-cheerio-script")
async def generate_cheerio_script(
    request: CheerioScriptRequest,
    x_api_key: Optional[str] = Header(None),
):
    """Ask the AI client for a Cheerio script that extracts data from HTML.

    The model is instructed to wrap its script in
    ``<cheerio_script> .. </cheerio_script>`` tags; the text between the first
    such pair is returned.

    Args:
        request: Body carrying the HTML, the extraction description and an
            optional API key.
        x_api_key: API key taken from the ``X-API-Key`` header. When absent,
            ``request.api_key`` is used instead.

    Returns:
        ``{"cheerio_script": <script text>}`` with surrounding whitespace
        stripped.

    Raises:
        HTTPException: 422 when the model response contains no
            ``<cheerio_script>`` block; 500 when the AI call or the response
            parsing fails unexpectedly.
    """
    # The header wins; the body field is a fallback so a key supplied either
    # way reaches the client.
    api_key = x_api_key or request.api_key

    system_prompt = (
        f"return cheerio script to extract {request.user_input} from the following html,\n"
        f"enclose the cheerio script in <cheerio_script> .. </cheerio_script> format, {request.html}"
    )
    cheerio_pattern = r'<cheerio_script>(.*?)</cheerio_script>'

    try:
        response = ai_client.chat(
            prompt=request.user_input,
            system_message=system_prompt,
            model_id="openai/gpt-4o-mini",
            api_key=api_key,
        )
        # DOTALL lets the script span multiple lines.
        matches = re.search(cheerio_pattern, response, re.DOTALL)
    except HTTPException:
        # Deliberate HTTP errors must keep their own status code.
        raise
    except Exception as e:
        logger.exception("Error generating Cheerio script: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Raised outside the try block so the 422 is not rewritten into a 500 by
    # the generic handler above.
    if matches is None:
        raise HTTPException(
            status_code=422,
            detail="No valid Cheerio script found in the response"
        )

    return {"cheerio_script": matches.group(1).strip()}