# routers/scraping_router.py
"""FastAPI router exposing an endpoint that asks an AI model for a Cheerio script."""
import logging
import os
import re
from typing import Optional

from fastapi import APIRouter, HTTPException, Header
from pydantic import BaseModel

from helpers.ai_client import AIClient

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/api/v1",
    tags=["Web Scraping"]
)

# Initialize AI client
ai_client = AIClient()

# Delimiters the model is instructed to wrap the script in. The prompt and the
# extraction pattern are both built from these so they cannot drift apart.
_SCRIPT_OPEN_TAG = "<cheerio>"
_SCRIPT_CLOSE_TAG = "</cheerio>"

# DOTALL lets the lazy group span the multiple lines of a script.
_CHEERIO_PATTERN = re.compile(
    re.escape(_SCRIPT_OPEN_TAG) + r"(.*?)" + re.escape(_SCRIPT_CLOSE_TAG),
    re.DOTALL,
)


class CheerioScriptRequest(BaseModel):
    """Request body for the Cheerio script generation endpoint."""

    # HTML document the generated script should extract data from.
    html: str
    # Description of what should be extracted from the HTML.
    user_input: str
    # Optional API key; used only when the X-API-Key header is not supplied.
    api_key: Optional[str] = None


def _extract_cheerio_script(response: str) -> Optional[str]:
    """Return the stripped text between the script tags, or None if absent or empty."""
    match = _CHEERIO_PATTERN.search(response)
    if match is None:
        return None
    return match.group(1).strip() or None


@router.post("/generate-cheerio-script")
async def generate_cheerio_script(
    request: CheerioScriptRequest,
    x_api_key: Optional[str] = Header(None)
):
    """Ask the AI client for a Cheerio script and return it.

    Args:
        request: The HTML to scrape and a description of what to extract.
        x_api_key: API key from the ``X-API-Key`` header. When missing, the
            ``api_key`` field of the request body is used instead.

    Returns:
        A dict ``{"cheerio_script": <script>}`` with the extracted script.

    Raises:
        HTTPException: 422 when the model's reply contains no non-empty
            script between the expected tags; 500 for any other failure.
    """
    try:
        system_prompt = f"""return cheerio script to extract {request.user_input} from the following html, enclose the cheerio script in {_SCRIPT_OPEN_TAG}..{_SCRIPT_CLOSE_TAG} format, {request.html}"""

        # NOTE(review): chat() is called without await, so it presumably blocks
        # the event loop for the duration of the model call - confirm whether
        # AIClient offers an async variant.
        response = ai_client.chat(
            prompt=request.user_input,
            system_message=system_prompt,
            model_id="openai/gpt-4o-mini",
            # Header takes precedence; the body field is only a fallback.
            api_key=x_api_key or request.api_key
        )

        # Extract Cheerio script using regex
        cheerio_script = _extract_cheerio_script(response)
        if cheerio_script is None:
            raise HTTPException(
                status_code=422,
                detail="No valid Cheerio script found in the response"
            )
        return {"cheerio_script": cheerio_script}

    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 422 above) through unchanged
        # instead of converting them into a 500 below.
        raise
    except Exception as e:
        logger.exception("Error generating Cheerio script: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e