"""Small FastAPI service that scrapes links and text content from a web page
using headless Chromium via Playwright.

POST /get_webscrapet_data?url=...  ->  JSON list of {"href": ...} and
{"text": ...} dicts extracted from the target page.
"""

import asyncio
import os
from io import StringIO

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
from playwright.async_api import async_playwright
from pydantic import BaseModel

app = FastAPI()

# Wide-open CORS: this service is meant to be called from arbitrary frontends.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Original hard-coded target, kept as the default so existing behavior is
# preserved when no URL is supplied by internal callers.
DEFAULT_URL = (
    "https://www.fool.com/earnings/call-transcripts/2024/01/24/"
    "tesla-tsla-q4-2023-earnings-call-transcript/"
)


async def scrape_links(url: str = DEFAULT_URL):
    """Scrape all anchor hrefs and element text content from *url*.

    Launches a headless Chromium instance, blocks every resource type except
    documents and scripts (images/styles/fonts are aborted to speed up the
    load), and returns a flat list of dicts:

    - ``{"href": <str or None>}`` for every ``<a>`` element, then
    - ``{"text": <stripped text>}`` for every ``body *`` element with
      non-empty text (NOTE: nested elements repeat their ancestors' text,
      so this list contains duplicated fragments — inherited from the
      original implementation).

    Raises whatever Playwright raises on navigation/launch failure; the
    endpoint below converts that into an HTTP 500.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            # Block unnecessary resources to speed up loading.
            await page.route(
                "**/*",
                lambda route: route.continue_()
                if route.request.resource_type in ("document", "script")
                else route.abort(),
            )

            await page.goto(url, wait_until="domcontentloaded")

            # NOTE(review): 10 is *milliseconds* — almost certainly too short
            # to "ensure dynamic content is loaded" as the original comment
            # claimed. Value preserved to avoid a silent behavior change;
            # consider page.wait_for_load_state("networkidle") instead.
            await page.wait_for_timeout(10)

            # Collect every anchor's href (may be None for anchors without one).
            anchors = await page.query_selector_all("a")
            result = [{"href": await a.get_attribute("href")} for a in anchors]

            # Collect non-empty text content from every element under <body>.
            for element in await page.query_selector_all("body *"):
                text_content = await element.text_content()
                if text_content and text_content.strip():
                    result.append({"text": text_content.strip()})

            return result
        finally:
            # Always release the browser process, even if scraping failed,
            # otherwise each failed request leaks a headless Chromium.
            await browser.close()


@app.post("/get_webscrapet_data")
async def get_webscrapet_data(url: str):
    """Scrape *url* and return the extracted links/text as JSON.

    The ``url`` query parameter was previously ignored (the scraper always
    hit a hard-coded page); it is now passed through to ``scrape_links``.
    Any scraping failure is surfaced as HTTP 500 with the error message.
    """
    try:
        return await scrape_links(url)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))