"""Small FastAPI service that scrapes links and text content from a web page
using headless Chromium via Playwright.

POST /get_webscrapet_data?url=...  ->  JSON list of {"href": ...} and
{"text": ...} dicts extracted from the target page.
"""

import asyncio
import os
from io import StringIO

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse, StreamingResponse
from playwright.async_api import async_playwright
from pydantic import BaseModel

app = FastAPI()

# Wide-open CORS: this service is meant to be called from arbitrary frontends.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Original hard-coded target, kept as the default so existing behavior is
# preserved when no URL is supplied by internal callers.
DEFAULT_URL = (
    "https://www.fool.com/earnings/call-transcripts/2024/01/24/"
    "tesla-tsla-q4-2023-earnings-call-transcript/"
)


async def scrape_links(url: str = DEFAULT_URL):
    """Scrape all anchor hrefs and element text content from *url*.

    Launches a headless Chromium instance, blocks every resource type except
    documents and scripts (images/styles/fonts are aborted to speed up the
    load), and returns a flat list of dicts:

    - ``{"href": <str or None>}`` for every ``<a>`` element, then
    - ``{"text": <stripped text>}`` for every ``body *`` element with
      non-empty text (NOTE: nested elements repeat their ancestors' text,
      so this list contains duplicated fragments — inherited from the
      original implementation).

    Raises whatever Playwright raises on navigation/launch failure; the
    endpoint below converts that into an HTTP 500.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()

            # Block unnecessary resources to speed up loading.
            await page.route(
                "**/*",
                lambda route: route.continue_()
                if route.request.resource_type in ("document", "script")
                else route.abort(),
            )

            await page.goto(url, wait_until="domcontentloaded")

            # NOTE(review): 10 is *milliseconds* — almost certainly too short
            # to "ensure dynamic content is loaded" as the original comment
            # claimed. Value preserved to avoid a silent behavior change;
            # consider page.wait_for_load_state("networkidle") instead.
            await page.wait_for_timeout(10)

            # Collect every anchor's href (may be None for anchors without one).
            anchors = await page.query_selector_all("a")
            result = [{"href": await a.get_attribute("href")} for a in anchors]

            # Collect non-empty text content from every element under <body>.
            for element in await page.query_selector_all("body *"):
                text_content = await element.text_content()
                if text_content and text_content.strip():
                    result.append({"text": text_content.strip()})

            return result
        finally:
            # Always release the browser process, even if scraping failed,
            # otherwise each failed request leaks a headless Chromium.
            await browser.close()


@app.post("/get_webscrapet_data")
async def get_webscrapet_data(url: str):
    """Scrape *url* and return the extracted links/text as JSON.

    The ``url`` query parameter was previously ignored (the scraper always
    hit a hard-coded page); it is now passed through to ``scrape_links``.
    Any scraping failure is surfaced as HTTP 500 with the error message.
    """
    try:
        return await scrape_links(url)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))