from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import requests
from AWSClaude import AWSClaude
import json
import concurrent.futures
import time

# Create the FastAPI application object that the route decorators below attach to.
app = FastAPI()
# Register CORS middleware on the application.
# NOTE(review): every setting below is a wildcard and credentials are enabled,
# which looks like a development-time configuration. FastAPI's CORS
# documentation advises listing explicit origins when credentials are allowed —
# confirm whether this is intended for production before relying on it.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)



def _run_n_depth_scrape(url, input_query):
    """Run the blocking scrape -> LLM-select pipeline and return scraped content.

    Starting from ``url``, the pipeline runs two rounds. In each round it
    scrapes at most two pages through the remote scraper service, sets aside
    links ending in ``.pdf`` and asks the LLM which of the remaining links are
    most relevant to ``input_query``; the LLM's answer seeds the next round.

    Args:
        url: Starting page URL.
        input_query: Free-text query used by the LLM to rank links.

    Returns:
        dict mapping each scraped page URL to the ``"Content"`` value the
        scraper service returned for it.

    Raises:
        HTTPException: 502 when the scraper service fails or returns an
            unexpected payload, or when the LLM reply is not the expected JSON.
    """
    all_content = {}
    pdf_urls = []

    def add_pdf_content(selected_pdf):
        """Download each PDF and store the local parsing service's JSON reply.

        NOTE(review): nothing in this function's enclosing pipeline calls this
        helper at the moment; it is kept so the PDF step can be re-enabled.
        It writes every download to the same "temp.pdf" path, so it is not
        safe to run for several requests at once as written.
        """
        for pdf_url in selected_pdf:
            print(pdf_url)
            download = requests.get(pdf_url)

            # Save the content of the response as a PDF file
            pdf_path = "temp.pdf"
            with open(pdf_path, "wb") as file:
                file.write(download.content)

            print(f"PDF file saved as {pdf_path}")

            service_url = "http://localhost:5000/ask"
            # service_url = "https://us-central1-neuralgap-1.cloudfunctions.net/scraperPDFDocxTables_v2"

            data = {"processTables": "True"}

            headers = {"Origin": "http://localhost:8080"}

            with open(pdf_path, "rb") as file:
                file_contents = file.read()

                files = {
                    "pdf": (
                        pdf_path,
                        file_contents,
                        "application/pdf",
                    )
                }

                response = requests.post(service_url, files=files, data=data, headers=headers)
                all_content[pdf_url] = response.json()

    def scrapper(input_url):
        """Scrape one page; record its content and return the links found on it."""
        params = {'url': input_url}
        headers = {'accept': 'application/json'}
        service_url = 'https://chromium-qpxamiokfa-uc.a.run.app/get_scraped_data'
        # NOTE(review): no timeout is set, so a stalled scraper stalls this worker.
        response = requests.get(service_url, headers=headers, params=params)
        payload = response.json()  # decode the body once and reuse it
        all_url = payload["URL"]
        all_content[input_url] = payload["Content"]
        return all_url

    def separate_pdf_and_nonPDF_links(urls):
        """Record the ``.pdf`` links in ``pdf_urls`` and return all other links."""
        pdf_links = []
        other_links = []  # other links for rescraping
        for link in urls:
            if link and link.endswith('.pdf'):
                pdf_links.append(link)
            else:
                other_links.append(link)
        if pdf_links:
            # Each page's PDF links are kept as their own sub-list.
            pdf_urls.append(pdf_links)
        return other_links

    def call_llm_service(scraped_data, input_url, input_query, pdf):
        """Ask the LLM for the links most relevant to ``input_query``; return its raw reply."""
        query = f"""
        Here are my scraped links:

        {scraped_data}

        correct hostname: {input_url} use this host name for all other tasks
        
        I need the always full (www.hotname.com/../) {pdf} URLs for the most relevant links related to "{input_query}". use the correct hostname from this provided content, give raw hyperlink with json format only don't give extra text details. only give json output
        example json format is only links don't include keys (i need the always full (www.hotname.com/../))
        """
        llm = "ClaudeHaiku"
        env = ""
        user_id = "KAusXF7jp0Q40urdZWtDLXEhrmA"
        thread_id = "hKxvoVgi7vRJCHhvMzH5"
        stream_id = "stream1"
        app_type = "sentinel"
        other_request_params = {"messages": [
            {"role": "user", "content": query},
        ]}
        return AWSClaude(llm, env, user_id, thread_id, stream_id, app_type, other_request_params).invoke()

    def decode_llm_json(raw_reply):
        """Parse the LLM reply as JSON, reporting a 502 instead of an opaque crash."""
        try:
            return json.loads(raw_reply)
        except (TypeError, ValueError) as exc:
            raise HTTPException(
                status_code=502,
                detail="LLM service did not return valid JSON",
            ) from exc

    # Serialise with json.dumps instead of hand-building '["..."]' so that a URL
    # containing quotes or backslashes still yields valid JSON. `input_url`
    # stays a JSON string because it is interpolated into the LLM prompt.
    input_url = json.dumps([url], ensure_ascii=False)
    current_links = [url]

    for step in range(1, 3):
        print(f"=================={step} step of scraping to get selected URLs from LLM=================================")
        next_urls = []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Only the first two candidate links are scraped in each round.
            futures = [executor.submit(scrapper, link) for link in current_links[:2]]
            for future in concurrent.futures.as_completed(futures):
                try:
                    scraped_links = future.result()
                except (requests.RequestException, ValueError, KeyError) as exc:
                    raise HTTPException(
                        status_code=502,
                        detail="Scraper service request failed or returned an unexpected payload",
                    ) from exc
                next_urls.append(separate_pdf_and_nonPDF_links(scraped_links))
        input_url = call_llm_service(next_urls, input_url, input_query, "")
        # Parse the reply once; the parsed list drives the next round.
        current_links = decode_llm_json(input_url)
        if not isinstance(current_links, list):
            raise HTTPException(
                status_code=502,
                detail="LLM service did not return a JSON list of links",
            )
        print(current_links[:2])

    if not pdf_urls:
        print(pdf_urls)
        return all_content

    selected_pdf = decode_llm_json(
        call_llm_service(pdf_urls, input_url, input_query, "only end with .pdf extension")
    )
    print(pdf_urls)
    print("selected pdf")
    print(selected_pdf)
    # TODO(review): `selected_pdf` is only printed; `add_pdf_content(selected_pdf)`
    # is never invoked, so PDF content is not part of the response. Confirm
    # whether that step was disabled on purpose.
    return all_content


@app.post("/get_n_depth_results")
async def get_n_depth_results(url,input_query):
    """Scrape ``url`` two levels deep, guided by an LLM, and return page content.

    The scraping and LLM calls are blocking, so they run in the event loop's
    default thread pool; running them directly in this coroutine would stall
    every other request served by the same loop.

    Args:
        url: Starting page URL.
        input_query: Free-text query describing which links are relevant.

    Returns:
        dict mapping each scraped page URL to its scraped content.

    Raises:
        HTTPException: 502 when the scraper or the LLM service returns an
            unusable response.
    """
    import asyncio  # local import keeps this block self-contained

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _run_n_depth_scrape, url, input_query)

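# Manual timing harness for local debugging (disabled).
# NOTE: `main` is not defined in the visible part of this file; the visible
# entry point is the `get_n_depth_results` coroutine, so this snippet must be
# updated before it can be re-enabled.
# # Start time
# start_time = time.time()

# print(main("https://www.keells.com/", "Please analyse reports"))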

# # End time
# end_time = time.time()

# # Calculate the time taken
# time_taken = end_time - start_time

# print(f"Time taken: {time_taken} seconds")