import re

import chainlit as cl
import tiktoken
from langchain.callbacks.base import BaseCallbackHandler


def format_docs(documents, max_context_size=100000, separator="\n\n"):
    """Concatenate documents into a single context string within a token budget.

    Each document is rendered as ``Article: <title>``, its page content and
    ``Source: <link>``, followed by ``separator``. Documents are appended in
    order for as long as the context built so far is below
    ``max_context_size`` tokens, measured with the ``cl100k_base`` encoding.

    The budget is checked *before* a document is appended, so the returned
    string may exceed ``max_context_size`` by up to one document.

    Args:
        documents: Iterable of objects exposing ``page_content`` and a
            ``metadata`` mapping with ``"link"`` and ``"title"`` keys.
        max_context_size: Token count at which no further documents are added.
        separator: Text appended after every document.

    Returns:
        The assembled context string (empty if ``documents`` is empty).

    Raises:
        KeyError: If a document's metadata lacks ``"link"`` or ``"title"``.
    """
    encoder = tiktoken.get_encoding("cl100k_base")
    context = ""
    for doc in documents:
        # Once the budget is reached the context can no longer change, so
        # stop here instead of re-tokenizing it for every remaining document.
        if len(encoder.encode(context)) >= max_context_size:
            break
        source = doc.metadata["link"]
        title = doc.metadata["title"]
        context += (
            f"Article: {title}\n"
            f"{doc.page_content}"
            f"\nSource: {source}"
            f"{separator}"
        )
    return context


class PostMessageHandler(BaseCallbackHandler):
    """
    Callback handler for handling the retriever and LLM processes.
    Used to post the sources of the retrieved documents as a Chainlit element.
    """

    def __init__(self, msg: cl.Message):
        """Store the message to annotate and start with no collected sources.

        Args:
            msg: The Chainlit message whose ``elements`` and ``content`` are
                extended once the LLM has finished.
        """
        super().__init__()
        self.msg = msg
        # One entry per retrieved document: its text followed by its link.
        self.sources = []

    def on_retriever_end(self, documents, *, run_id, parent_run_id, **kwargs):
        """Record the text and source link of every retrieved document.

        Args:
            documents: Retrieved documents; each must expose ``page_content``
                and a ``metadata`` mapping containing a ``"link"`` key.
            run_id: Unused; accepted to match the callback signature.
            parent_run_id: Unused; accepted to match the callback signature.
            **kwargs: Unused extra callback arguments.

        Raises:
            KeyError: If a document's metadata has no ``"link"`` entry.
        """
        self.sources.extend(
            d.page_content + "\nSource: " + d.metadata["link"] for d in documents
        )

    def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
        """Attach the collected sources to the message as Text elements.

        Does nothing when no sources have been collected. Otherwise one
        ``cl.Text`` element named ``source_<n>`` (1-based) is added per
        source and a ``Sources: ...`` line listing those names is appended
        to the message content.

        Args:
            response: Unused; accepted to match the callback signature.
            run_id: Unused; accepted to match the callback signature.
            parent_run_id: Unused; accepted to match the callback signature.
            **kwargs: Unused extra callback arguments.
        """
        if not self.sources:
            return
        # Display the reference docs with a Text widget
        sources_element = [
            cl.Text(name=f"source_{idx}", content=content)
            for idx, content in enumerate(self.sources, start=1)
        ]
        source_names = [el.name for el in sources_element]
        self.msg.elements += sources_element
        self.msg.content += f"\nSources: {', '.join(source_names)}"

    @staticmethod
    def clean_text(text: str) -> str:
        """Strip "Tweet"/"tweet" and collapse repeated spaces and newlines.

        Declared as a static method because it takes no ``self``; without
        the decorator, calling it on an instance raised ``TypeError``.

        Args:
            text: The raw text to normalize.

        Returns:
            The text with every ``Tweet``/``tweet`` occurrence removed, runs
            of spaces reduced to one space, runs of newlines reduced to one
            newline, and leading/trailing whitespace stripped.
        """
        text = re.sub(r"[Tt]weet", "", text)
        text = re.sub(r"\ +", " ", text)
        text = re.sub(r"\n+", "\n", text)
        return text.strip()