from typing import List, Tuple

from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI


def create_prompt(prompt: str) -> ChatPromptTemplate:
    # Build a chat prompt template from a raw prompt string.
    prompt_template = ChatPromptTemplate.from_template(prompt)
    return prompt_template


def split_documents(documents: List[Document]) -> Tuple[List[Document], int]:
    # Split documents into overlapping chunks and return them with an
    # approximate token count (character length used as a rough proxy).
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False,
    )
    split_docs = text_splitter.split_documents(documents)
    total_tokens = sum(len(doc.page_content) for doc in split_docs)  # Approximate token count
    return split_docs, total_tokens


def generate_embeddings(docs: List[Document]) -> Tuple[List[List[float]], int]:
    # Embed the chunk texts and return the vectors with an approximate token count.
    embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small")
    embeddings = embeddings_model.embed_documents([doc.page_content for doc in docs])
    total_tokens = sum(len(doc.page_content) for doc in docs)  # Approximate token count
    return embeddings, total_tokens


def create_qamodel(model: str = "gpt-4o-mini", temperature: float = 0) -> ChatOpenAI:
    # Instantiate the chat model used for question answering, honoring the arguments
    # instead of hard-coding the model name and temperature.
    qamodel = ChatOpenAI(
        model=model,
        temperature=temperature,
    )
    return qamodel
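

# A minimal usage sketch (an assumption, not part of the original code) showing how the
# helpers above can be wired together for a single question. `my_documents` is a
# hypothetical placeholder, and the "retrieval" step simply passes every chunk as
# context; a real pipeline would store the embeddings in a vector store and run a
# similarity search to pick the most relevant chunks before answering.
if __name__ == "__main__":
    my_documents = [
        Document(page_content="LangChain is a framework for building LLM applications.")
    ]

    chunks, chunk_tokens = split_documents(my_documents)
    vectors, embedding_tokens = generate_embeddings(chunks)

    prompt = create_prompt(
        "Answer the question using only the context below.\n\n"
        "Context:\n{context}\n\nQuestion: {question}"
    )
    qamodel = create_qamodel()

    # Naive stand-in for retrieval: concatenate all chunks as the context.
    context = "\n\n".join(doc.page_content for doc in chunks)
    chain = prompt | qamodel
    answer = chain.invoke({"context": context, "question": "What is LangChain?"})
    print(answer.content)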