import os

import streamlit as st
from langchain_core.messages import AIMessage, HumanMessage
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from aift.multimodal import textqa
from aift import setting
import chromadb
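# Streamlit re-executes this script on every interaction; clearing Chroma's
# shared client cache avoids "already exists with different settings" errors
# when the vector store is recreated on a rerun.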
chromadb.api.client.SharedSystemClient.clear_system_cache()
# Set API key for Pathumma. Reading it from an environment variable keeps the
# secret out of source control; "AIFT_API_KEY" is an assumed variable name.
setting.set_api_key(os.getenv("AIFT_API_KEY", ""))
# App Configuration
st.set_page_config(page_title="Nong Nok", page_icon="🐦")
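# A little vertical spacing under the title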
st.markdown(" ")
st.markdown(" ")
st.markdown(" ")
# Custom embeddings: a small wrapper exposing the embed_query/embed_documents
# methods that LangChain's Chroma integration expects.
class CustomEmbeddings:
    def __init__(self, model_name="mrp/simcse-model-m-bert-thai-cased"):
        self.model = SentenceTransformer(model_name)

    def embed_query(self, text):
        # encode() returns a numpy array; convert to a plain Python list
        return self.model.encode([text])[0].tolist()

    def embed_documents(self, texts):
        # Batch-encode all texts in a single call rather than looping one by one
        return self.model.encode(texts).tolist()
# Pathumma model wrapper around the aift textqa endpoint
class PathummaModel:
    def generate(self, instruction: str, return_json: bool = False):
        response = textqa.generate(instruction=instruction, return_json=return_json)
        if return_json:
            # JSON responses are dicts; the answer text sits under "content"
            return response.get("content", "")
        return response

    def __call__(self, input: str):
        return self.generate(input, return_json=False)
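# Note on the response shape: with return_json=True the aift client is assumed
# to return a dict whose answer sits under "content" (hence the .get above);
# with return_json=False it returns the answer as a plain string.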
# Initialize Pathumma model
model_local = PathummaModel()

# Build the RAG retriever once and cache it across reruns; without caching,
# Streamlit would reload and re-embed the PDF on every user interaction.
@st.cache_resource
def build_retriever(file_path="langchain.pdf"):
    # Load PDF file
    loader = PyPDFLoader(file_path)
    docs = loader.load()
    # Split text into manageable chunks. Note: the m-BERT encoder truncates
    # input far below 7500 tokens, so only the start of each chunk is embedded.
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
    doc_splits = text_splitter.split_documents(docs)
    # Convert documents to embeddings and store them in Chroma
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="rag-chroma",
        embedding=CustomEmbeddings(model_name="mrp/simcse-model-m-bert-thai-cased"),
    )
    return vectorstore.as_retriever()

retriever = build_retriever()
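# as_retriever() defaults to plain similarity search returning the top 4 chunks.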
# Generate a response: retrieve relevant chunks, then prompt the model
def get_response(user_query):
    retrieved_docs = retriever.invoke(user_query)
    retrieved_context = " ".join([doc.page_content for doc in retrieved_docs])
    after_rag_template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
    prompt = after_rag_template.format(context=retrieved_context, question=user_query)
    response = model_local(prompt)
    return response
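# Note: only the retrieved context and the current question reach the model;
# earlier turns in chat_history are rendered in the UI but never sent to Pathumma.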
# Initialize session state with an opening assistant message
if "chat_history" not in st.session_state:
    st.session_state.chat_history = [
        AIMessage(content="🐦 Welcome to Nong Nok! I'm a bot ready to give you information about the Personal Data Protection Act (PDPA). How can I help?"),
    ]
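# st.session_state persists across reruns, so the chat history survives each
# round trip even though the whole script re-executes.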
# Render chat history
for message in st.session_state.chat_history:
    if isinstance(message, AIMessage):
        with st.chat_message("AI"):
            st.write(message.content)
    elif isinstance(message, HumanMessage):
        with st.chat_message("Human"):
            st.write(message.content)
# User input
user_query = st.chat_input("Type your message here...")
if user_query is not None and user_query.strip() != "":
    st.session_state.chat_history.append(HumanMessage(content=user_query))
    with st.chat_message("Human"):
        st.markdown(user_query)
    with st.chat_message("AI"):
        # Show a placeholder immediately, then swap in the generated answer
        placeholder = st.empty()
        placeholder.markdown("Generating an answer...")
        response = get_response(user_query)
        placeholder.markdown(response)
    st.session_state.chat_history.append(AIMessage(content=response))