File size: 4,808 Bytes
3073249 192ee82 3073249 192ee82 3073249 46ead58 3073249 192ee82 3073249 41f7238 3073249 192ee82 3073249 192ee82 3073249 da7acf6 3073249 192ee82 3073249 192ee82 3073249 192ee82 3073249 192ee82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import streamlit as st
from streamlit_chat import message
import tempfile
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain
from ctransformers import AutoModelForCausalLM
from langchain_g4f import G4FLLM
from g4f import Provider, models
import unicodedata
import requests
# Path where the FAISS vector store built from the downloaded CSV is persisted
DB_FAISS_PATH = 'vectorstore/db_faiss'
def is_japanese_character(character):
    """Return True if *character* is a Japanese character.

    Inspects the Unicode character name and matches kanji (CJK unified
    ideographs) as well as hiragana and katakana, so Japanese text is
    detected even when it contains no kanji.  The original check matched
    only 'CJK UNIFIED', which wrongly rejected kana such as 'あ' or 'カ'.

    Characters without a Unicode name (unassigned code points) yield an
    empty name via the default argument and return False.
    """
    name = unicodedata.name(character, '')
    return any(marker in name for marker in ('CJK UNIFIED', 'HIRAGANA', 'KATAKANA'))
def load_llm():
    """Instantiate and return the chat language model.

    Uses GPT-3.5-turbo through the g4f DeepAi provider wrapped as a
    LangChain LLM.  (A previous revision ran a local quantized
    Llama-2-7B via CTransformers; that dead, commented-out configuration
    has been removed — restore it from history if an offline model is
    needed again.)
    """
    llm = G4FLLM(
        model=models.gpt_35_turbo,
        provider=Provider.DeepAi,
    )
    return llm
# --- Page chrome: hide Streamlit's default menu and footer ------------------
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Set the title for the Streamlit app
st.title("Zendo美女チャットボックス")

# --- Knowledge base: download the chat CSV ---------------------------------
csv_url = "https://huggingface.co/spaces/uyen13/chatgirl2/raw/main/testchatdata.csv"
# Local path where the downloaded file is saved
tmp_file_path = "testchatdata.csv"

# Download the CSV file.  A timeout is essential: without one, an
# unreachable host would hang the whole app at startup forever.
response = requests.get(csv_url, timeout=60)
if response.status_code == 200:
    with open(tmp_file_path, 'wb') as file:
        file.write(response.content)
else:
    raise Exception(f"Failed to download the CSV file from {csv_url}")

# Load CSV data using CSVLoader (one document per row)
loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()

# Create embeddings using Sentence Transformers (CPU only)
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': 'cpu'})

# Build a FAISS vector store over the rows and persist it for reuse
db = FAISS.from_documents(data, embeddings)
db.save_local(DB_FAISS_PATH)

# Load the language model
llm = load_llm()

# Conversational chain: retrieval from the FAISS store + the LLM
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=db.as_retriever())
def conversational_chat(query):
    """Run one retrieval-augmented chat turn.

    The user's question is prefixed with an instruction (in Japanese) to
    answer based on the provided data, sent through the conversational
    chain together with the accumulated chat history, and the resulting
    (prefixed question, answer) pair is appended to the session history
    before the answer text is returned.
    """
    grounded_query = "提供されたデータに基づいて," + query
    history = st.session_state['history']
    result = chain({"question": grounded_query, "chat_history": history})
    answer = result["answer"]
    history.append((grounded_query, answer))
    return answer
# --- Session state ----------------------------------------------------------
# Initialize chat history (list of (question, answer) tuples fed to the chain)
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Initial bot greeting and matching first "user" entry for the transcript
if 'generated' not in st.session_state:
    st.session_state['generated'] = ["こんにちは！zendo美女ですか、何がお探しですか？... 🤗"]
if 'past' not in st.session_state:
    st.session_state['past'] = ["チャットをはじめ"]

# Create containers for chat history and user input
response_container = st.container()
container = st.container()

# --- User input form --------------------------------------------------------
with container:
    with st.form(key='my_form', clear_on_submit=True):
        # NOTE: this placeholder literal was broken across two physical
        # lines in the original source (a syntax error); rejoined here.
        user_input = st.text_input("ChatBox", placeholder="質問をご記入ください... ", key='input')
        submit_button = st.form_submit_button(label='Send')

    if submit_button and user_input:
        output = conversational_chat(user_input)
        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(output)

# --- Transcript -------------------------------------------------------------
# Display chat history: user and bot messages are kept in lockstep lists.
if st.session_state['generated']:
    with response_container:
        for i, (user_msg, bot_msg) in enumerate(zip(st.session_state['past'], st.session_state['generated'])):
            message(user_msg, is_user=True, key=str(i) + '_user', avatar_style="big-smile")
            message(bot_msg, key=str(i), avatar_style="thumbs")
|