import torch

import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# Load all files from the ./files directory into Document objects.
documents = SimpleDirectoryReader('files').load_data()

system_prompt = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""

# Wrap the fine-tuned Llama 3.1 8B checkpoint in LlamaIndex's HuggingFaceLLM wrapper.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1, "do_sample": True},
    system_prompt=system_prompt,
    tokenizer_name="anasmkh/customized_llama3.1_8b",
    model_name="anasmkh/customized_llama3.1_8b",
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16},
)
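# Note (assumption): float16 weights for an 8B model need roughly 16 GB of
# GPU memory. On smaller GPUs, a quantized load is a common fallback, e.g.
# passing {"load_in_4bit": True} in model_kwargs with bitsandbytes installed.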

embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

# Register the models globally; Settings replaces the deprecated ServiceContext.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.num_output = 250
Settings.context_window = 3900

# Embed the documents and build an in-memory vector index over them.
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

query_engine = index.as_query_engine(llm=llm)
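# Quick sanity check (hypothetical question, for illustration only):
#   print(query_engine.query("What topics do these files cover?"))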

def chat(message, history):
    """Answer a user message with the query engine and append the exchange to history."""
    history = history or []
    history.append({"role": "user", "content": message})
    # query() returns a Response object, so cast it to a string before
    # handing it to the Gradio chatbot.
    response = query_engine.query(message)
    history.append({"role": "assistant", "content": str(response)})
    return history
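# For token-by-token output, llama-index also offers
# index.as_query_engine(streaming=True); the blocking call above is kept
# for simplicity.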

with gr.Blocks() as demo:
    # type="messages" lets the chatbot consume the role/content dicts built in chat().
    chatbot = gr.Chatbot(type="messages")
    message = gr.Textbox()
    clear = gr.ClearButton([message, chatbot])

    message.submit(chat, [message, chatbot], chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()
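# demo.launch(share=True) would expose a temporary public URL if the app
# must be reachable beyond localhost.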
|
|