# RAG Q&A chatbot: llama_index retrieval over local files + a fine-tuned
# Llama 3.1 8B HuggingFace model, served through a Gradio chat UI.
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
import gradio as gr
from llama_index.core import Settings
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# Load every document found in the local ``files`` directory into memory;
# these are embedded and indexed further below.
documents = SimpleDirectoryReader('files').load_data()
# System prompt prepended to every generation request made by the LLM.
system_prompt="""
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""
# Generation model: a fine-tuned Llama 3.1 8B checkpoint pulled from the
# HuggingFace hub (weights are downloaded on first run).
llm = HuggingFaceLLM(
context_window=4096,  # maximum prompt tokens the model accepts
max_new_tokens=256,  # cap on the length of each generated answer
generate_kwargs={"temperature": 0.1, "do_sample": True},  # low-temperature sampling for focused answers
system_prompt=system_prompt,
tokenizer_name="anasmkh/customized_llama3.1_8b",
model_name="anasmkh/customized_llama3.1_8b",
device_map="auto",  # let accelerate place model shards on available devices
model_kwargs={"torch_dtype": torch.float16 }  # half precision — assumes GPU support for fp16; confirm on target hardware
)
# Sentence-transformer embedding model via the LangChain wrapper.
# NOTE(review): llama_index is expected to resolve LangChain embedding objects
# transparently — verify this against the installed llama_index version.
embed_model= HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# Register the LLM and embedder as llama_index's global defaults.
Settings.llm = llm
Settings.embed_model =embed_model
# Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
Settings.num_output = 250  # tokens reserved for model output when budgeting prompts
Settings.context_window = 3900  # total window llama_index packs context into (< llm's 4096)
# Build an in-memory vector index over the loaded documents (embeds them all now).
index = VectorStoreIndex.from_documents(
documents, embed_model=embed_model
)
# Retrieval + answer-synthesis entry point used by the chat handler below.
query_engine = index.as_query_engine(llm=llm)
def chat(message, history):
    """Answer *message* with the RAG query engine and extend the chat history.

    Parameters:
        message: The user's question (plain text from the Gradio textbox).
        history: Existing chat history as OpenAI-style ``{"role", "content"}``
            dicts, or ``None``/empty on the first turn.

    Returns:
        The updated history list, which Gradio writes back into the chatbot.
    """
    history = history or []
    history.append({"role": "user", "content": message})
    response = query_engine.query(message)
    # query() returns a llama_index Response object, not text; coerce it to
    # str so the chatbot receives the answer instead of an object repr.
    history.append({"role": "assistant", "content": str(response)})
    return history
with gr.Blocks() as demo:
    # chat() stores history as {"role", "content"} dicts, so the Chatbot must
    # use the OpenAI-style "messages" format; the default tuple format would
    # reject dict entries at render time.
    chatbot = gr.Chatbot(type="messages")
    message = gr.Textbox()
    clear = gr.ClearButton([message, chatbot])
    # Submit routes (message, current history) through chat() and writes the
    # returned history back into the chatbot component.
    message.submit(chat, [message, chatbot], chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)
demo.launch()