import torch
import gradio as gr

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.huggingface import HuggingFaceLLM
from langchain_community.embeddings import HuggingFaceEmbeddings

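# Load all documents from the local 'files' directory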
documents = SimpleDirectoryReader('files').load_data()

system_prompt = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""

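# Fine-tuned Llama 3.1 8B checkpoint served through LlamaIndex's HuggingFaceLLM
# wrapper: float16 weights, device placement handled by device_map="auto"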
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1, "do_sample": True},
    system_prompt=system_prompt,
    tokenizer_name="anasmkh/customized_llama3.1_8b",
    model_name="anasmkh/customized_llama3.1_8b",
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16}
)

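# Sentence-transformers model (via LangChain) used to embed documents and queries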
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


# Register the LLM and embedding model globally and cap output/context sizes
Settings.llm = llm
Settings.embed_model = embed_model
# Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
Settings.num_output = 250
Settings.context_window = 3900

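# Build an in-memory vector index over the loaded documents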
index = VectorStoreIndex.from_documents(
    documents, embed_model=embed_model
)

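# Query engine that retrieves relevant chunks and answers with the custom LLM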
query_engine = index.as_query_engine(llm=llm)


def chat(message, history):
    # Append the user turn, run the RAG query, and return the updated history
    history = history or []
    history.append({"role": "user", "content": message})
    response = query_engine.query(message)
    history.append({"role": "assistant", "content": str(response)})
    return history

with gr.Blocks() as demo:
    # type="messages" matches the role/content dicts produced by chat()
    chatbot = gr.Chatbot(type="messages")
    message = gr.Textbox()
    clear = gr.ClearButton([message, chatbot])

    message.submit(chat, [message, chatbot], chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

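# Launch the Gradio app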
demo.launch()