Update app.py
app.py CHANGED
@@ -1,8 +1,45 @@
 
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
+from llama_index.llms.huggingface import HuggingFaceLLM
+import torch
+import gradio as gr
+from llama_index.core import Settings
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from llama_index.core import ServiceContext
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 
+documents = SimpleDirectoryReader('/files').load_data()
 
+system_prompt = """
+You are a Q&A assistant. Your goal is to answer questions as
+accurately as possible based on the instructions and context provided.
+"""
 
+llm = HuggingFaceLLM(
+    context_window=4096,
+    max_new_tokens=256,
+    generate_kwargs={"temperature": 0.1, "do_sample": True},
+    system_prompt=system_prompt,
+    tokenizer_name="anasmkh/customized_llama3.1_8b",
+    model_name="anasmkh/customized_llama3.1_8b",
+    device_map="auto",
+    model_kwargs={"torch_dtype": torch.float16},
+)
 
+embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+
+
+Settings.llm = llm
+Settings.embed_model = embed_model
+# Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
+Settings.num_output = 250
+Settings.context_window = 3900
+
+index = VectorStoreIndex.from_documents(
+    documents, embed_model=embed_model
+)
+
+query_engine = index.as_query_engine(llm=llm)
 
 
 
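Editor's note on the hunk above: ServiceContext is imported twice and never used (configuration goes through Settings, and ServiceContext is deprecated in llama_index 0.10+), and the LangChain HuggingFaceEmbeddings import is repeated. A minimal sketch of the same setup with the duplicates dropped, assuming the llama-index-embeddings-huggingface package is installed; its native HuggingFaceEmbedding is swapped in here for the LangChain wrapper the commit uses.

import torch
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

system_prompt = (
    "You are a Q&A assistant. Your goal is to answer questions as "
    "accurately as possible based on the instructions and context provided."
)

# Same model configuration as the hunk above, registered once through Settings.
Settings.llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1, "do_sample": True},
    system_prompt=system_prompt,
    tokenizer_name="anasmkh/customized_llama3.1_8b",
    model_name="anasmkh/customized_llama3.1_8b",
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16},
)
# llama_index's native embedding wrapper, standing in for the LangChain one.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

documents = SimpleDirectoryReader("/files").load_data()
index = VectorStoreIndex.from_documents(documents)  # picks up Settings.embed_model
query_engine = index.as_query_engine()              # picks up Settings.llm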
@@ -10,7 +47,8 @@
 def chat(message, history):
     history = history or []
     history.append({"role": "user", "content": message})
-    response = generator(history)[-1]["generated_text"]
+    response = query_engine.query(message)
+    # response = generator(history)[-1]["generated_text"]
     history.append({"role": "assistant", "content": response})
     return history
 
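The commit imports gradio, but neither hunk shows the interface wiring. Below is a hedged sketch of how chat could be hooked up; demo, chatbot, and box are illustrative names, not part of the commit. It also accounts for one gotcha in the new code: query_engine.query() returns a llama_index Response object, while a messages-format Chatbot expects string content, hence the str(response) coercion.

import gradio as gr

def chat(message, history):
    history = history or []
    history.append({"role": "user", "content": message})
    # query() returns a Response object; coerce to str so the Chatbot
    # (messages format) receives plain text content.
    response = query_engine.query(message)
    history.append({"role": "assistant", "content": str(response)})
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    box = gr.Textbox(placeholder="Ask a question about the indexed files")
    box.submit(chat, inputs=[box, chatbot], outputs=chatbot)

demo.launch()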