anasmkh committed on
Commit
72ba547
·
verified ·
1 Parent(s): a5a4cba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -1
app.py CHANGED
@@ -1,8 +1,45 @@
1
 
 
 
 
 
 
 
 
 
2
 
 
3
 
 
 
 
 
4
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
 
@@ -10,7 +47,8 @@
10
  def chat(message, history):
11
  history = history or []
12
  history.append({"role": "user", "content": message})
13
- response = generator(history)[-1]["generated_text"]
 
14
  history.append({"role": "assistant", "content": response})
15
  return history
16
 
 
1
 
2
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
3
+ from llama_index.llms.huggingface import HuggingFaceLLM
4
+ import torch
5
+ import gradio as gr
6
+ from llama_index.core import Settings
7
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
8
+ from llama_index.core import ServiceContext
9
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
10
 
11
+ documents = SimpleDirectoryReader('/files').load_data()
12
 
13
+ system_prompt="""
14
+ You are a Q&A assistant. Your goal is to answer questions as
15
+ accurately as possible based on the instructions and context provided.
16
+ """
17
 
18
+ llm = HuggingFaceLLM(
19
+ context_window=4096,
20
+ max_new_tokens=256,
21
+ generate_kwargs={"temperature": 0.1, "do_sample": True},
22
+ system_prompt=system_prompt,
23
+ tokenizer_name="anasmkh/customized_llama3.1_8b",
24
+ model_name="anasmkh/customized_llama3.1_8b",
25
+ device_map="auto",
26
+ model_kwargs={"torch_dtype": torch.float16 }
27
+ )
28
 
29
+ embed_model= HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
30
+
31
+
32
+ Settings.llm = llm
33
+ Settings.embed_model =embed_model
34
+ # Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
35
+ Settings.num_output = 250
36
+ Settings.context_window = 3900
37
+
38
+ index = VectorStoreIndex.from_documents(
39
+ documents, embed_model=embed_model
40
+ )
41
+
42
+ query_engine = index.as_query_engine(llm=llm)
43
 
44
 
45
 
 
47
  def chat(message, history):
48
  history = history or []
49
  history.append({"role": "user", "content": message})
50
+ response=query_engine.query(message)
51
+ # response = generator(history)[-1]["generated_text"]
52
  history.append({"role": "assistant", "content": response})
53
  return history
54