Namitg02 committed on
Commit e715c6b · verified · 1 Parent(s): b9fbd83

Update app.py

Files changed (1): app.py +90 -51

app.py CHANGED
@@ -1,70 +1,68 @@
 from datasets import load_dataset
 from datasets import Dataset
-from langchain.docstore.document import Document as LangchainDocument
+#from langchain.docstore.document import Document as LangchainDocument
+# from langchain.memory import ConversationBufferMemory
 from sentence_transformers import SentenceTransformer
 import faiss
-import pandas as pd
 import time
-import torch
+#import torch
+import pandas as pd
 
-from transformers import AutoTokenizer
-from transformers import AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
+#from transformers import AutoModelForCausalLM, AutoModel
 from transformers import TextIteratorStreamer
 from threading import Thread
+#from transformers import LlamaForCausalLM, LlamaTokenizer
+#git lfs install
+#from ctransformers import AutoModelForCausalLM, AutoConfig, Config, AutoTokenizer
+
+#from huggingface_hub import InferenceClient
+from huggingface_hub import Repository, upload_file
+import os
 
-llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-tokenizer = AutoTokenizer.from_pretrained(llm_model)
-# pulling tokenizer for the text generation model
+HF_TOKEN = os.getenv('HF_Token')
+#Log_Path="./Logfolder"
+logfile = 'DiabetesChatLog.txt'
+historylog = [{
+    "Prompt": '',
+    "Output": ''
+}]
 
-dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
-#dataset = load_dataset("epfl-llm/guidelines", split='train')
-#Returns a list of dictionaries, each representing a row in the dataset.
-length = len(dataset)
+llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+# TheBloke/Llama-2-7B-Chat-GGML , TinyLlama/TinyLlama-1.1B-Chat-v1.0 , microsoft/Phi-3-mini-4k-instruct, health360/Healix-1.1B-V1-Chat-dDPO
+# TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working
 
-embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-#all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5, infgrad/stella-base-en-v2, BAAI/bge-large-en-v1.5, mixedbread-ai/mxbai-embed-large-v1 working with default dimensions
+model = AutoModelForCausalLM.from_pretrained(llm_model)
+tokenizer = AutoTokenizer.from_pretrained(llm_model)
+# initialize model and tokenizer
 
-df = pd.DataFrame(dataset)
-#print(df.iloc[[1]])
-print("check1")
-df['embeddings'] = df['text'].apply(lambda x: embedding_model.encode(x))
-# add embeddings as a new column
-
-print("check1a")
-print(df.iloc[[1]])
-dataset = Dataset.from_pandas(df)
-print("check1b")
-
-#dataset['text'][:length]
-
-print(dataset[2])
+data = load_dataset("Namitg02/Test", split='train', streaming=False)
+#Returns a list of dictionaries, each representing a row in the dataset.
+length = len(data)
 
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 embedding_dim = embedding_model.get_sentence_embedding_dimension()
 # Returns the dimension of the embedding
-data = dataset
 
-print(embedding_dim)
-d = 384 # vector dimension
-m = 32 # HNSW parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
-#index = faiss.IndexHNSWFlat(d, m)
+
 index = faiss.IndexFlatL2(embedding_dim)
 data.add_faiss_index("embeddings", custom_index=index)
-#data.add_faiss_index("embeddings")
 # adds an index column for the embeddings
 
 print("check1d")
 #question = "How can I reverse Diabetes?"
 
 SYS_PROMPT = """You are an assistant for answering questions.
-You are given the extracted parts of a long document and a question. Provide a conversational answer.
+You are given the extracted parts of documents and a question. Provide a conversational answer.
 If you don't know the answer, just say "I do not know." Don't make up an answer."""
 # Provides context of how to answer the question
 
+
 print("check2")
 
+# memory = ConversationBufferMemory(return_messages=True)
 
-model = AutoModelForCausalLM.from_pretrained(llm_model)
-# Initializing the text generation model
 
 terminators = [
     tokenizer.eos_token_id, # End-of-Sequence token that indicates where the model should consider the text sequence complete
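With the pandas embedding step removed, the indexing above only works if the "Namitg02/Test" dataset already ships with an "embeddings" column. A minimal sketch of recreating that column with Dataset.map when it is missing; the "text" column name follows the deleted code and is an assumption about the Hub dataset's schema:

from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import faiss

data = load_dataset("Namitg02/Test", split="train")
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

if "embeddings" not in data.column_names:
    # encode each row's text into a 384-dim vector (all-MiniLM-L6-v2 default)
    data = data.map(lambda row: {"embeddings": embedding_model.encode(row["text"]).tolist()})

index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
data.add_faiss_index("embeddings", custom_index=index)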
@@ -73,7 +71,7 @@ terminators = [
 # indicates the end of a sequence
 
 
-def search(query: str, k: int = 3 ):
+def search(query: str, k: int = 2 ):
     """a function that embeds a new query and returns the most probable results"""
     embedded_query = embedding_model.encode(query) # create embedding of a new query
     scores, retrieved_examples = data.get_nearest_examples( # retrieve results
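The hunk cuts off inside search; a minimal sketch of the complete helper, assuming the body continues with the datasets library's get_nearest_examples as in the surrounding code:

def search(query: str, k: int = 2):
    """a function that embeds a new query and returns the most probable results"""
    embedded_query = embedding_model.encode(query)  # create embedding of a new query
    scores, retrieved_examples = data.get_nearest_examples(
        "embeddings",    # the FAISS-indexed column added above
        embedded_query,  # the query vector
        k=k,             # number of nearest neighbours to return
    )
    return scores, retrieved_examples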
@@ -89,26 +87,29 @@ print("check2A")
 
 
 def format_prompt(prompt,retrieved_documents,k):
     """using the retrieved documents we will prompt the model to generate our responses"""
     PROMPT = f"Question:{prompt}\nContext:"
     for idx in range(k) :
-        PROMPT+= f"{retrieved_documents['text'][idx]}\n"
+        PROMPT+= f"{retrieved_documents['0'][idx]}\n"
     return PROMPT
 
 # Called by the talk function to add retrieved documents to the prompt; keeps appending the text of the retrieved documents to the string
 
 print("check3")
-#print(PROMPT)
-
-print("check3A")
-
 
-def talk(prompt,history):
-    k = 1 # number of retrieved documents
+def talk(prompt, history):
+    k = 2 # number of retrieved documents
     scores , retrieved_documents = search(prompt, k) # get retrieval scores and examples in dictionary format based on the prompt passed
+    print(retrieved_documents.keys())
     formatted_prompt = format_prompt(prompt,retrieved_documents,k) # create a new prompt using the retrieved documents
-    formatted_prompt = formatted_prompt[:400] # to avoid memory issues
-    messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}] # binding the system context and new prompt for the LLM
+    print(retrieved_documents['0'])
+    print(formatted_prompt)
+    formatted_prompt = formatted_prompt[:600] # to avoid memory issues
+    # print(retrieved_documents['0'][1])
+    # print(retrieved_documents['0'][2])
+    print(formatted_prompt)
+    messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
+    # binding the system context and new prompt for the LLM
     # the chat template structure should be based on the text generation model's format
     print("check3B")
     input_ids = tokenizer.apply_chat_template(
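get_nearest_examples returns a plain dict keyed by column name, so switching the lookup from 'text' to '0' implies the dataset's text column is literally named "0". A defensive variant that tolerates either schema; the fallback order is an assumption, not part of the commit:

def format_prompt(prompt, retrieved_documents, k):
    """using the retrieved documents we will prompt the model to generate our responses"""
    text_col = "0" if "0" in retrieved_documents else "text"  # tolerate either column name
    PROMPT = f"Question:{prompt}\nContext:"
    for idx in range(k):
        PROMPT += f"{retrieved_documents[text_col][idx]}\n"
    return PROMPT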
@@ -124,8 +125,8 @@ def talk(prompt,history):
         max_new_tokens=300,
         eos_token_id=terminators,
         do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
+        temperature=0.4,
+        top_p=0.95,
     )
     # calling the model to generate a response based on the message/input
     # do_sample, if set to True, uses strategies to select the next token from the probability distribution over the entire vocabulary
@@ -143,10 +144,10 @@ def talk(prompt,history):
     generate_kwargs = dict(
         input_ids= input_ids,
         streamer=streamer,
-        max_new_tokens= 512,
+        max_new_tokens= 200,
         do_sample=True,
         top_p=0.95,
-        temperature=0.75,
+        temperature=0.4,
         eos_token_id=terminators,
     )
     # send additional parameters to the model for generation
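Both sampling blocks now share temperature=0.4 and top_p=0.95, trading diversity for more grounded answers, and the streamed call is capped at 200 new tokens. For context, a minimal sketch of the streaming pattern the unchanged surrounding lines rely on: model.generate runs on a background thread while TextIteratorStreamer yields decoded chunks. stream_answer is a hypothetical wrapper mirroring the tail of talk; parameter values mirror this hunk:

def stream_answer(input_ids):
    """Hypothetical wrapper: stream partial generations as they decode."""
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=200,
        do_sample=True,
        top_p=0.95,
        temperature=0.4,
        eos_token_id=terminators,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()  # generate off the main thread
    outputs = []
    for new_text in streamer:      # blocks until the next decoded chunk arrives
        outputs.append(new_text)
        yield "".join(outputs)     # stream the partial answer to the Gradio UI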
@@ -163,10 +164,44 @@ def talk(prompt,history):
         yield "".join(outputs)
     print("check3H")
 
+    pd.options.display.max_colwidth = 800
 
+    outputstring = ''.join(outputs)
+
+    global historylog
+    historynew = {
+        "Prompt": prompt,
+        "Output": outputstring
+    }
+    historylog.append(historynew)
+    return historylog
+    print(historylog)
+
+    # history.update({prompt: outputstring})
+    # print(history)
+    #print(memory_string2)
+    #with open(logfile, 'a', encoding='utf-8') as f:
+    #    f.write(memory_string2)
+    #    f.write('\n')
+    #f.close()
+    #print(logfile)
+    #logfile.push_to_hub("Namitg02/",token = HF_TOKEN)
+    #memory_panda = pd.DataFrame()
+    #if len(memory_panda) == 0:
+    #    memory_panda = pd.DataFrame(memory_string)
+    #else:
+    #    memory_panda = memory_panda.append(memory_string, ignore_index=True)
+    #print(memory_panda.iloc[[0]])
+
+    #memory_panda.loc[len(memory_panda.index)] = ['prompt', outputstring]
+    #print(memory_panda.iloc[[1]])
+    #Logfile = Dataset.from_pandas(memory_panda)
+    #Logfile.push_to_hub("Namitg02/Logfile",token = HF_TOKEN)
+
+
 TITLE = "AI Copilot for Diabetes Patients"
 
-DESCRIPTION = ""
+DESCRIPTION = "I provide answers to concerns related to Diabetes"
 
 import gradio as gr
 # Design chatbot
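Note that talk is a generator (it yields partial outputs), so `return historylog` merely ends iteration, Gradio discards the returned value, and the print after it never runs; the `global historylog` declaration is also unnecessary because append mutates the list in place rather than rebinding the name. A hedged sketch of the same bookkeeping without the dead code; talk_tail is a hypothetical name for the end of the talk body:

def talk_tail(prompt, outputs, historylog):
    """Hypothetical helper: record one exchange after streaming has finished."""
    outputstring = "".join(outputs)
    historylog.append({"Prompt": prompt, "Output": outputstring})
    print(historylog)  # reachable, unlike the print placed after `return` in the commit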
@@ -188,4 +223,8 @@ demo = gr.ChatInterface(
 )
 # launch the chatbot; calls the talk function, which in turn calls the other functions
 print("check3I")
+print(historylog)
+memory_panda = pd.DataFrame(historylog)
+Logfile = Dataset.from_pandas(memory_panda)
+Logfile.push_to_hub("Namitg02/Logfile",token = HF_TOKEN)
 demo.launch()
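Because push_to_hub runs before demo.launch() and launch() blocks until the Gradio server stops, only the empty seed entry of historylog is ever uploaded. A hedged alternative (save_history is a hypothetical name) that pushes whatever was logged once the server shuts down:

import pandas as pd
from datasets import Dataset

def save_history(historylog, token):
    # convert the accumulated prompt/output pairs into a Hub dataset
    Logfile = Dataset.from_pandas(pd.DataFrame(historylog))
    Logfile.push_to_hub("Namitg02/Logfile", token=token)

demo.launch()                         # blocks until the Gradio server shuts down
save_history(historylog, HF_TOKEN)    # push the chat log collected during the session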
 