Namitg02 committed (verified)
Commit 2a84894 · Parent(s): e4728d9

Update app.py

Files changed (1):
  1. app.py +66 -68

app.py CHANGED
@@ -1,55 +1,38 @@
  from datasets import load_dataset
- dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
- print(dataset)
-
  from langchain.docstore.document import Document as LangchainDocument
  from langchain.text_splitter import RecursiveCharacterTextSplitter
- splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15, separators=["\n\n", "\n", " ", ""])
- docs = splitter.create_documents(str(dataset))
-
  from sentence_transformers import SentenceTransformer
  from langchain_community.embeddings import HuggingFaceEmbeddings
- embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-
  from langchain_community.vectorstores import FAISS
- data = FAISS.from_texts(docs, embedding_model)
-
- #data = dataset["train"]
- data = data.add_faiss_index("embeddings") # column name that has the embeddings of the dataset
-
-
- from langchain_community.vectorstores import Chroma
- #persist_directory = 'docs/chroma/'
-
- #vectordb = Chroma.from_documents(
- #    documents=docs,
- #    embedding=embedding_model,
- #    persist_directory=persist_directory
- #)
-
- #retriever = vectordb.as_retriever(
- #    search_type="similarity", search_kwargs={"k": 2}
- #)
-
- from langchain.prompts import PromptTemplate
- from langchain.chains import ConversationalRetrievalChain
- from langchain.memory import ConversationBufferMemory
-
- memory = ConversationBufferMemory(
-     memory_key="chat_history",
-     return_messages=True
- )
-
- from transformers import pipeline
- from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
- from langchain_core.messages import SystemMessage
- from langchain_core.prompts import HumanMessagePromptTemplate
- from langchain_core.prompts import ChatPromptTemplate
- from langchain.prompts import PromptTemplate
- import time
-
  print("check1")
@@ -58,37 +41,36 @@ question = "How can I reverse Diabetes?"
  SYS_PROMPT = """You are an assistant for answering questions.
  You are given the extracted parts of a long document and a question. Provide a conversational answer.
  If you don't know the answer, just say "I do not know." Don't make up an answer."""

  print("check2")

-
- from transformers import AutoTokenizer
- from transformers import AutoModelForCausalLM
- from transformers import TextIteratorStreamer
- from threading import Thread
-
  llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
  tokenizer = AutoTokenizer.from_pretrained(llm_model)
  model = AutoModelForCausalLM.from_pretrained(llm_model)
- #pipe = pipeline(model = llm_model, tokenizer = tokenizer, task = "text-generation", temperature=0.5)

  terminators = [
-     tokenizer.eos_token_id,
-     tokenizer.convert_tokens_to_ids("<|eot_id|>")
  ]
-

  def search(query: str, k: int = 3):
      """a function that embeds a new query and returns the most probable results"""
-     embedded_query = embedding_model.encode(query) # embed new query
      scores, retrieved_examples = data.get_nearest_examples( # retrieve results
          "embeddings", embedded_query, # compare our new embedded query with the dataset embeddings
          k=k # get only top k results
      )
      return scores, retrieved_examples

  print("check2A")
@@ -99,57 +81,73 @@ def format_prompt(prompt,retrieved_documents,k):
      PROMPT += f"{retrieved_documents['text'][idx]}\n"
      return PROMPT

  print("check3")
-
  print("check3A")

  def talk(prompt, history):
      k = 1 # number of retrieved documents
-     scores, retrieved_documents = search(prompt, k)
-     formatted_prompt = format_prompt(prompt, retrieved_documents, k)
-     formatted_prompt = formatted_prompt[:2000] # to avoid GPU OOM
-     messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
-     # tell the model to generate
      input_ids = tokenizer.apply_chat_template(
          messages,
          add_generation_prompt=True,
          return_tensors="pt"
      ).to(model.device)
      outputs = model.generate(
          input_ids,
-         max_new_tokens=1024,
          eos_token_id=terminators,
          do_sample=True,
          temperature=0.6,
          top_p=0.9,
      )
      streamer = TextIteratorStreamer(
          tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
-     )
      generate_kwargs = dict(
          input_ids=input_ids,
          streamer=streamer,
-         max_new_tokens=1024,
          do_sample=True,
          top_p=0.95,
          temperature=0.75,
          eos_token_id=terminators,
      )
      t = Thread(target=model.generate, kwargs=generate_kwargs)
      t.start()
-
-
      outputs = []
      # for text in streamer:
      #     outputs.append(text)
      #     print(outputs)
      #     yield "".join(outputs)
-
-
-     print("check3B")

  TITLE = "AI Copilot for Diabetes Patients"
@@ -157,7 +155,7 @@ TITLE = "AI Copilot for Diabetes Patients"
  DESCRIPTION = ""

  import gradio as gr
-
  demo = gr.ChatInterface(
      fn=talk,
      chatbot=gr.Chatbot(
@@ -174,6 +172,6 @@ demo = gr.ChatInterface(
      description=DESCRIPTION,

  )
- demo.launch(debug=True)
-
- print("check4")

app.py (after)
 
  from datasets import load_dataset
  from langchain.docstore.document import Document as LangchainDocument
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from sentence_transformers import SentenceTransformer
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
+ from langchain.prompts import PromptTemplate
+ #from langchain.chains import ConversationalRetrievalChain
+ #from transformers import pipeline
+ #from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+ #from langchain_core.messages import SystemMessage
+ import time
+ from transformers import AutoTokenizer
+ from transformers import AutoModelForCausalLM
+ from transformers import TextIteratorStreamer
+ from threading import Thread

+ dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
+ print(dataset)
+ # Returns a Dataset object; printing it shows its features and number of rows

+ splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15, separators=["\n\n", "\n", " ", ""])
+ docs = splitter.create_documents(str(dataset))
+ # Returns a list of documents
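
Note: splitter.create_documents(str(dataset)) splits the string representation of the Dataset object (a short features/num_rows summary), not the row contents. A minimal sketch of splitting the actual text, assuming the dataset has a "text" column (the column name is an assumption, not shown in this diff):

    # assumption: Namitg02/Test has a "text" column
    docs = splitter.create_documents([row["text"] for row in dataset])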

+ embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

+ data = FAISS.from_texts(docs, embedding_model)
+ # Returns a FAISS wrapper vector store; from_texts takes a list of strings, while the from_documents method takes Documents and returns a VectorStore

+ #data = dataset["train"]
+ data = data.add_faiss_index("embeddings")
+ # adds a FAISS index over the column that holds the embeddings
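
Note: these two lines mix two different APIs. FAISS.from_texts expects a list of strings (docs here are Document objects, which from_documents accepts), and add_faiss_index is a method of datasets.Dataset, not of the LangChain FAISS store, so data.add_faiss_index would raise an AttributeError. Two consistent patterns, sketched under the stated assumptions:

    # Option A: LangChain only -- build the store from Documents; no separate index step is needed
    vectorstore = FAISS.from_documents(docs, embedding_model)

    # Option B: datasets only -- assumes an "embeddings" column is computed first (column/text names are hypothetical)
    # dataset = dataset.map(lambda row: {"embeddings": embedding_model.embed_query(row["text"])})
    # dataset.add_faiss_index(column="embeddings")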
 
 
 
 
  print("check1")

  SYS_PROMPT = """You are an assistant for answering questions.
  You are given the extracted parts of a long document and a question. Provide a conversational answer.
  If you don't know the answer, just say "I do not know." Don't make up an answer."""
+ # Provides context for how to answer the question

  print("check2")

  llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
  tokenizer = AutoTokenizer.from_pretrained(llm_model)
+ # pulling the tokenizer for the text generation model
  model = AutoModelForCausalLM.from_pretrained(llm_model)
+ # Initializing the text generation model

  terminators = [
+     tokenizer.eos_token_id, # end-of-sequence token that indicates where the model should consider the text sequence complete
+     tokenizer.convert_tokens_to_ids("<|eot_id|>") # converts a token string into its integer id using the vocabulary
  ]
+ # token ids that indicate the end of a sequence
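
Note: <|eot_id|> is the Llama 3 end-of-turn token; TinyLlama-1.1B-Chat-v1.0 uses a Zephyr-style chat template that ends turns with </s>, so convert_tokens_to_ids("<|eot_id|>") will most likely return the unknown-token id. A hedged sketch that only keeps ids actually present in the vocabulary:

    # keep only terminator ids that exist in this tokenizer's vocabulary
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    terminators = [tokenizer.eos_token_id]
    if eot_id is not None and eot_id != tokenizer.unk_token_id:
        terminators.append(eot_id)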

  def search(query: str, k: int = 3):
      """a function that embeds a new query and returns the most probable results"""
+     embedded_query = embedding_model.encode(query) # create an embedding of the new query
      scores, retrieved_examples = data.get_nearest_examples( # retrieve results
          "embeddings", embedded_query, # compare our new embedded query with the dataset embeddings
          k=k # get only top k results
      )
      return scores, retrieved_examples
+ # returns scores (List[float]), the retrieval scores from FAISS (IndexFlatL2 by default), and examples in dict format
+ # called by the talk function, which passes the prompt
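
Note: HuggingFaceEmbeddings exposes embed_query rather than encode (encode belongs to the underlying SentenceTransformer), and get_nearest_examples is a datasets.Dataset method, not a method of the LangChain FAISS store built above. A sketch of the same function against the LangChain store (assuming vectorstore from the Option A sketch above):

    def search(query: str, k: int = 3):
        """Sketch: return scores and texts of the k nearest chunks."""
        results = vectorstore.similarity_search_with_score(query, k=k)  # embeds the query internally
        scores = [score for _, score in results]
        retrieved_examples = {"text": [doc.page_content for doc, _ in results]}
        return scores, retrieved_examples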

+ # print(scores, retrieved_examples)  # commented out: these names only exist inside search(), so this line would raise a NameError at import
  print("check2A")

      PROMPT += f"{retrieved_documents['text'][idx]}\n"
      return PROMPT

+ # Called by the talk function to add retrieved documents to the prompt; keeps appending the text of the retrieved documents to the string
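
Note: only the last two lines of format_prompt appear in this diff. A hedged reconstruction of the whole function; the header line is an assumption, not visible here:

    def format_prompt(prompt, retrieved_documents, k):
        """Sketch: prepend the question, then append the text of the k retrieved documents."""
        PROMPT = f"Question: {prompt}\nContext:\n"  # assumed header
        for idx in range(k):
            PROMPT += f"{retrieved_documents['text'][idx]}\n"
        return PROMPT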

  print("check3")
+ # print(PROMPT)  # commented out: PROMPT is local to format_prompt, so printing it at module level would raise a NameError

  print("check3A")

  def talk(prompt, history):
      k = 1 # number of retrieved documents
+     scores, retrieved_documents = search(prompt, k) # get retrieval scores and examples in dictionary format based on the prompt passed
+     formatted_prompt = format_prompt(prompt, retrieved_documents, k) # create a new prompt using the retrieved documents
+     formatted_prompt = formatted_prompt[:400] # truncate to avoid memory issues
+     messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}] # bind the system context and the new prompt for the LLM
+     # the chat template structure should match the text generation model's format
+     print("check3B")
      input_ids = tokenizer.apply_chat_template(
          messages,
          add_generation_prompt=True,
          return_tensors="pt"
      ).to(model.device)
+     # tokenize the chat messages into model input
+     # the add_generation_prompt argument tells the template to add the tokens that indicate the start of a bot response
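      # [editor's sketch, not part of this commit] to inspect the rendered prompt,
      # apply_chat_template can return text instead of token ids:
      #     print(tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False))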
+     print("check3C")
      outputs = model.generate(
          input_ids,
+         max_new_tokens=300,
          eos_token_id=terminators,
          do_sample=True,
          temperature=0.6,
          top_p=0.9,
      )
+     # call the model to generate a response based on the message/input
+     # do_sample=True uses sampling strategies to select the next token from the probability distribution over the entire vocabulary
+     # temperature controls randomness: higher temperature means more randomness
+     # only the tokens comprising the top_p probability mass are considered for responses
+     # with these settings the output is a tensor of token ids; generate() can also return a richer data structure usable as a tuple or dictionary
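      # [editor's sketch, not part of this commit] this blocking generate() call is never
      # decoded or returned, and the threaded generate below re-runs generation with the
      # streamer; if a one-shot reply were wanted, decode only the newly generated tokens:
      #     response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)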
+     print("check3D")
      streamer = TextIteratorStreamer(
          tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+     )
+     # stores print-ready text in a queue, to be used by a downstream application as an iterator; removes special tokens from the generated text
+     # timeout is for the text queue; the tokenizer is used for decoding tokens
+     # passed to the model via generate_kwargs
+     print("check3E")
      generate_kwargs = dict(
          input_ids=input_ids,
          streamer=streamer,
+         max_new_tokens=512,
          do_sample=True,
          top_p=0.95,
          temperature=0.75,
          eos_token_id=terminators,
      )
+     # additional parameters sent to the model for generation
+     print("check3F")
      t = Thread(target=model.generate, kwargs=generate_kwargs)
+     # run generation in a background thread so tokens can be streamed as they arrive
      t.start()
+     # start the thread
+     print("check3G")
      outputs = []
      # for text in streamer:
      #     outputs.append(text)
      #     print(outputs)
      #     yield "".join(outputs)
+     print("check3H")
 
 
  TITLE = "AI Copilot for Diabetes Patients"

  DESCRIPTION = ""

  import gradio as gr
+ # Design the chatbot UI
  demo = gr.ChatInterface(
      fn=talk,
      chatbot=gr.Chatbot(

      description=DESCRIPTION,

  )
+ # launch the chatbot; Gradio calls the talk function, which in turn calls the other functions
+ print("check3I")
+ demo.launch(debug=True)