from datasets import load_dataset
from langchain.docstore.document import Document as LangchainDocument
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceEmbeddings

import faiss
from langchain.prompts import PromptTemplate

import time
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from transformers import TextIteratorStreamer
from threading import Thread


dataset = load_dataset("epfl-llm/guidelines", split="train")
# Returns a datasets.Dataset; indexing it (e.g. dataset[1]) yields one row as a dict.
#print(dataset[1])

# splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=25, separators=["\n"])  # ["\n\n", "\n", " ", ""]
#docs = splitter.create_documents(str(dataset))
# create_documents returns a list of LangChain Documents
#print(docs)
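# Illustrative sketch (not used below): if chunking were enabled, the splitter would
# normally be applied to each record's clean_text rather than to str(dataset), which
# stringifies the whole Dataset object into a single blob. Kept commented out.
#docs = [
#    chunk
#    for record in dataset
#    for chunk in splitter.create_documents([record["clean_text"]])
#]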

embedding_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
# all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5, infgrad/stella-base-en-v2, BAAI/bge-large-en-v1.5 also work with default dimensions

#docs_text = [doc.page_content for doc in docs]
#embed = embedding_model.encode(docs_text)

embedding_dim = embedding_model.get_sentence_embedding_dimension()
print(embedding_dim)
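# Optional sanity check (illustrative, commented out): a single encoded sentence should
# have embedding_dim components (1024 for mxbai-embed-large-v1).
#print(embedding_model.encode("sanity check sentence").shape)  # -> (1024,)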

#data = FAISS.from_embeddings(embed, embedding_model)
#data = FAISS.from_texts(docs, embedding_model)
# The LangChain FAISS wrapper builds a VectorStore from texts/embeddings; from_documents does the same from Documents.

# Work with the Dataset object itself (not just the raw text column), since a FAISS index
# is attached to a dataset column.
data = dataset
#data = dataset["clean_text"]  # this would be a plain list of strings and could not carry a FAISS index

# Embed clean_text and store the vectors in an "embeddings" column, which the index is built on.
data = data.map(
    lambda batch: {"embeddings": embedding_model.encode(batch["clean_text"])},
    batched=True,
)
#print(data)

d = embedding_dim  # vector dimension (1024 for mxbai-embed-large-v1)
m = 32  # HNSW parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be OK)
#index = faiss.IndexHNSWFlat(d, m)
#index = faiss.IndexFlatL2(embedding_dim)
#data.add_faiss_index("embeddings", custom_index=index)

data.add_faiss_index("embeddings")
# adds a FAISS index (IndexFlatL2 by default) over the "embeddings" column
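# Optional (illustrative, commented out): computing the embeddings is expensive, so the
# index can be persisted and reloaded with datasets' save/load helpers. The file name
# below is arbitrary.
#data.save_faiss_index("embeddings", "guidelines_faiss.index")
#data.load_faiss_index("embeddings", "guidelines_faiss.index")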

print("check1")
#question = "How can I reverse Diabetes?"

SYS_PROMPT = """You are an assistant for answering questions.
You are given the extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "I do not know." Don't make up an answer."""
# System prompt: tells the model how to answer the question

print("check2")

llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(llm_model)
# tokenizer for the text generation model
model = AutoModelForCausalLM.from_pretrained(llm_model)
# the text generation model itself

terminators = [
    tokenizer.eos_token_id,  # End-of-Sequence token: marks where the model should consider the sequence complete
    # tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # only needed for Llama-3-style chat templates; not in TinyLlama's vocabulary
]
# token ids that mark the end of a generated sequence

def search(query: str, k: int = 3):
    """Embed a new query and return the top-k nearest dataset examples."""
    embedded_query = embedding_model.encode(query)  # create an embedding for the new query
    scores, retrieved_examples = data.get_nearest_examples(  # retrieve results
        "embeddings", embedded_query,  # compare the embedded query with the dataset embeddings
        k=k  # keep only the top k results
    )
    return scores, retrieved_examples

# returns scores (List[float]): the FAISS retrieval scores (L2 distances for the default IndexFlatL2),
# and retrieved_examples (dict): the matching rows, keyed by column name
# called by the talk function, which passes in the user prompt
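# Illustrative usage (commented out, in the spirit of the probes above):
#scores, retrieved_examples = search("How can I reverse Diabetes?", k=3)
#print(scores)                                     # smaller L2 distance = closer match
#print(retrieved_examples["clean_text"][0][:200])  # start of the best-matching guideline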

#print(scores, retrieved_examples)
print("check2A")


def format_prompt(prompt, retrieved_documents, k):
    """Build the model prompt from the user question plus the retrieved documents."""
    PROMPT = f"Question:{prompt}\nContext:"
    for idx in range(k):
        PROMPT += f"{retrieved_documents['clean_text'][idx]}\n"
    return PROMPT

# Called by the talk function to add retrieved documents to the prompt: it appends the text
# of each retrieved document (the 'clean_text' column) to the prompt string.
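# Minimal sketch of the prompt format_prompt builds, using a stand-in retrieval result
# (the dict layout mirrors get_nearest_examples output; the sentence is illustrative only).
_demo_docs = {"clean_text": ["Diabetes is a chronic condition that affects how the body turns food into energy."]}
print(format_prompt("What is diabetes?", _demo_docs, 1))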

print("check3")
#print(PROMPT)

print("check3A")


def talk(prompt, history):
    k = 1  # number of retrieved documents
    scores, retrieved_documents = search(prompt, k)  # retrieval scores and examples (dict) for the prompt
    formatted_prompt = format_prompt(prompt, retrieved_documents, k)  # build a new prompt from the retrieved documents
    formatted_prompt = formatted_prompt[:400]  # truncate to avoid memory issues
    messages = [{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}]  # bind the system context and the new prompt for the LLM
    # the chat template structure should follow the text generation model's format
    print("check3B")
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)
    # add_generation_prompt tells the template to append the tokens that mark the start of a bot response
    print("check3D")
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    # stores print-ready text in a queue, to be consumed downstream as an iterator; strips special tokens from the generated text
    # timeout applies to the text queue; the tokenizer is used for decoding tokens
    # passed to model.generate via generate_kwargs below
    print("check3E")
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.95,
        temperature=0.75,
        eos_token_id=terminators,
    )
    # generation parameters:
    # do_sample=True samples the next token from the probability distribution over the vocabulary
    # temperature controls randomness: higher temperature means more randomness
    # only the tokens comprising the top_p probability mass are considered
    # eos_token_id lists the token ids at which generation stops
    print("check3F")
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    # run generation in a background thread so the streamed text can be consumed while tokens are produced
    t.start()
    print("check3G")
    outputs = []
    for text in streamer:
        outputs.append(text)
        print(outputs)
        yield "".join(outputs)
    print("check3H")

TITLE = "AI Copilot for Diabetes Patients"

DESCRIPTION = ""

import gradio as gr

# Build the chatbot UI
demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(
        show_label=True,
        show_share_button=True,
        show_copy_button=True,
        likeable=True,
        layout="bubble",
        bubble_full_width=False,
    ),
    theme="Soft",
    examples=[["what is Diabetes? "]],
    title=TITLE,
    description=DESCRIPTION,
)
# launching the app serves the chatbot; each user message calls talk(), which in turn calls search() and format_prompt()

print("check3I")
demo.launch(share=True)