from datasets import load_dataset
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread

# Load the source dataset from the Hugging Face Hub.
dataset = load_dataset("Namitg02/Test")
print(dataset)

# Split the documents into small overlapping chunks for retrieval.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=15,
    separators=["\n\n", "\n", " ", ""],
)
# NOTE: create_documents() expects a list of strings, not str(dataset).
# This assumes the train split has a "text" column; adjust to the actual
# schema of Namitg02/Test.
docs = splitter.create_documents([str(t) for t in dataset["train"]["text"]])

# Embed the chunks and persist them in a local Chroma vector store.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
persist_directory = "docs/chroma/"
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    persist_directory=persist_directory,
)
# Retriever view of the store; search() below queries the store directly
# so that similarity scores are returned as well.
retriever = vectordb.as_retriever(
    search_type="similarity", search_kwargs={"k": 2}
)

print("check1")

question = "How can I reverse Diabetes?"

SYS_PROMPT = """You are an assistant for answering questions.
You are given the extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "I do not know." Don't make up an answer."""

print("check2")

# TinyLlama is a public model, so no auth token is required.
llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(llm_model)
model = AutoModelForCausalLM.from_pretrained(llm_model)

# TinyLlama ends a turn with the standard EOS token. ("<|eot_id|>" is a
# Llama-3 token and is not in this tokenizer's vocabulary.)
terminators = [tokenizer.eos_token_id]


def search(query: str, k: int = 3):
    """Embed a new query and return the k most similar chunks with their scores."""
    # Chroma embeds the query internally using embedding_model.
    results = vectordb.similarity_search_with_score(query, k=k)
    scores = [score for _, score in results]
    retrieved_documents = [doc for doc, _ in results]
    return scores, retrieved_documents

print("check2A")


def format_prompt(prompt, retrieved_documents, k):
    """Prepend the retrieved context chunks to the user question."""
    PROMPT = f"Question:{prompt}\nContext:"
    for doc in retrieved_documents[:k]:
        PROMPT += f"{doc.page_content}\n"
    return PROMPT

print("check3")
print("check3A")
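# Optional sanity check: run one retrieval round trip before wiring up the
# chat UI, to confirm the vector store returns plausible chunks. The sample
# question here is illustrative only.
_scores, _docs = search("What are the symptoms of diabetes?", k=2)
print(format_prompt("What are the symptoms of diabetes?", _docs, 2))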
def talk(prompt, history):
    k = 1  # number of retrieved documents
    scores, retrieved_documents = search(prompt, k)
    formatted_prompt = format_prompt(prompt, retrieved_documents, k)
    formatted_prompt = formatted_prompt[:2000]  # crude truncation to avoid GPU OOM
    messages = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": formatted_prompt},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    # generate() blocks, so run it in a background thread and stream partial
    # text back to Gradio as it is produced.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        temperature=0.75,
        eos_token_id=terminators,
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

print("check3B")

TITLE = "AI Copilot for Diabetes Patients"
DESCRIPTION = ""

import gradio as gr

demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(
        show_label=True,
        show_share_button=True,
        show_copy_button=True,
        likeable=True,
        layout="bubble",
        bubble_full_width=False,
    ),
    theme=gr.themes.Soft(),
    examples=["What is Diabetes?"],
    title=TITLE,
    description=DESCRIPTION,
)
demo.launch(debug=True)
print("check4")
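# To reopen the persisted index in a later session without re-embedding
# (a sketch; assumes the same embedding model and persist directory):
#
#   vectordb = Chroma(
#       persist_directory="docs/chroma/",
#       embedding_function=embedding_model,
#   )
#   retriever = vectordb.as_retriever(search_kwargs={"k": 2})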