|
# Load the source dataset from the Hugging Face Hub
from datasets import load_dataset

dataset = load_dataset("Namitg02/Test")
print(dataset)
|
# Split the raw text into small overlapping chunks for retrieval
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=100, chunk_overlap=15, separators=["\n\n", "\n", " ", ""]
)

# create_documents expects a list of strings; passing str(dataset) would chunk
# the DatasetDict's repr, not the data. Serialize each train-split row instead
# (if the dataset has a dedicated text column, prefer dataset["train"]["text"]).
texts = [str(row) for row in dataset["train"]]
docs = splitter.create_documents(texts)
# Embed each chunk with a small sentence-transformers model
from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Index the embedded chunks in a persistent Chroma vector store
from langchain_community.vectorstores import Chroma

persist_directory = "docs/chroma/"

vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    persist_directory=persist_directory,
)
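# A persisted index can be reopened in a later session without re-embedding;
# a minimal sketch (assumes the same embedding model is supplied):
# vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)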
|
# Expose the store as a retriever returning the top-2 most similar chunks
retriever = vectordb.as_retriever(
    search_type="similarity", search_kwargs={"k": 2}
)
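# Optional sanity check (a sketch): inspect what the retriever would hand to
# the LLM for a sample query (newer LangChain versions prefer retriever.invoke)
for doc in retriever.get_relevant_documents("How can I reverse diabetes?"):
    print(doc.page_content)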
|
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Buffer memory so a retrieval chain can carry the conversation history
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)
|
from transformers import pipeline
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

question = "How can I reverse diabetes?"
|
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load a small instruction-tuned chat model for generation
llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(llm_model)
model = AutoModelForCausalLM.from_pretrained(llm_model)

# Pass the already-loaded model object instead of re-resolving the name,
# and enable sampling so the temperature setting actually takes effect
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.5,
    do_sample=True,
)
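
# The retriever, memory, and ConversationalRetrievalChain import above are
# otherwise unused; a minimal sketch wiring them to the local pipeline
# (the chain's default settings are an assumption, not a tuned choice):
local_llm = HuggingFacePipeline(pipeline=pipe)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=local_llm,
    retriever=retriever,
    memory=memory,
)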
|
# Direct generation (no retrieval): build a TinyLlama chat prompt and sample
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who responds in the style of a doctor",
    },
    {"role": "user", "content": question},
]
prompt = pipe.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# This is raw pipeline output, not a chain, so name it accordingly
output = pipe(prompt, max_new_tokens=256, do_sample=True)
print(output[0]["generated_text"])
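
# For a retrieval-grounded answer, route the same question through the chain
# sketched above; with memory attached the chain returns an "answer" key
rag_result = qa_chain.invoke({"question": question})
print(rag_result["answer"])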
|
import gradio as gr

# from_pipeline expects the transformers pipeline object itself, not its
# generated output; share=True exposes a public Gradio link
interface = gr.Interface.from_pipeline(pipe).launch(share=True)