Spaces:
Runtime error
Runtime error
File size: 2,110 Bytes
2063044 c511b8b 18b0534 c511b8b 18b0534 c511b8b 2063044 10251fa e5a3ee6 2063044 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer
import pickle
import os
from glob import glob
import os
import shutil

# Collect the scraped play pages. NOTE(review): glob() is called without
# recursive=True, so "**" behaves like a plain "*" here and only HTML files
# exactly one directory below ./shakespeare are matched - confirm that this
# is the intended layout.
files = glob("./shakespeare/**/*.html")

# Flatten every page into a single folder, which is what the DirectoryLoader
# further down reads from.
destination_folder = './data/'
# exist_ok=True keeps a restart from crashing with FileExistsError when the
# folder was already created by a previous run.
os.makedirs(destination_folder, exist_ok=True)
for html_file in files:
    # basename/join handle the platform's path separator, unlike split("/").
    shutil.move(html_file, os.path.join(destination_folder, os.path.basename(html_file)))
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader

# Load everything found under ./data/ using BSHTMLLoader for each file.
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

# Build a splitter from the bloomz tokenizer (chunk_size=100, no overlap,
# newline separator) and cut the loaded pages into chunks.
bloomz_tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-1b7')
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    bloomz_tokenizer,
    chunk_size=100,
    chunk_overlap=0,
    separator='\n',
)
documents = text_splitter.split_documents(data)
# NOTE: a notebook-style "pip install sentence_transformers -q" line used to
# sit here. That is a shell command, not Python, and it stops the whole script
# with a SyntaxError. Install sentence_transformers in the environment instead
# (e.g. via requirements.txt); HuggingFaceEmbeddings is expected to need it.
embeddings = HuggingFaceEmbeddings()

# Embed the chunks into a Chroma store and write it to disk.
persist_directory = "vector_db"
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()

# Drop the handle used for building, then reopen the store from the
# persisted directory with the same embedding function.
vectordb = None
vectordb_persist = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)
# Text-generation pipeline around bloomz-1b7, used as the answering LLM.
llm = HuggingFacePipeline.from_model_id(
    model_id="bigscience/bloomz-1b7",
    task="text-generation",
    model_kwargs={"temperature": 0, "max_length": 500},
)

# Retrieval-augmented QA: the reopened Chroma store supplies the retriever
# and the chain is built with the "stuff" chain type.
doc_retriever = vectordb_persist.as_retriever()
shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)
def make_inference(query):
    """Run *query* through the module-level ``shakespeare_qa`` chain.

    Returns whatever ``shakespeare_qa.run(query)`` produces.
    """
    return shakespeare_qa.run(query)
if __name__ == "__main__":
    # Build and launch the Gradio interface around make_inference.
    import gradio as gr

    # gr.Textbox replaces the gr.inputs.Textbox / gr.outputs.Textbox
    # namespaces, which are deprecated in Gradio 3.x and removed in 4.x.
    gr.Interface(
        fn=make_inference,
        inputs=gr.Textbox(lines=2, label="Query"),
        outputs=gr.Textbox(label="Response"),
        title="Ask_Shakespeare",
        description="️building_w_llms_qa_Shakespeare allows you to inquire about the Shakespeare's plays.",
    ).launch()