Spaces:

kishorefafa
/

AutomationBot

Runtime error

App Files Files Community

AutomationBot / app.py

kishorefafa

remove colap

7795113 verified 10 months ago

raw

history blame contribute delete

3.29 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
	import gradio as gr
	import chromadb
	from langchain.document_loaders import PyPDFDirectoryLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.vectorstores import Chroma
	from langchain.chains import ConversationalRetrievalChain
	from langchain.memory import ConversationBufferMemory
	from langchain_huggingface import HuggingFacePipeline

	# Checkpoint that is fetched from the HuggingFace Hub.
	model_name = "anakin87/zephyr-7b-alpha-sharded"

	# 4-bit NF4 weights with double quantization; computation runs in bfloat16.
	# NOTE(review): 4-bit bitsandbytes loading generally needs a CUDA device,
	# while the embedding setup in this file says no GPU is available --
	# confirm the target hardware.
	bnb_config = BitsAndBytesConfig(
	    load_in_4bit=True,
	    bnb_4bit_quant_type="nf4",
	    bnb_4bit_use_double_quant=True,
	    bnb_4bit_compute_dtype=torch.bfloat16,
	)

	# The tokenizer does not depend on the model weights, so it is set up first.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	tokenizer.bos_token_id = 1  # Set beginning of sentence token id

	# Load the causal LM with the quantization settings defined above.
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    quantization_config=bnb_config,
	    torch_dtype=torch.bfloat16,
	)

	# Specify embedding model
	embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
	model_kwargs = {"device": "cpu"}  # Using CPU since GPU is not available
	embeddings = HuggingFaceEmbeddings(
	    model_name=embedding_model_name,
	    model_kwargs=model_kwargs,
	)

	# Load the documents (replace this with your document loading logic).
	# These placeholders are raw strings, not LangChain Document objects.
	documents = ["Sample document text 1", "Sample document text 2"]

	# Split the texts into small chunks. create_documents() accepts plain
	# strings and wraps every chunk in a Document; split_documents() would
	# fail here because it expects Document inputs rather than str.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
	all_splits = text_splitter.create_documents(documents)

	# Embed document chunks and store them in the "chroma_db" directory
	vectordb = Chroma.from_documents(
	    documents=all_splits,
	    embedding=embeddings,
	    persist_directory="chroma_db",
	)

	# Specify the retriever used by the QA chain
	retriever = vectordb.as_retriever()

	# Build HuggingFace pipeline for using zephyr-7b-alpha.
	# The result is bound to its own name so that the imported `pipeline`
	# factory from transformers is not overwritten by its return value.
	text_generation_pipeline = pipeline(
	    "text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    use_cache=True,
	    device_map="auto",
	    max_length=2048,
	    do_sample=True,
	    top_k=5,
	    num_return_sequences=1,
	    eos_token_id=tokenizer.eos_token_id,
	    # The EOS token id is reused as the padding token id.
	    pad_token_id=tokenizer.eos_token_id,
	)

	# Specify the llm: wrap the transformers pipeline for use in LangChain
	llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

	# Define the create_conversation function
	def create_conversation(query: str, chat_history: list) -> tuple:
	    """Answer *query* with the retrieval chain and extend the chat history.

	    Args:
	        query: The text the user submitted.
	        chat_history: Earlier turns as (user, bot) pairs; ``None`` is
	            treated as an empty history. The list is extended in place.

	    Returns:
	        A tuple ``('', chat_history)``: the empty string clears the input
	        box, and the history now ends with the new (query, reply) pair.
	        If anything fails, the reply is a text description of the error.
	    """
	    if chat_history is None:
	        chat_history = []

	    try:
	        # A ConversationBufferMemory created inside this function would start
	        # empty on every call and could not carry earlier turns, so the
	        # history from the UI is handed to the chain explicitly instead.
	        # Turns are normalized to (str, str) tuples; incomplete turns are skipped.
	        past_turns = [
	            (str(turn[0]), str(turn[1]))
	            for turn in chat_history
	            if isinstance(turn, (list, tuple))
	            and len(turn) == 2
	            and turn[0] is not None
	            and turn[1] is not None
	        ]
	        qa_chain = ConversationalRetrievalChain.from_llm(
	            llm=llm,
	            retriever=retriever,
	        )
	        result = qa_chain({'question': query, 'chat_history': past_turns})
	        reply = result['answer']
	    except Exception as e:
	        # Show the failure in the chat as text; the chat history holds
	        # display strings, not exception objects.
	        reply = f"{type(e).__name__}: {e}"

	    chat_history.append((query, reply))
	    return '', chat_history

	# Define the Gradio UI: a chat window, an input box and a clear button.
	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot(label='My Chatbot')
	    msg = gr.Textbox()
	    # The button resets both the input box and the chat window.
	    clear = gr.ClearButton(components=[msg, chatbot])

	    # Submitting the textbox runs create_conversation on the current text
	    # and chat history, then writes its two return values back to them.
	    msg.submit(
	        fn=create_conversation,
	        inputs=[msg, chatbot],
	        outputs=[msg, chatbot],
	    )

	# Launch the Gradio demo
	demo.launch()