Spaces:

aiwithankit
/

llama

Runtime error

App Files Files Community

llama / app.py

aiwithankit

Update app.py

ec3c939 over 1 year ago

raw

history blame contribute delete

2.08 kB

	import logging
	import sys

	logging.basicConfig(stream=sys.stdout, level=logging.INFO)
	logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

	from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
	from llama_index.llms import HuggingFaceLLM

	documents = SimpleDirectoryReader("./data").load_data()

	from llama_index.prompts.prompts import SimpleInputPrompt


	system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."



	# This will wrap the default prompts that are internal to llama-index
	query_wrapper_prompt = SimpleInputPrompt("<\|USER\|>{query_str}<\|ASSISTANT\|>")

	import torch

	llm = HuggingFaceLLM(
	context_window=4096,
	max_new_tokens=256,
	generate_kwargs={"temperature": 0.0, "do_sample": False},
	system_prompt=system_prompt,
	query_wrapper_prompt=query_wrapper_prompt,
	tokenizer_name="NousResearch/Llama-2-7b-hf",
	model_name="NousResearch/Llama-2-7b-hf",
	device_map="auto",
	# uncomment this if using CUDA to reduce memory usage
	# model_kwargs={"torch_dtype": torch.float16 , "load_in_8bit":False}
	)

	from langchain.embeddings.huggingface import HuggingFaceEmbeddings
	from llama_index import LangchainEmbedding, ServiceContext

	embed_model = LangchainEmbedding(
	HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
	)

	service_context = ServiceContext.from_defaults(
	chunk_size=1024,
	llm=llm,
	embed_model=embed_model
	)

	index = VectorStoreIndex.from_documents(documents, service_context=service_context)

	#query_engine = index.as_query_engine()
	#response = query_engine.query("what is the name of this document?")

	#print(response)


	import gradio as gr
	def random_response(message, history):
	query_engine = index.as_query_engine()
	response = query_engine.query("according to the document provided,"+message)
	print(response)
	return str(response)

	demo = gr.ChatInterface(random_response)
	if __name__ == "__main__":
	demo.queue().launch(debug=True)