Spaces:

vakodiya
/

news_research_tool_with_llama3_8b

Sleeping

App Files Files Community

news_research_tool_with_llama3_8b / app.py

vakodiya

Update app.py

d4b5c04 verified 11 months ago

raw

history blame contribute delete

4.21 kB

	import os
	import pickle
	import time
	from typing import Any

	import streamlit as st
	from huggingface_hub import login
	from langchain.chains import RetrievalQAWithSourcesChain
	from langchain.document_loaders import UnstructuredURLLoader
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.llms.base import LLM
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.vectorstores import FAISS
	from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

	# Login to Hugging Face.
	# The access token is read from the `HF_llama3chat8b` environment variable.
	# When the variable is missing we skip the explicit login instead of passing
	# None: `login(None)` falls back to an interactive token prompt, which cannot
	# be answered from a headless Streamlit process.
	hf_token = os.getenv('HF_llama3chat8b')
	if hf_token:
	    login(hf_token)

	class CustomHuggingFaceLLM(LLM):
	    """LangChain LLM wrapper around a locally loaded Hugging Face causal LM.

	    The model and tokenizer are loaded once in ``__init__`` and reused for
	    every ``_call``. Generation uses sampling (``do_sample=True``) with the
	    configured temperature, ``top_p=0.95`` and ``top_k=3``.

	    Attributes are declared as class-level fields because the LangChain
	    ``LLM`` base class is a pydantic model: attributes that are not declared
	    fields cannot be assigned on an instance.
	    """

	    # Loaded transformers model / tokenizer (set in __init__).
	    model: Any = None
	    tokenizer: Any = None
	    # Sampling temperature passed to `generate`.
	    temperature: float = 0.7
	    # Upper bound on the number of tokens generated per call.
	    max_new_tokens: int = 512

	    def __init__(self, model_name, temperature=0.7, max_new_tokens=512):
	        """Load the model and tokenizer identified by ``model_name``.

	        Args:
	            model_name: Hugging Face model id or local path.
	            temperature: Sampling temperature used by ``_call``.
	            max_new_tokens: Maximum number of tokens generated per call.
	        """
	        # Configure 8-bit quantization using `BitsAndBytesConfig`
	        # NOTE(review): 8-bit bitsandbytes loading combined with
	        # device_map="cpu" is kept from the original; confirm that this
	        # combination actually loads on the target hardware.
	        quantization_config = BitsAndBytesConfig(
	            load_in_8bit=True,  # Enable 8-bit quantization
	            llm_int8_enable_fp32_cpu_offload=True,  # Offload FP32 operations to CPU for further memory savings
	        )

	        model = AutoModelForCausalLM.from_pretrained(
	            model_name,
	            device_map="cpu",
	            quantization_config=quantization_config,
	        )
	        tokenizer = AutoTokenizer.from_pretrained(model_name)

	        # Initialise the pydantic base class with the declared fields;
	        # skipping this leaves the instance without a usable field set.
	        super().__init__(
	            model=model,
	            tokenizer=tokenizer,
	            temperature=temperature,
	            max_new_tokens=max_new_tokens,
	        )

	    def _call(self, prompt, stop=None, run_manager=None, **kwargs):
	        """Generate a completion for ``prompt`` and return only the new text.

	        Args:
	            prompt: Full prompt string to feed to the model.
	            stop: Optional list of strings; the completion is truncated at
	                the first occurrence of any of them.
	            run_manager: Callback manager passed by LangChain; unused here.
	            **kwargs: Extra keyword arguments passed by LangChain; ignored.

	        Returns:
	            The generated continuation, without the prompt.
	        """
	        input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
	        input_ids = input_ids.to(self.model.device)
	        output = self.model.generate(
	            input_ids,
	            # `max_length` would count the prompt tokens as well, so a long
	            # retrieval prompt would leave no budget for the answer.
	            max_new_tokens=self.max_new_tokens,
	            temperature=self.temperature,
	            do_sample=True,
	            top_p=0.95,
	            top_k=3,
	        )
	        # `generate` returns prompt + continuation; drop the prompt tokens so
	        # the chain does not receive its own prompt back as the answer.
	        new_tokens = output[0][input_ids.shape[-1]:]
	        generated_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)

	        for stop_sequence in stop or ():
	            if not stop_sequence:
	                continue
	            cut = generated_text.find(stop_sequence)
	            if cut != -1:
	                generated_text = generated_text[:cut]
	        return generated_text

	    @property
	    def _identifying_params(self):
	        """Parameters that identify this LLM configuration."""
	        return {"model_name": self.model.config._name_or_path, "temperature": self.temperature}

	    @property
	    def _llm_type(self):
	        """Short type tag for this LLM implementation."""
	        return "custom_huggingface"



	# Streamlit page: collect up to three article URLs in the sidebar, build a
	# FAISS index from their text on demand, then answer questions against it.
	main_directory = os.path.dirname(os.path.abspath(__file__))

	st.title("Web Page search Bot: Research Tool 📈")
	st.sidebar.title("Article URLs")

	# Keep only the boxes the user actually filled in, so blank entries are
	# never handed to the URL loader.
	urls = []
	for i in range(3):
	    url = st.sidebar.text_input(f"URL {i+1}").strip()
	    if url:
	        urls.append(url)

	process_url_clicked = st.sidebar.button("Process URLs")
	file_path_faiss = "faiss_store.pkl"

	main_placeholder = st.empty()


	# Streamlit re-executes this script on every interaction; caching keeps the
	# heavyweight models loaded once per process instead of once per rerun.
	@st.cache_resource
	def _load_embedding_model():
	    """Return the shared sentence-embedding model."""
	    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


	@st.cache_resource
	def _load_llm():
	    """Return the shared LLM instance."""
	    return CustomHuggingFaceLLM(model_name="meta-llama/Meta-Llama-3.1-8B", temperature=0.6)


	# Load a pre-trained embedding model
	embedding_model = _load_embedding_model()
	llm = _load_llm()

	if process_url_clicked and not urls:
	    st.sidebar.warning("Enter at least one URL before processing.")
	elif process_url_clicked:
	    # load data
	    loader = UnstructuredURLLoader(urls=urls)
	    main_placeholder.text("Data Loading...Started...✅✅✅")
	    data = loader.load()
	    # split data
	    # Do not include unnecessary separators like , and . It will reduce chunks too small.
	    text_splitter = RecursiveCharacterTextSplitter(
	        separators=['\n\n'],
	        chunk_size=1000,
	        chunk_overlap=100,
	    )
	    main_placeholder.text("Text Splitter...Started...✅✅✅")
	    docs = text_splitter.split_documents(data)

	    if not docs:
	        # Building an index from zero documents fails, so report it instead.
	        st.sidebar.warning("No text could be extracted from the given URLs.")
	    else:
	        # create embeddings and save it to FAISS index
	        main_placeholder.text("Embedding Vector Started Building...✅✅✅")
	        vectorstore_faiss = FAISS.from_documents(documents=docs, embedding=embedding_model)
	        # Short pause so the status message stays visible before the
	        # placeholder is reused for the question box below.
	        time.sleep(2)

	        # Save the FAISS index to a pickle file
	        # NOTE(review): pickle runs arbitrary code on load. The file is only
	        # written by this app, but consider FAISS.save_local / load_local.
	        with open(file_path_faiss, "wb") as f:
	            pickle.dump(vectorstore_faiss, f)

	query = main_placeholder.text_input("Question: ")
	if query:
	    if not os.path.exists(file_path_faiss):
	        st.warning("No processed articles found. Add URLs in the sidebar and click 'Process URLs' first.")
	    else:
	        with open(file_path_faiss, "rb") as f:
	            vectorstore = pickle.load(f)
	        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), verbose=True)  # type: ignore
	        result = chain({"question": query}, return_only_outputs=True)
	        # result will be a dictionary of this format --> {"answer": "", "sources": [] }
	        st.header("Answer")
	        st.write(result["answer"])

	        # Display sources, if available
	        sources = result.get("sources", "")
	        if sources:
	            st.subheader("Sources:")
	            # Sources arrive newline-separated; skip blank lines.
	            for source in sources.split("\n"):
	                if source.strip():
	                    st.write(source)