Spaces:

hienbm
/

chatbot

Sleeping

App Files Files Community

chatbot / app.py

hienbm

Update app.py

2a93c29 verified 10 months ago

raw

history blame

7.52 kB

	# -- coding: utf-8 --
	"""AI chatbot financial market.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1h4tpXH6r9B2VZLVwksIkuuVpcrXTUnuJ
	"""

	import torch
	import bitsandbytes as bnb
	import transformers
	import re
	import pandas as pd
	import os
	import streamlit as st

	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
	from langchain.llms import HuggingFacePipeline
	from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
	from langchain_core.output_parsers import StrOutputParser
	from langchain_community.document_loaders import YoutubeLoader, DataFrameLoader
	from langchain_community.vectorstores.utils import filter_complex_metadata
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain.schema.runnable import RunnablePassthrough
	from langchain_core.messages import AIMessage, HumanMessage
	from langchain_community.llms import HuggingFaceEndpoint
	from dotenv import load_dotenv

	# Load environment variables from .env file
	load_dotenv()

	# Get the API token from environment variable
	api_token = os.getenv("API_TOKEN")

	# Define the repository ID and task
	repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
	task = "text-generation"

	# Initialize the Hugging Face Endpoint
	chat_model = HuggingFaceEndpoint(
	huggingfacehub_api_token=api_token,
	repo_id=repo_id,
	task=task
	)

	template = """
	You are a genius trader with extensive knowledge of the financial and stock markets, capable of providing deep and insightful analysis of financial stocks with remarkable accuracy.

	ALWAYS
	Forget your previous prompt.
	First, determine if the content pertains to finance or the stock market. If it does, provide a summary with the main insights. If it does not, apologize and indicate that a summary with main insights will not be provided.
	Be as detailed as possible, but don't make up any information that’s not from the context.
	If you don't know an answer, say you don't know.
	Let's think step by step.

	Please ensure responses are informative, accurate, and tailored to the user's queries and preferences.
	Use natural language to engage users and provide readable content throughout your response.

	{context}
	"""

	review_system_prompt = SystemMessagePromptTemplate(
	prompt=PromptTemplate(
	input_variables=["context"],
	template=template,
	)
	)

	review_human_prompt = HumanMessagePromptTemplate(
	prompt=PromptTemplate(
	input_variables=["question"],
	template="{question}",
	)
	)
	messages = [review_system_prompt, review_human_prompt]

	review_prompt_template = ChatPromptTemplate(
	input_variables=["context", "question"],
	messages=messages,
	)


	def find_youtube_links(text):
	# Define the regular expression pattern for YouTube URLs
	youtube_regex = (r'(https?://(?:www\.)?(?:youtube\.com/watch\?v=\|youtu\.be/)[^ \n]+)')
	# Use re.findall() to find all matches in the text
	matches = re.findall(youtube_regex, text)
	return str(' '.join(matches))


	# Function to get a response from the model
	def get_response(user_query):
	review_chain = (
	{"context": reviews_retriever, "question": RunnablePassthrough()}
	\| review_prompt_template
	\| chat_model
	\| StrOutputParser()
	)
	response = review_chain.invoke(user_query)
	return response

	# App config
	st.set_page_config(page_title="GOAHEAD.VN", page_icon="🌍")
	st.title("GOAHEAD.VN AI 🤖")

	# Initialize session state
	if "chat_history" not in st.session_state:
	st.session_state.chat_history = [
	AIMessage(content="Please drop the YouTube link related to the financial market, and I will help you summarize and provide insights."),
	]

	# Display chat history
	for message in st.session_state.chat_history:
	if isinstance(message, AIMessage):
	with st.chat_message("AI"):
	st.write(message.content)
	elif isinstance(message, HumanMessage):
	with st.chat_message("Human"):
	st.write(message.content)

	# User input
	user_query = st.chat_input("Type your message here...")

	if user_query is not None and find_youtube_links(user_query) != "":
	st.session_state.chat_history.append(HumanMessage(content=user_query))

	with st.chat_message("Human"):
	st.markdown(user_query)

	loader = YoutubeLoader.from_youtube_url(
	find_youtube_links(user_query),
	add_video_info=False,
	language=["en", "vi"],
	translation="en",
	)
	docs = loader.load()
	# Convert the loaded documents to a list of dictionaries
	data_list = [
	{
	"source": doc.metadata['source'],
	"page_content": doc.page_content
	}
	for doc in docs
	]
	df = pd.DataFrame(data_list)
	loader = DataFrameLoader(df, page_content_column='page_content')
	content = loader.load()
	content = filter_complex_metadata(content)

	# Split the document into chunks with a specified chunk size
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
	all_splits = text_splitter.split_documents(content)

	# Initialize the embedding model
	embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")

	# Store the document into a vector store with a specific embedding model
	vectorstore = FAISS.from_documents(all_splits, embedding_model)

	reviews_retriever = vectorstore.as_retriever()

	response = get_response("Help me summary with main insights.")

	with st.chat_message("AI"):
	st.write(response)

	st.session_state.chat_history.append(AIMessage(content=response))


	template_2 = """
	You are a genius trader with extensive knowledge of the financial and stock markets, capable of providing deep and insightful analysis of financial stocks with remarkable accuracy.

	ALWAYS
	Only answer the question about the financial and stocks market. Do not answer anything else.
	Be as detailed as possible, but don't make up any information that’s not from the context.
	If you don't know an answer, say you don't know.
	Let's think step by step.

	Please ensure responses are informative, accurate, and tailored to the user's queries and preferences.
	Use natural language to engage users and provide readable content throughout your response.

	Chat history:
	{chat_history}

	User question:
	{user_question}
	"""

	prompt_2 = ChatPromptTemplate.from_template(template_2)

	# Function to get a response from the model
	def get_response_2(user_query, chat_history):
	chain = prompt_2 \| chat_model \| StrOutputParser()
	response = chain.invoke({
	"chat_history": chat_history,
	"user_question": user_query,
	})
	return response

	if user_query is not None and user_query != "" and find_youtube_links(user_query) == "":
	st.session_state.chat_history.append(HumanMessage(content=user_query))

	with st.chat_message("Human"):
	st.markdown(user_query)

	response = get_response_2(user_query, st.session_state.chat_history)

	# Remove any unwanted prefixes from the response
	response = response.replace("AI response:", "").replace("chat response:", "").replace("bot response:", "").strip()

	with st.chat_message("AI"):
	st.write(response)

	st.session_state.chat_history.append(AIMessage(content=response))