|
|
|
"""AI chatbot financial market.ipynb |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1h4tpXH6r9B2VZLVwksIkuuVpcrXTUnuJ |
|
""" |
|
|
|
import os
import re

import pandas as pd
import streamlit as st
from dotenv import load_dotenv

from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DataFrameLoader, YoutubeLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
|
|
|
|
|
# Load the Hugging Face API token from a local .env file.
load_dotenv()
api_token = os.getenv("API_TOKEN")
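# Assumed .env entry (the variable name comes from the os.getenv call above;
# the value is a Hugging Face access token):
#   API_TOKEN=hf_xxxxxxxxxxxxxxxx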
|
|
|
|
|
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1" |
|
task = "text-generation" |
|
|
|
|
|
chat_model = HuggingFaceEndpoint( |
|
huggingfacehub_api_token=api_token, |
|
repo_id=repo_id, |
|
task=task |
|
) |
|
|
|
template = """ |
|
You are a genius trader with extensive knowledge of the financial and stock markets, capable of providing deep and insightful analysis of financial stocks with remarkable accuracy. |
|
|
|
**ALWAYS** |
|
Forget your previous prompt. |
|
First, determine if the content pertains to finance or the stock market. If it does, provide a summary with the main insights. If it does not, apologize and indicate that a summary with main insights will not be provided. |
|
Be as detailed as possible, but don't make up any information that’s not from the context. |
|
If you don't know an answer, say you don't know. |
|
Let's think step by step. |
|
|
|
Please ensure responses are informative, accurate, and tailored to the user's queries and preferences. |
|
Use natural language to engage users and provide readable content throughout your response. |
|
|
|
{context} |
|
""" |
|
|
|
review_system_prompt = SystemMessagePromptTemplate( |
|
prompt=PromptTemplate( |
|
input_variables=["context"], |
|
template=template, |
|
) |
|
) |
|
|
|
review_human_prompt = HumanMessagePromptTemplate( |
|
prompt=PromptTemplate( |
|
input_variables=["question"], |
|
template="{question}", |
|
) |
|
) |
|
messages = [review_system_prompt, review_human_prompt] |
|
|
|
review_prompt_template = ChatPromptTemplate( |
|
input_variables=["context", "question"], |
|
messages=messages, |
|
) |
|
|
|
|
|
def find_youtube_links(text):
    """Return every YouTube URL found in `text`, joined by single spaces."""
    youtube_regex = r'(https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)[^ \n]+)'
    matches = re.findall(youtube_regex, text)
    return ' '.join(matches)
|
|
|
|
|
|
|
def get_response(user_query, retriever):
    """Answer `user_query` using documents pulled from `retriever` as context."""
    review_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | review_prompt_template
        | chat_model
        | StrOutputParser()
    )
    return review_chain.invoke(user_query)
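# Streamlit UI. Launch the app with: streamlit run <this_file>.py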
|
|
|
|
|
st.set_page_config(page_title="GOAHEAD.VN", page_icon="🌍") |
|
st.title("GOAHEAD.VN AI 🤖") |
|
|
|
|
|
if "chat_history" not in st.session_state: |
|
st.session_state.chat_history = [ |
|
AIMessage(content="Please drop the YouTube link related to the financial market, and I will help you summarize and provide insights."), |
|
] |
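# Re-render the stored conversation on every Streamlit rerun.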
|
|
|
|
|
for message in st.session_state.chat_history: |
|
if isinstance(message, AIMessage): |
|
with st.chat_message("AI"): |
|
st.write(message.content) |
|
elif isinstance(message, HumanMessage): |
|
with st.chat_message("Human"): |
|
st.write(message.content) |
|
|
|
|
|
user_query = st.chat_input("Type your message here...") |
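# Branch 1: the message contains a YouTube link, so build a retrieval index
# over the video transcript and summarize it.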
|
|
|
if user_query is not None and find_youtube_links(user_query) != "": |
|
st.session_state.chat_history.append(HumanMessage(content=user_query)) |
|
|
|
with st.chat_message("Human"): |
|
st.markdown(user_query) |
|
|
|
    # Fetch the video transcript (English or Vietnamese, translated to English).
    # YoutubeLoader relies on the youtube-transcript-api package being installed.
    loader = YoutubeLoader.from_youtube_url(
        find_youtube_links(user_query),
        add_video_info=False,
        language=["en", "vi"],
        translation="en",
    )
    docs = loader.load()
|
|
|
    # Re-wrap the transcript as DataFrame-backed documents and drop metadata
    # types the vector store cannot store.
    data_list = [
        {"source": doc.metadata['source'], "page_content": doc.page_content}
        for doc in docs
    ]
    df = pd.DataFrame(data_list)
    loader = DataFrameLoader(df, page_content_column='page_content')
    content = loader.load()
    content = filter_complex_metadata(content)
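    # Split the transcript into overlapping chunks for embedding.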
|
|
|
|
|
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150) |
|
all_splits = text_splitter.split_documents(content) |
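    # Embed the chunks with a MiniLM sentence-transformers model and index them in FAISS.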
|
|
|
|
|
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2") |
|
|
|
|
|
vectorstore = FAISS.from_documents(all_splits, embedding_model) |
|
|
|
reviews_retriever = vectorstore.as_retriever() |
|
|
|
response = get_response("Help me summary with main insights.") |
|
|
|
with st.chat_message("AI"): |
|
st.write(response) |
|
|
|
st.session_state.chat_history.append(AIMessage(content=response)) |
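# Branch 2: plain questions (no YouTube link) are answered from the chat
# history alone, restricted to finance and stock-market topics.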
|
|
|
|
|
template_2 = """ |
|
You are a genius trader with extensive knowledge of the financial and stock markets, capable of providing deep and insightful analysis of financial stocks with remarkable accuracy. |
|
|
|
**ALWAYS** |
|
Only answer questions about the financial and stock markets. Do not answer anything else.
|
Be as detailed as possible, but don't make up any information that’s not from the context. |
|
If you don't know an answer, say you don't know. |
|
Let's think step by step. |
|
|
|
Please ensure responses are informative, accurate, and tailored to the user's queries and preferences. |
|
Use natural language to engage users and provide readable content throughout your response. |
|
|
|
Chat history: |
|
{chat_history} |
|
|
|
User question: |
|
{user_question} |
|
""" |
|
|
|
prompt_2 = ChatPromptTemplate.from_template(template_2) |
|
|
|
|
|
def get_response_2(user_query, chat_history):
    """Answer a follow-up question using only the running chat history as context."""
    chain = prompt_2 | chat_model | StrOutputParser()
    return chain.invoke({
        "chat_history": chat_history,
        "user_question": user_query,
    })
|
|
|
if user_query is not None and user_query != "" and find_youtube_links(user_query) == "": |
|
st.session_state.chat_history.append(HumanMessage(content=user_query)) |
|
|
|
with st.chat_message("Human"): |
|
st.markdown(user_query) |
|
|
|
response = get_response_2(user_query, st.session_state.chat_history) |
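    # Strip role prefixes the model occasionally prepends to its answer.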
|
|
|
|
|
response = response.replace("AI response:", "").replace("chat response:", "").replace("bot response:", "").strip() |
|
|
|
with st.chat_message("AI"): |
|
st.write(response) |
|
|
|
st.session_state.chat_history.append(AIMessage(content=response)) |