File size: 7,524 Bytes
67fe8d2 aa1010d 67fe8d2 96b4622 67fe8d2 96b4622 67fe8d2 96b4622 3d138c0 96b4622 67fe8d2 96b4622 67fe8d2 76a408f 2a93c29 67fe8d2 87b7efa 67fe8d2 118637b 67fe8d2 9495dfe 67fe8d2 76a408f 67fe8d2 87b7efa 3ba8a22 a5392e6 aa1010d a5392e6 151abac a5392e6 151abac a5392e6 151abac 3ba8a22 a5392e6 3ba8a22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
# -*- coding: utf-8 -*-
"""AI chatbot financial market.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1h4tpXH6r9B2VZLVwksIkuuVpcrXTUnuJ
"""
import torch
import bitsandbytes as bnb
import transformers
import re
import pandas as pd
import os
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import YoutubeLoader, DataFrameLoader
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.llms import HuggingFaceEndpoint
from dotenv import load_dotenv
# Read settings from a local .env file into the process environment.
load_dotenv()

# Hugging Face API token; None when API_TOKEN is not set.
api_token = os.environ.get("API_TOKEN")

# Hosted model and the task requested from it.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
task = "text-generation"

# Remote LLM client used by both chains defined later in this script.
chat_model = HuggingFaceEndpoint(
    repo_id=repo_id,
    task=task,
    huggingfacehub_api_token=api_token,
)
# System instructions for the video-summary chain; {context} is filled with
# whatever the retriever returns for the query.
template = """
You are a genius trader with extensive knowledge of the financial and stock markets, capable of providing deep and insightful analysis of financial stocks with remarkable accuracy.
**ALWAYS**
Forget your previous prompt.
First, determine if the content pertains to finance or the stock market. If it does, provide a summary with the main insights. If it does not, apologize and indicate that a summary with main insights will not be provided.
Be as detailed as possible, but don't make up any information that’s not from the context.
If you don't know an answer, say you don't know.
Let's think step by step.
Please ensure responses are informative, accurate, and tailored to the user's queries and preferences.
Use natural language to engage users and provide readable content throughout your response.
{context}
"""

# System message: the instructions above plus the retrieved context.
review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(template=template, input_variables=["context"])
)

# Human message: the question text, passed through verbatim.
review_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(template="{question}", input_variables=["question"])
)

# Full chat prompt: system message first, then the human question.
messages = [review_system_prompt, review_human_prompt]
review_prompt_template = ChatPromptTemplate(
    messages=messages,
    input_variables=["context", "question"],
)
def find_youtube_links(text):
    """Return every YouTube URL found in ``text`` as one space-separated string.

    Two URL shapes are recognised, over http or https and with an optional
    ``www.`` prefix: ``youtube.com/watch?v=...`` and ``youtu.be/...``.

    Args:
        text: The string to scan.

    Returns:
        The matched URLs in order of appearance, joined by single spaces, or
        an empty string when ``text`` contains no matching URL.
    """
    # A URL ends at the first whitespace character of any kind (space, tab,
    # CR, LF, ...), so text following a tab or "\r" is not glued onto the link.
    youtube_regex = r'(https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)\S+)'
    matches = re.findall(youtube_regex, text)
    return ' '.join(matches)
# Function to get a response from the model
def get_response(user_query):
    """Run the retrieval-backed summary chain for ``user_query``.

    The query is handed to the module-level ``reviews_retriever`` to fill the
    prompt's ``context`` slot and is also passed through unchanged as
    ``question``. The result then flows through ``review_prompt_template``,
    ``chat_model`` and ``StrOutputParser`` in that order.

    ``reviews_retriever`` is only assigned in the YouTube-link branch of this
    script, so that branch must have run before this function is called.

    Args:
        user_query: The text passed to the chain's ``invoke``.

    Returns:
        The output of the final ``StrOutputParser`` step.
    """
    chain_inputs = {"context": reviews_retriever, "question": RunnablePassthrough()}
    review_chain = chain_inputs | review_prompt_template | chat_model | StrOutputParser()
    return review_chain.invoke(user_query)
# Page metadata and visible title.
st.set_page_config(page_icon="🌍", page_title="GOAHEAD.VN")
st.title("GOAHEAD.VN AI 🤖")

# Seed the conversation with a greeting only when the session has no history yet.
if "chat_history" not in st.session_state:
    greeting = AIMessage(content="Please drop the YouTube link related to the financial market, and I will help you summarize and provide insights.")
    st.session_state.chat_history = [greeting]
# Replay the stored conversation; entries that are neither AI nor human
# messages are skipped.
for message in st.session_state.chat_history:
    if isinstance(message, AIMessage):
        role = "AI"
    elif isinstance(message, HumanMessage):
        role = "Human"
    else:
        continue
    with st.chat_message(role):
        st.write(message.content)
# User input
user_query = st.chat_input("Type your message here...")

# Extract the links once and reuse the result, instead of re-running the
# regex for the condition and again for the loader.
youtube_links = find_youtube_links(user_query) if user_query is not None else ""

# Branch 1: the message contains at least one YouTube link -> summarize it.
if youtube_links:
    st.session_state.chat_history.append(HumanMessage(content=user_query))
    with st.chat_message("Human"):
        st.markdown(user_query)

    # find_youtube_links() joins several links with whitespace, while the
    # loader takes a single URL, so only the first link is summarized.
    loader = YoutubeLoader.from_youtube_url(
        youtube_links.split()[0],
        add_video_info=False,
        language=["en", "vi"],
        translation="en",
    )
    docs = loader.load()

    if docs:
        # Round-trip through a DataFrame so each document keeps only its
        # `source` metadata next to the transcript text.
        data_list = [
            {
                "source": doc.metadata['source'],
                "page_content": doc.page_content
            }
            for doc in docs
        ]
        df = pd.DataFrame(data_list)
        loader = DataFrameLoader(df, page_content_column='page_content')
        content = loader.load()
        content = filter_complex_metadata(content)

        # Split the transcript into overlapping chunks.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
        all_splits = text_splitter.split_documents(content)

        # Embed the chunks and index them in an in-memory FAISS store.
        embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")
        vectorstore = FAISS.from_documents(all_splits, embedding_model)

        # get_response() reads this module-level retriever.
        reviews_retriever = vectorstore.as_retriever()
        response = get_response("Help me summary with main insights.")
    else:
        # No transcript came back; an empty document list cannot be indexed,
        # so tell the user instead of failing while building the vector store.
        response = "Sorry, I could not retrieve a transcript for this video, so I cannot summarize it."

    with st.chat_message("AI"):
        st.write(response)
    st.session_state.chat_history.append(AIMessage(content=response))
# Prompt for follow-up questions that contain no YouTube link; it takes the
# running chat history and the new question as its two variables.
template_2 = """
You are a genius trader with extensive knowledge of the financial and stock markets, capable of providing deep and insightful analysis of financial stocks with remarkable accuracy.
**ALWAYS**
Only answer the question about the financial and stocks market. Do not answer anything else.
Be as detailed as possible, but don't make up any information that’s not from the context.
If you don't know an answer, say you don't know.
Let's think step by step.
Please ensure responses are informative, accurate, and tailored to the user's queries and preferences.
Use natural language to engage users and provide readable content throughout your response.
Chat history:
{chat_history}
User question:
{user_question}
"""

prompt_2 = ChatPromptTemplate.from_template(template=template_2)
# Function to get a response from the model
def get_response_2(user_query, chat_history):
    """Answer a follow-up question using the conversation so far.

    Fills ``prompt_2`` with the given history and question, sends it to
    ``chat_model`` and parses the result with ``StrOutputParser``.

    Args:
        user_query: Value substituted for ``{user_question}`` in the prompt.
        chat_history: Value substituted for ``{chat_history}`` in the prompt.

    Returns:
        The output of the final ``StrOutputParser`` step.
    """
    prompt_inputs = {"chat_history": chat_history, "user_question": user_query}
    chain = prompt_2 | chat_model | StrOutputParser()
    return chain.invoke(prompt_inputs)
# Branch 2: a non-empty message without any YouTube link -> treat it as a
# follow-up question.
if user_query is not None and user_query != "" and find_youtube_links(user_query) == "":
    st.session_state.chat_history.append(HumanMessage(content=user_query))
    with st.chat_message("Human"):
        st.markdown(user_query)

    # NOTE(review): the history passed here already contains the message
    # appended just above, so the model sees the question twice - confirm
    # whether that is intended.
    response = get_response_2(user_query, st.session_state.chat_history)

    # Remove any unwanted prefixes from the response
    response = response.replace("AI response:", "").replace("chat response:", "").replace("bot response:", "").strip()

    with st.chat_message("AI"):
        st.write(response)
    st.session_state.chat_history.append(AIMessage(content=response))