Rulga committed on
Commit
8f4893e
·
1 Parent(s): 1b11787
Files changed (2) hide show
  1. app.py +137 -84
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,5 +1,9 @@
1
  import os
 
 
2
  import streamlit as st
 
 
3
  from dotenv import load_dotenv
4
  from langchain_groq import ChatGroq
5
  from langchain_huggingface import HuggingFaceEmbeddings
@@ -11,76 +15,84 @@ from langchain_core.output_parsers import StrOutputParser
11
  from langchain_core.runnables import RunnableLambda
12
  from requests.exceptions import RequestException, Timeout
13
 
14
- # Установка конфигурации страницы
15
  st.set_page_config(page_title="Legal Chatbot", page_icon="🤖")
16
 
17
- # Загрузка переменных окружения
18
  if os.path.exists(".env"):
19
  load_dotenv(verbose=True)
20
 
21
- # Загрузка API-ключей
22
  try:
23
  GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
24
  USER_AGENT = st.secrets["USER_AGENT"]
25
  OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
 
 
26
  except FileNotFoundError:
27
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
28
  USER_AGENT = os.getenv("USER_AGENT")
29
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
 
30
 
31
- # Проверка API-ключей
32
- if not all([GROQ_API_KEY, USER_AGENT, OPENAI_API_KEY]):
33
- st.error("Ошибка: Не все переменные окружения заданы.")
34
  st.stop()
35
 
36
- # Инициализация LLM
37
  try:
38
  llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.6, api_key=GROQ_API_KEY)
39
  except Exception as e:
40
- st.error(f"Ошибка инициализации LLM: {e}")
41
  st.stop()
42
 
43
- # Инициализация эмбеддингов
44
  embeddings_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
45
 
46
- # Список страниц для анализа
47
  urls = [
48
- "https://status.law",
49
  "https://status.law/about",
50
  "https://status.law/careers",
51
  "https://status.law/challenging-sanctions",
52
- "https://status.law/contact",
53
- "https://status.law/cross-border-banking-legal-issues",
54
- "https://status.law/extradition-defense",
55
- "https://status.law/international-prosecution-protection",
56
- "https://status.law/interpol-red-notice-removal",
57
- "https://status.law/practice-areas",
58
  "https://status.law/reputation-protection",
59
  "https://status.law/faq"
60
  ]
61
 
62
- # Путь к файлу векторного хранилища
63
- VECTOR_STORE_PATH = "vector_store"
 
64
 
65
- # Функция для создания базы знаний
66
  def build_knowledge_base():
67
  documents = []
68
  for url in urls:
69
  try:
70
  loader = WebBaseLoader(url)
71
  documents.extend(loader.load())
 
72
  except (RequestException, Timeout) as e:
73
- st.write(f"[ERROR] Ошибка загрузки страницы {url}: {e}")
74
-
75
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
76
  chunks = text_splitter.split_documents(documents)
77
-
 
78
  vector_store = FAISS.from_documents(chunks, embeddings_model)
79
  vector_store.save_local(VECTOR_STORE_PATH)
80
-
 
81
  return vector_store
82
 
83
- # Функция для загрузки базы знаний
84
  def load_knowledge_base():
85
  if os.path.exists(VECTOR_STORE_PATH):
86
  return FAISS.load_local(
@@ -90,66 +102,107 @@ def load_knowledge_base():
90
  )
91
  return None
92
 
93
- # Загружаем базу, если её нет в `st.session_state`
94
- if "vector_store" not in st.session_state or st.session_state.vector_store is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  st.session_state.vector_store = load_knowledge_base()
96
 
97
- vector_store = st.session_state.vector_store
98
-
99
- # Если база знаний отсутствует, предлагаем её создать
100
- if vector_store is None:
101
- st.write("База знаний не найдена. Нажмите кнопку, чтобы создать её.")
102
- if st.button("Создать базу знаний"):
103
- with st.spinner("Создание базы знаний..."):
104
  st.session_state.vector_store = build_knowledge_base()
105
- st.success("База знаний успешно создана!")
106
- st.rerun() # Перезапуск приложения
107
  else:
108
- st.write("База знаний загружена. Вы можете задать вопрос.")
109
-
110
- # Промпт для бота
111
- template = """
112
- You are a helpful legal assistant that answers questions based on information from status.law.
113
- Answer accurately and concisely.
114
- Question: {question}
115
- Only use the provided context to answer the question.
116
- Context: {context}
117
- """
118
- prompt = PromptTemplate.from_template(template)
119
-
120
- # Инициализация цепочки обработки запроса
121
- if "chain" not in st.session_state:
122
- st.session_state.chain = (
123
- RunnableLambda(lambda x: {"context": x["context"], "question": x["question"]})
124
- | prompt
125
- | llm
126
- | StrOutputParser()
127
- )
128
- chain = st.session_state.chain
129
-
130
- # Поле для ввода вопроса
131
- user_input = st.text_input("Введите ваш вопрос:")
132
- if st.button("Отправить") and user_input:
133
- if st.session_state.vector_store: # Проверяем, что база знаний загружена
134
- retrieved_docs = st.session_state.vector_store.similarity_search(user_input)
135
- context_text = "\n\n".join([doc.page_content for doc in retrieved_docs])
136
-
137
- # Генерация ответа
138
- response = chain.invoke({"question": user_input, "context": context_text})
139
-
140
- # Сохранение истории сообщений
141
- if "message_history" not in st.session_state:
142
- st.session_state.message_history = []
143
- st.session_state.message_history.append({"question": user_input, "answer": response})
144
-
145
- # Вывод ответа
146
- st.write(response)
147
- else:
148
- st.error("Ошибка: база знаний не загружена.")
149
-
150
- # Вывод истории сообщений
151
- if "message_history" in st.session_state:
152
- st.write("### История сообщений")
153
- for msg in st.session_state.message_history:
154
- st.write(f"**User:** {msg['question']}")
155
- st.write(f"**Bot:** {msg['answer']}")
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import json
3
+ import smtplib
4
  import streamlit as st
5
+ from email.mime.multipart import MIMEMultipart
6
+ from email.mime.text import MIMEText
7
  from dotenv import load_dotenv
8
  from langchain_groq import ChatGroq
9
  from langchain_huggingface import HuggingFaceEmbeddings
 
15
  from langchain_core.runnables import RunnableLambda
16
  from requests.exceptions import RequestException, Timeout
17
 
18
# Streamlit page configuration
st.set_page_config(page_title="Legal Chatbot", page_icon="🤖")

# Load environment variables from a local .env file when one is present
if os.path.exists(".env"):
    load_dotenv(verbose=True)


def _read_setting(name):
    """Return setting *name* from Streamlit secrets, else from the environment.

    ``st.secrets[name]`` raises ``FileNotFoundError`` when there is no secrets
    file and ``KeyError`` when the file exists but does not define *name*.
    Both cases fall back to ``os.getenv(name)``, so a secrets file that lacks
    one key no longer aborts the script before the check below can report it.

    Args:
        name: Key to look up in ``st.secrets`` and then in the environment.

    Returns:
        The configured value, or ``None`` when it is set in neither place.
    """
    try:
        return st.secrets[name]
    except (FileNotFoundError, KeyError):
        return os.getenv(name)


# Load API keys and e-mail credentials
GROQ_API_KEY = _read_setting("GROQ_API_KEY")
USER_AGENT = _read_setting("USER_AGENT")
OPENAI_API_KEY = _read_setting("OPENAI_API_KEY")
EMAIL_SENDER = _read_setting("EMAIL_SENDER")
EMAIL_PASSWORD = _read_setting("EMAIL_PASSWORD")

# Stop this run early when any required setting is missing or empty
if not all([GROQ_API_KEY, USER_AGENT, OPENAI_API_KEY, EMAIL_SENDER, EMAIL_PASSWORD]):
    st.error("Error: Missing required environment variables.")
    st.stop()
43
 
44
# Create the Groq chat model; report a construction failure in the UI and stop
try:
    llm = ChatGroq(
        model_name="llama-3.3-70b-versatile",
        temperature=0.6,
        api_key=GROQ_API_KEY,
    )
except Exception as init_error:
    st.error(f"LLM initialization failed: {init_error}")
    st.stop()

# Embedding model used when building the FAISS knowledge base
embeddings_model = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large-instruct",
)
53
 
54
# Pages of the status.law site that feed the knowledge base:
# the site root first, then one URL per page slug, in this order.
_SITE_ROOT = "https://status.law"
_PAGE_SLUGS = (
    "about",
    "careers",
    "challenging-sanctions",
    "contact",
    "cross-border-banking-legal-issues",
    "extradition-defense",
    "international-prosecution-protection",
    "interpol-red-notice-removal",
    "practice-areas",
    "reputation-protection",
    "faq",
)
urls = [_SITE_ROOT, *(f"{_SITE_ROOT}/{slug}" for slug in _PAGE_SLUGS)]

# On-disk locations of the vector store and the persisted chat history
_STORAGE_DIR = "storage"
VECTOR_STORE_PATH = f"{_STORAGE_DIR}/vector_store"
HISTORY_PATH = f"{_STORAGE_DIR}/chat_history.json"
73
 
74
# Function to build knowledge base
def build_knowledge_base():
    """Scrape the pages in ``urls``, index them in FAISS and save the index.

    Each URL is fetched with ``WebBaseLoader``. A page that fails with
    ``RequestException`` or ``Timeout`` is reported via ``st.write`` and
    skipped. The collected documents are split with
    ``RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)``,
    embedded with ``embeddings_model`` and written to ``VECTOR_STORE_PATH``.

    Returns:
        The FAISS vector store built from the chunks.
    """
    pages = []
    for page_url in urls:
        try:
            pages += WebBaseLoader(page_url).load()
            st.write(f"[INFO] Loaded content from {page_url}")
        except (RequestException, Timeout) as load_error:
            st.write(f"[ERROR] Failed to load {page_url}: {load_error}")

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    pieces = splitter.split_documents(pages)
    st.write(f"[INFO] Split into {len(pieces)} chunks")

    index = FAISS.from_documents(pieces, embeddings_model)
    index.save_local(VECTOR_STORE_PATH)

    st.write("[INFO] Knowledge base successfully created and saved")
    return index
94
 
95
+ # Function to load existing knowledge base
96
  def load_knowledge_base():
97
  if os.path.exists(VECTOR_STORE_PATH):
98
  return FAISS.load_local(
 
102
  )
103
  return None
104
 
105
# Function to load chat history
def load_history():
    """Return the persisted chat history, or ``[]`` when none is usable.

    Reads ``HISTORY_PATH`` as UTF-8 JSON. A missing or unreadable file,
    invalid JSON, or a top-level value that is not a list yields an empty
    history instead of an exception, so a damaged file cannot stop the app
    from starting. Entries that are not dicts with both ``"question"`` and
    ``"answer"`` keys are dropped, because the display and e-mail code index
    those keys directly.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts (possibly empty).
    """
    try:
        with open(HISTORY_PATH, "r", encoding="utf-8") as file:
            history = json.load(file)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
        return []

    if not isinstance(history, list):
        return []
    return [
        entry
        for entry in history
        if isinstance(entry, dict) and "question" in entry and "answer" in entry
    ]
111
+
112
# Function to save chat history
def save_history(history):
    """Write *history* to ``HISTORY_PATH`` as indented UTF-8 JSON.

    The parent directory of ``HISTORY_PATH`` is created first when it does
    not exist; otherwise opening the file for writing raises
    ``FileNotFoundError`` on a fresh checkout where the storage directory
    has not been created yet.

    Args:
        history: JSON-serialisable chat history (a list of question/answer
            dicts in this app).
    """
    parent_dir = os.path.dirname(HISTORY_PATH)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(HISTORY_PATH, "w", encoding="utf-8") as file:
        json.dump(history, file, ensure_ascii=False, indent=4)
116
+
117
# Function to send chat history via email
def send_email(recipient_email, subject, message):
    """Send *message* as a plain-text e-mail via ``smtp.gmail.com:587``.

    Logs in with ``EMAIL_SENDER`` / ``EMAIL_PASSWORD`` after upgrading the
    connection with STARTTLS. Any failure is shown with ``st.error`` and
    reported through the return value rather than raised.

    Args:
        recipient_email: Destination address; surrounding whitespace is
            ignored, and a blank value or one without ``@`` is rejected
            before any network access.
        subject: Subject header of the message.
        message: Plain-text body.

    Returns:
        ``True`` when the message was handed to the server, else ``False``.
    """
    import ssl  # local import: only this function needs it

    recipient = (recipient_email or "").strip()
    if not recipient or "@" not in recipient:
        st.error("Email sending error: a valid recipient address is required.")
        return False

    try:
        msg = MIMEMultipart()
        msg["From"] = EMAIL_SENDER
        msg["To"] = recipient
        msg["Subject"] = subject
        msg.attach(MIMEText(message, "plain"))

        # The context manager closes the connection even when starttls,
        # login or send_message raises; the timeout bounds a stalled server.
        with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
            # Use a verifying TLS context instead of relying on the default
            server.starttls(context=ssl.create_default_context())
            server.login(EMAIL_SENDER, EMAIL_PASSWORD)
            server.send_message(msg)
    except Exception as e:
        # UI boundary: callers rely on a boolean result, so any failure is
        # reported here instead of propagating.
        st.error(f"Email sending error: {e}")
        return False
    return True
136
+
137
# Load the persisted knowledge base once per session
if "vector_store" not in st.session_state:
    st.session_state.vector_store = load_knowledge_base()

# Report the knowledge-base status; offer to build it when none was loaded
if st.session_state.vector_store is not None:
    st.write("Knowledge base loaded. You can ask questions.")
else:
    st.write("Knowledge base not found. Click the button to generate it.")
    if st.button("Generate Knowledge Base"):
        with st.spinner("Building knowledge base..."):
            st.session_state.vector_store = build_knowledge_base()
            st.success("Knowledge base successfully created!")
            # Rerun so the page picks up the new store
            st.rerun()
151
+
152
# Prompt sent to the LLM; {question} and {context} are filled in per request
template = """
You are a helpful legal assistant answering questions based on information from status.law.
Answer accurately and concisely.
Question: {question}
Only use the provided context to answer the question.
Context: {context}
"""
prompt = PromptTemplate.from_template(template)

# Build the processing chain once per session and reuse it on later reruns
if "chain" not in st.session_state:
    # Forward only the two keys the prompt template expects
    pick_inputs = RunnableLambda(
        lambda payload: {"context": payload["context"], "question": payload["question"]}
    )
    st.session_state.chain = pick_inputs | prompt | llm | StrOutputParser()

chain = st.session_state.chain
172
+
173
# Load the persisted chat history once per session
if "message_history" not in st.session_state:
    st.session_state.message_history = load_history()

# Chat input
user_input = st.text_input("Enter your question:")
if st.button("Send") and user_input:
    vector_store = st.session_state.vector_store
    if vector_store is None:
        # The chat box is shown even before the knowledge base exists;
        # without this guard the search below fails on None.
        st.error("Error: knowledge base is not loaded.")
    else:
        # Retrieve the most similar chunks and pass them to the LLM as context
        retrieved_docs = vector_store.similarity_search(user_input)
        context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

        response = chain.invoke({"question": user_input, "context": context_text})

        # Save to session and persist history
        st.session_state.message_history.append({"question": user_input, "answer": response})
        save_history(st.session_state.message_history)

        st.write(response)
191
+
192
# Render every stored question/answer pair under a heading (nothing if empty)
past_messages = st.session_state.message_history
if past_messages:
    st.write("### Chat History")
    for entry in past_messages:
        question, answer = entry["question"], entry["answer"]
        st.write(f"**User:** {question}")
        st.write(f"**Bot:** {answer}")
198
+
199
# Email history feature
recipient_email = st.text_input("Enter email to receive chat history:")
if st.button("Send History via Email"):
    recipient = recipient_email.strip()
    if not st.session_state.message_history:
        st.warning("Chat history is empty.")
    elif not recipient:
        # Don't open an SMTP session when no address was entered
        st.warning("Please enter an email address first.")
    else:
        history_text = "\n\n".join(
            f"User: {msg['question']}\nBot: {msg['answer']}"
            for msg in st.session_state.message_history
        )
        # send_email reports its own errors; only confirm on success
        if send_email(recipient, "Chat History", history_text):
            st.success(f"Chat history sent to {recipient}!")
requirements.txt CHANGED
@@ -6,3 +6,5 @@ langchain-groq
6
  python-dotenv
7
  beautifulsoup4
8
  faiss-cpu
 
 
 
6
  python-dotenv
7
  beautifulsoup4
8
  faiss-cpu
9
+ requests
10
+ # smtplib ships with the Python standard library; do not list it as a pip requirement