Rulga committed on
Commit
9e16676
·
1 Parent(s): 67b9361

create main

Browse files
Files changed (1) hide show
  1. app.py +164 -41
app.py CHANGED
@@ -1,52 +1,175 @@
1
- def build_knowledge_base():
2
- import time
3
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  start_time = time.time()
5
- documents = []
6
- st.info("Starting knowledge base creation...")
7
-
8
- # Create progress bar
9
- progress_bar = st.progress(0)
10
- total_urls = len(urls)
11
 
12
- for idx, url in enumerate(urls):
13
- try:
14
- loader = WebBaseLoader(url)
15
- documents.extend(loader.load())
16
- st.write(f"✅ Loaded content from {url}")
17
- # Update progress bar
18
- progress_bar.progress((idx + 1) / total_urls)
19
- except (RequestException, Timeout) as e:
20
- st.write(f"❌ Error loading page {url}: {e}")
21
-
22
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
 
 
 
 
23
  chunks = text_splitter.split_documents(documents)
24
 
25
- # Show chunks info
26
- st.write(f"📄 Split into {len(chunks)} chunks")
27
-
28
- vector_store = FAISS.from_documents(chunks, embeddings_model)
29
  vector_store.save_local(VECTOR_STORE_PATH)
30
 
31
- # Calculate metrics
32
  end_time = time.time()
33
- time_taken = end_time - start_time
34
-
35
- # Calculate size of vector store directory
36
- total_size = 0
37
- for path, dirs, files in os.walk(VECTOR_STORE_PATH):
38
- for f in files:
39
- fp = os.path.join(path, f)
40
- total_size += os.path.getsize(fp)
41
-
42
- size_mb = total_size / (1024 * 1024)
43
 
44
- # Display completion message
45
  st.success(f"""
46
- Knowledge base creation completed:
47
- ⏱️ Time taken: {time_taken:.2f} seconds
48
- 💾 Size: {size_mb:.2f} MB
49
- 🔢 Total chunks: {len(chunks)}
50
  """)
51
 
52
- return vector_store
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import streamlit as st
4
+ from dotenv import load_dotenv
5
+ from langchain_groq import ChatGroq
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
9
+ from langchain_community.document_loaders import WebBaseLoader
10
+ from langchain_core.prompts import PromptTemplate
11
+ from langchain_core.output_parsers import StrOutputParser
12
+ from langchain_core.runnables import RunnableLambda
13
+ import smtplib
14
+ from email.mime.text import MIMEText
15
+ from email.mime.multipart import MIMEMultipart
16
+
17
# Basic page configuration
st.set_page_config(page_title="Legal Assistant", page_icon="⚖️")
st.title("Legal Assistant")

# Path where the knowledge base (vector store) is persisted
VECTOR_STORE_PATH = "vector_store"

# URLs of the site to index
urls = [
    "https://status.law",
    "https://status.law/about",
    # ... remaining URLs ...
]

# Load secrets. All three are required, so the script is halted if any
# lookup fails instead of continuing with undefined names.
try:
    EMAIL_SENDER = st.secrets["EMAIL_SENDER"]
    EMAIL_PASSWORD = st.secrets["EMAIL_PASSWORD"]
    GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
except Exception as e:
    # Include the exception type and message so the operator can tell which
    # secret is missing (or that no secrets file was found at all).
    st.error(
        "Error loading secrets. Please check your configuration. "
        f"({type(e).__name__}: {e})"
    )
    st.stop()
39
+
40
# Model initialisation
@st.cache_resource
def init_models():
    """Create the chat model and the embedding model used by the app.

    Decorated with ``st.cache_resource`` so the objects are built once and
    reused rather than re-created on every script rerun.

    Returns:
        A ``(llm, embeddings)`` tuple: the ``ChatGroq`` chat model and the
        ``HuggingFaceEmbeddings`` embedding model.
    """
    chat_model = ChatGroq(
        model_name="llama-3.3-70b-versatile",
        temperature=0.6,
        api_key=GROQ_API_KEY,
    )
    embedder = HuggingFaceEmbeddings(
        model_name="intfloat/multilingual-e5-large-instruct",
    )
    return chat_model, embedder
52
+
53
# Knowledge base creation
def build_knowledge_base(embeddings):
    """Load the configured pages, index them with FAISS and save the index.

    Each URL in ``urls`` is fetched with ``WebBaseLoader``; a failure on one
    URL is reported in the UI and the remaining URLs are still processed.
    The loaded documents are split into 500-character chunks with a
    100-character overlap, embedded, and the resulting index is written to
    ``VECTOR_STORE_PATH``.

    Args:
        embeddings: Embedding model handed to ``FAISS.from_documents``.

    Returns:
        The FAISS vector store built from the loaded pages.

    If nothing could be loaded, an error is shown and the Streamlit script
    is halted with ``st.stop()``; no index is written in that case.
    """
    start_time = time.time()

    documents = []
    with st.status("Loading website content...") as status:
        for url in urls:
            try:
                docs = WebBaseLoader(url).load()
            except Exception as e:
                # Best effort: one unreachable page must not abort the build.
                st.error(f"Error loading {url}: {str(e)}")
            else:
                documents.extend(docs)
                status.update(label=f"Loaded {url}")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    chunks = text_splitter.split_documents(documents)

    # FAISS cannot build an index from an empty list of documents; fail with
    # a clear message instead of an opaque error from inside the library.
    if not chunks:
        st.error(
            "No content could be loaded from the configured URLs; "
            "the knowledge base was not created."
        )
        st.stop()

    vector_store = FAISS.from_documents(chunks, embeddings)
    vector_store.save_local(VECTOR_STORE_PATH)

    build_time = time.time() - start_time

    st.success(f"""
    Knowledge base created successfully:
    - Time taken: {build_time:.2f} seconds
    - Number of chunks: {len(chunks)}
    """)

    return vector_store
87
+
88
# Email delivery
# Upper bound (seconds) for SMTP connect/send so an unreachable mail server
# cannot block the Streamlit run indefinitely.
_SMTP_TIMEOUT_SECONDS = 30


def _format_chat_history(history):
    """Render history entries as "Q: ... / A: ..." pairs separated by blank lines."""
    return "\n\n".join(
        f"Q: {item['question']}\nA: {item['answer']}"
        for item in history
    )


def send_chat_history(history):
    """Email the chat transcript to the configured sender address.

    The message is sent from ``EMAIL_SENDER`` to ``EMAIL_SENDER`` through
    ``smtp.gmail.com:587`` using STARTTLS and ``EMAIL_PASSWORD``.

    Args:
        history: Sequence of dicts, each with ``"question"`` and
            ``"answer"`` keys.

    Returns:
        None. Nothing is sent when ``history`` is empty. Any failure while
        building or sending the message is reported with ``st.error``
        rather than raised.
    """
    if not history:
        # Nothing to report; avoid sending an empty email.
        return

    try:
        msg = MIMEMultipart()
        msg['From'] = EMAIL_SENDER
        msg['To'] = EMAIL_SENDER
        msg['Subject'] = "Chat History Update"
        msg.attach(MIMEText(_format_chat_history(history), 'plain'))

        with smtplib.SMTP(
            'smtp.gmail.com', 587, timeout=_SMTP_TIMEOUT_SECONDS
        ) as server:
            server.starttls()
            server.login(EMAIL_SENDER, EMAIL_PASSWORD)
            server.send_message(msg)
    except Exception as e:
        # Email is a side channel; a delivery failure must not break the chat.
        st.error(f"Failed to send email: {str(e)}")
108
+
109
# Main application code
def main():
    """Run the page: make sure a knowledge base is available, then serve chat.

    If no vector store exists on disk, a button is offered to build one;
    otherwise the stored index is loaded into the session once. When a
    vector store is present in the session, the chat history is rendered
    and a new question is answered from the retrieved context, recorded in
    the session history and emailed via ``send_chat_history``.
    """
    # Model initialisation
    llm, embeddings = init_models()

    # Check whether the knowledge base exists on disk
    store_on_disk = os.path.exists(VECTOR_STORE_PATH)
    if store_on_disk:
        if "vector_store" not in st.session_state:
            # NOTE(review): allow_dangerous_deserialization=True permits
            # pickle loading; presumably acceptable because the store is
            # written by this app itself — confirm the directory is trusted.
            st.session_state.vector_store = FAISS.load_local(
                VECTOR_STORE_PATH,
                embeddings,
                allow_dangerous_deserialization=True,
            )
    else:
        st.warning("Knowledge base not found.")
        if st.button("Create Knowledge Base"):
            st.session_state.vector_store = build_knowledge_base(embeddings)
            st.rerun()

    # Chat mode is only available once a vector store is in the session
    if "vector_store" not in st.session_state:
        return

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the message history
    for past in st.session_state.messages:
        st.chat_message("user").write(past["question"])
        st.chat_message("assistant").write(past["answer"])

    # User input
    question = st.chat_input("Ask your question")
    if not question:
        return

    st.chat_message("user").write(question)

    # Retrieve context and generate the answer.
    # NOTE(review): the nesting of the statements below was reconstructed
    # from a flattened diff — confirm against the real app.py.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            matches = st.session_state.vector_store.similarity_search(question)
            context_text = "\n".join(doc.page_content for doc in matches)

            prompt = PromptTemplate.from_template("""
            You are a helpful and polite legal assistant. Answer the question based on the provided context.
            If you cannot answer based on the context, say so politely.

            Context: {context}
            Question: {question}
            """)

            chain = prompt | llm | StrOutputParser()
            payload = {
                "context": context_text,
                "question": question,
            }
            response = chain.invoke(payload)

            st.write(response)

            # Store the exchange in the session history
            exchange = {
                "question": question,
                "answer": response,
            }
            st.session_state.messages.append(exchange)

            # Email the updated history
            send_chat_history(st.session_state.messages)
173
+
174
# Entry point: start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()