Spaces:

Rulga
/

Doc-chat

Sleeping

App Files Files Community

Rulga commited on Feb 21

Commit

5539560

1 Parent(s): 07dd6be

Add chat history logging and legal assistant functionality

Browse files

Files changed (5) hide show

.gitattributes +0 -35
chat_analysis.py +137 -0
app - Copy.py → chat_history/app - Copy.py +242 -242
gitignore +0 -4
requirements.txt +2 -0

.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

chat_analysis.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import pandas as pd
+from datetime import datetime
+import json
+from typing import List, Dict
+from langchain_core.messages import HumanMessage, AIMessage
+from langchain_core.tracers import BaseTracer
+from dataclasses import dataclass, asdict
+import plotly.express as px
+import streamlit as st
+@dataclass
+class ChatAnalysis:
+    timestamp: str
+    user_input: str
+    bot_response: str
+    context: str
+    kb_version: str
+    response_time: float
+    tokens_used: int
+    context_relevance_score: float
+class ChatAnalyzer(BaseTracer):
+    def __init__(self):
+        super().__init__()
+        self.analyses: List[ChatAnalysis] = []
+    def load_logs(self, log_file_path: str) -> List[Dict]:
+        """Загрузка и парсинг логов чата из JSON файла"""
+        logs = []
+        with open(log_file_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                try:
+                    logs.append(json.loads(line.strip()))
+                except json.JSONDecodeError:
+                    continue
+        return logs
+    def analyze_interaction(self, log_entry: Dict) -> ChatAnalysis:
+        """Анализ одного взаимодействия в чате"""
+        # Расчет базовых метрик
+        timestamp = datetime.fromisoformat(log_entry["timestamp"])
+        # Расчет времени ответа (можно заменить на реальную логику измерения)
+        response_time = len(log_entry["bot_response"]) * 0.01  # Простая аппроксимация
+        # Подсчет использованных токенов (заменить на реальный подсчет)
+        tokens_used = len(log_entry["bot_response"].split()) + len(log_entry["user_input"].split())
+        # Расчет релевантности контекста
+        context_relevance = self._calculate_context_relevance(
+            log_entry["user_input"],
+            log_entry["context"],
+            log_entry["bot_response"]
+        )
+        return ChatAnalysis(
+            timestamp=timestamp.isoformat(),
+            user_input=log_entry["user_input"],
+            bot_response=log_entry["bot_response"],
+            context=log_entry["context"],
+            kb_version=log_entry["kb_version"],
+            response_time=response_time,
+            tokens_used=tokens_used,
+            context_relevance_score=context_relevance
+        )
+    def _calculate_context_relevance(self, query: str, context: str, response: str) -> float:
+        """Расчет оценки релевантности между запросом и предоставленным контекстом"""
+        # Простая реализация - можно заменить на более сложную систему оценки
+        query_terms = set(query.lower().split())
+        context_terms = set(context.lower().split())
+        response_terms = set(response.lower().split())
+        query_context_overlap = len(query_terms & context_terms)
+        context_response_overlap = len(context_terms & response_terms)
+        if not query_terms or not context_terms:
+            return 0.0
+        return (query_context_overlap + context_response_overlap) / (len(query_terms) + len(context_terms))
+    def create_analysis_dashboard(self):
+        """Создание дашборда анализа чата в Streamlit"""
+        st.title("Панель анализа чата")
+        # Преобразование анализа в DataFrame
+        df = pd.DataFrame([asdict(a) for a in self.analyses])
+        # Базовая статистика
+        st.header("Обзор")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            st.metric("Всего взаимодействий", len(df))
+        with col2:
+            st.metric("Среднее время ответа", f"{df['response_time'].mean():.2f}с")
+        with col3:
+            st.metric("Средняя релевантность контекста", f"{df['context_relevance_score'].mean():.2%}")
+        with col4:
+            st.metric("Всего использовано токенов", df['tokens_used'].sum())
+        # Анализ временных рядов
+        st.header("Тренды взаимодействий")
+        df['timestamp'] = pd.to_datetime(df['timestamp'])
+        fig = px.line(df, x='timestamp', y='response_time', title='Время ответа с течением времени')
+        st.plotly_chart(fig)
+        # Распределение релевантности контекста
+        fig = px.histogram(df, x='context_relevance_score',
+                         title='Распределение оценок релевантности контекста',
+                         nbins=20)
+        st.plotly_chart(fig)
+        # Детальные логи
+        st.header("Детальные логи взаимодействий")
+        st.dataframe(df)
+def setup_chat_analysis():
+    """Инициализация и настройка системы анализа чата"""
+    analyzer = ChatAnalyzer()
+    # Добавление к существующему логированию
+    def enhanced_log_interaction(user_input: str, bot_response: str, context: str):
+        # Ваш существующий код логирования
+        log_interaction(user_input, bot_response, context)
+        # Добавление анализа
+        log_entry = {
+            "timestamp": datetime.now().isoformat(),
+            "user_input": user_input,
+            "bot_response": bot_response,
+            "context": context,
+            "kb_version": st.session_state.kb_info['version']
+        }
+        analysis = analyzer.analyze_interaction(log_entry)
+        analyzer.analyses.append(analysis)
+    return analyzer, enhanced_log_interaction

app - Copy.py → chat_history/app - Copy.py RENAMED Viewed

@@ -1,242 +1,242 @@
-import os
-import time
-import streamlit as st
-from dotenv import load_dotenv
-from langchain_groq import ChatGroq
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_community.document_loaders import WebBaseLoader
-from langchain_core.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.runnables import RunnableLambda
-import requests
-import json
-# Логирует взаимодействие в JSON-файл
-from datetime import datetime
-def log_interaction(user_input: str, bot_response: str):
-    """Логирует взаимодействие в JSON-файл"""
-    log_entry = {
-        "timestamp": datetime.now().isoformat(),
-        "user_input": user_input,
-        "bot_response": bot_response
-    }
-    log_dir = "chat_history"
-    os.makedirs(log_dir, exist_ok=True)
-    log_path = os.path.join(log_dir, "chat_logs.json")
-    with open(log_path, "a") as f:
-        f.write(json.dumps(log_entry) + "\n")
-#
-# Page configuration
-st.set_page_config(page_title="Status Law Assistant", page_icon="⚖️")
-# Knowledge base info in session_state
-if 'kb_info' not in st.session_state:
-    st.session_state.kb_info = {
-        'build_time': None,
-        'size': None
-    }
-# Display title and knowledge base info
-# st.title("www.Status.Law Legal Assistant")
-st.markdown(
-    '''
-    <h1>
-        ⚖️
-        <a href="https://status.law/" style="text-decoration: underline; color: blue; font-size: inherit;">
-            Status.Law
-        </a>
-        Legal Assistant
-    </h1>
-    ''',
-    unsafe_allow_html=True
-)
-if st.session_state.kb_info['build_time'] and st.session_state.kb_info['size']:
-    st.caption(f"(Knowledge base build time: {st.session_state.kb_info['build_time']:.2f} seconds, "
-               f"size: {st.session_state.kb_info['size']:.2f} MB)")
-# Path to store vector database
-VECTOR_STORE_PATH = "vector_store"
-# Создание папки истории, если она не существует
-if not os.path.exists("chat_history"):
-    os.makedirs("chat_history")
-# Website URLs
-urls = [
-    "https://status.law",
-    "https://status.law/about",
-    "https://status.law/careers",
-    "https://status.law/tariffs-for-services-of-protection-against-extradition",
-    "https://status.law/challenging-sanctions",
-    "https://status.law/law-firm-contact-legal-protection"
-    "https://status.law/cross-border-banking-legal-issues",
-    "https://status.law/extradition-defense",
-    "https://status.law/international-prosecution-protection",
-    "https://status.law/interpol-red-notice-removal",
-    "https://status.law/practice-areas",
-    "https://status.law/reputation-protection",
-    "https://status.law/faq"
-]
-# Load secrets
-try:
-    GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
-except Exception as e:
-    st.error("Error loading secrets. Please check your configuration.")
-    st.stop()
-# Initialize models
-@st.cache_resource
-def init_models():
-    llm = ChatGroq(
-        model_name="llama-3.3-70b-versatile",
-        temperature=0.6,
-        api_key=GROQ_API_KEY
-    )
-    embeddings = HuggingFaceEmbeddings(
-        model_name="intfloat/multilingual-e5-large-instruct"
-    )
-    return llm, embeddings
-# Build knowledge base
-def build_knowledge_base(embeddings):
-    start_time = time.time()
-    documents = []
-    with st.status("Loading website content...") as status:
-        for url in urls:
-            try:
-                loader = WebBaseLoader(url)
-                docs = loader.load()
-                documents.extend(docs)
-                status.update(label=f"Loaded {url}")
-            except Exception as e:
-                st.error(f"Error loading {url}: {str(e)}")
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500,
-        chunk_overlap=100
-    )
-    chunks = text_splitter.split_documents(documents)
-    vector_store = FAISS.from_documents(chunks, embeddings)
-    vector_store.save_local(VECTOR_STORE_PATH)
-    end_time = time.time()
-    build_time = end_time - start_time
-    # Calculate knowledge base size
-    total_size = 0
-    for path, dirs, files in os.walk(VECTOR_STORE_PATH):
-        for f in files:
-            fp = os.path.join(path, f)
-            total_size += os.path.getsize(fp)
-    size_mb = total_size / (1024 * 1024)
-    # Save knowledge base info
-    st.session_state.kb_info['build_time'] = build_time
-    st.session_state.kb_info['size'] = size_mb
-    st.success(f"""
-    Knowledge base created successfully:
-    - Time taken: {build_time:.2f} seconds
-    - Size: {size_mb:.2f} MB
-    - Number of chunks: {len(chunks)}
-    """)
-    return vector_store
-# Main function
-def main():
-    # Initialize models
-    llm, embeddings = init_models()
-    # Check if knowledge base exists
-    if not os.path.exists(VECTOR_STORE_PATH):
-        st.warning("Knowledge base not found.")
-        if st.button("Create Knowledge Base"):
-            vector_store = build_knowledge_base(embeddings)
-            st.session_state.vector_store = vector_store
-            st.rerun()
-    else:
-        if 'vector_store' not in st.session_state:
-            st.session_state.vector_store = FAISS.load_local(
-                VECTOR_STORE_PATH,
-                embeddings,
-                allow_dangerous_deserialization=True
-            )
-    # Chat mode
-    if 'vector_store' in st.session_state:
-        if 'messages' not in st.session_state:
-            st.session_state.messages = []
-        # Display chat history
-        for message in st.session_state.messages:
-            st.chat_message("user").write(message["question"])
-            st.chat_message("assistant").write(message["answer"])
-        # User input
-        if question := st.chat_input("Ask your question"):
-            st.chat_message("user").write(question)
-            # Retrieve context and generate response
-            with st.chat_message("assistant"):
-                with st.spinner("Thinking..."):
-                    context = st.session_state.vector_store.similarity_search(question)
-                    context_text = "\n".join([doc.page_content for doc in context])
-                    prompt = PromptTemplate.from_template("""
-                    You are a helpful and polite legal assistant at Status Law.
-                    You answer in the language in which the question was asked.
-                    Answer the question based on the context provided.
-                    If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
-                    - For all users: +32465594521 (landline phone).
-                    - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
-                    - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
-                    If the user has questions about specific services and their costs, suggest they visit the page https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/ for detailed information.
-                    Ask the user additional questions to understand which service to recommend and provide an estimated cost. For example, clarify their situation and needs to suggest the most appropriate options.
-                    Also, offer free consultations if they are available and suitable for the user's request.
-                    Answer professionally but in a friendly manner.
-                    Example:
-                    Q: How can I challenge the sanctions?
-                    A: To challenge the sanctions, you should consult with our legal team, who specialize in this area. Please contact us directly for detailed advice. You can fill out our contact form here: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
-                    Context: {context}
-                    Question: {question}
-                    """)
-                    chain = prompt | llm | StrOutputParser()
-                    response = chain.invoke({
-                        "context": context_text,
-                        "question": question
-                    })
-                    st.write(response)
-                    # В блоке генерации ответа (после st.write(response))
-                    log_interaction(question, response)
-                    # Save chat history
-                    st.session_state.messages.append({
-                        "question": question,
-                        "answer": response
-                    })
-if __name__ == "__main__":
-    main()

+import os
+import time
+import streamlit as st
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import WebBaseLoader
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnableLambda
+import requests
+import json
+# Логирует взаимодействие в JSON-файл
+from datetime import datetime
+def log_interaction(user_input: str, bot_response: str):
+    """Логи��ует взаимодействие в JSON-файл"""
+    log_entry = {
+        "timestamp": datetime.now().isoformat(),
+        "user_input": user_input,
+        "bot_response": bot_response
+    }
+    log_dir = "chat_history"
+    os.makedirs(log_dir, exist_ok=True)
+    log_path = os.path.join(log_dir, "chat_logs.json")
+    with open(log_path, "a") as f:
+        f.write(json.dumps(log_entry) + "\n")
+#
+# Page configuration
+st.set_page_config(page_title="Status Law Assistant", page_icon="⚖️")
+# Knowledge base info in session_state
+if 'kb_info' not in st.session_state:
+    st.session_state.kb_info = {
+        'build_time': None,
+        'size': None
+    }
+# Display title and knowledge base info
+# st.title("www.Status.Law Legal Assistant")
+st.markdown(
+    '''
+    <h1>
+        ⚖️
+        <a href="https://status.law/" style="text-decoration: underline; color: blue; font-size: inherit;">
+            Status.Law
+        </a>
+        Legal Assistant
+    </h1>
+    ''',
+    unsafe_allow_html=True
+)
+if st.session_state.kb_info['build_time'] and st.session_state.kb_info['size']:
+    st.caption(f"(Knowledge base build time: {st.session_state.kb_info['build_time']:.2f} seconds, "
+               f"size: {st.session_state.kb_info['size']:.2f} MB)")
+# Path to store vector database
+VECTOR_STORE_PATH = "vector_store"
+# Создание папки истории, если она не существует
+if not os.path.exists("chat_history"):
+    os.makedirs("chat_history")
+# Website URLs
+urls = [
+    "https://status.law",
+    "https://status.law/about",
+    "https://status.law/careers",
+    "https://status.law/tariffs-for-services-of-protection-against-extradition",
+    "https://status.law/challenging-sanctions",
+    "https://status.law/law-firm-contact-legal-protection"
+    "https://status.law/cross-border-banking-legal-issues",
+    "https://status.law/extradition-defense",
+    "https://status.law/international-prosecution-protection",
+    "https://status.law/interpol-red-notice-removal",
+    "https://status.law/practice-areas",
+    "https://status.law/reputation-protection",
+    "https://status.law/faq"
+]
+# Load secrets
+try:
+    GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
+except Exception as e:
+    st.error("Error loading secrets. Please check your configuration.")
+    st.stop()
+# Initialize models
+@st.cache_resource
+def init_models():
+    llm = ChatGroq(
+        model_name="llama-3.3-70b-versatile",
+        temperature=0.6,
+        api_key=GROQ_API_KEY
+    )
+    embeddings = HuggingFaceEmbeddings(
+        model_name="intfloat/multilingual-e5-large-instruct"
+    )
+    return llm, embeddings
+# Build knowledge base
+def build_knowledge_base(embeddings):
+    start_time = time.time()
+    documents = []
+    with st.status("Loading website content...") as status:
+        for url in urls:
+            try:
+                loader = WebBaseLoader(url)
+                docs = loader.load()
+                documents.extend(docs)
+                status.update(label=f"Loaded {url}")
+            except Exception as e:
+                st.error(f"Error loading {url}: {str(e)}")
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=100
+    )
+    chunks = text_splitter.split_documents(documents)
+    vector_store = FAISS.from_documents(chunks, embeddings)
+    vector_store.save_local(VECTOR_STORE_PATH)
+    end_time = time.time()
+    build_time = end_time - start_time
+    # Calculate knowledge base size
+    total_size = 0
+    for path, dirs, files in os.walk(VECTOR_STORE_PATH):
+        for f in files:
+            fp = os.path.join(path, f)
+            total_size += os.path.getsize(fp)
+    size_mb = total_size / (1024 * 1024)
+    # Save knowledge base info
+    st.session_state.kb_info['build_time'] = build_time
+    st.session_state.kb_info['size'] = size_mb
+    st.success(f"""
+    Knowledge base created successfully:
+    - Time taken: {build_time:.2f} seconds
+    - Size: {size_mb:.2f} MB
+    - Number of chunks: {len(chunks)}
+    """)
+    return vector_store
+# Main function
+def main():
+    # Initialize models
+    llm, embeddings = init_models()
+    # Check if knowledge base exists
+    if not os.path.exists(VECTOR_STORE_PATH):
+        st.warning("Knowledge base not found.")
+        if st.button("Create Knowledge Base"):
+            vector_store = build_knowledge_base(embeddings)
+            st.session_state.vector_store = vector_store
+            st.rerun()
+    else:
+        if 'vector_store' not in st.session_state:
+            st.session_state.vector_store = FAISS.load_local(
+                VECTOR_STORE_PATH,
+                embeddings,
+                allow_dangerous_deserialization=True
+            )
+    # Chat mode
+    if 'vector_store' in st.session_state:
+        if 'messages' not in st.session_state:
+            st.session_state.messages = []
+        # Display chat history
+        for message in st.session_state.messages:
+            st.chat_message("user").write(message["question"])
+            st.chat_message("assistant").write(message["answer"])
+        # User input
+        if question := st.chat_input("Ask your question"):
+            st.chat_message("user").write(question)
+            # Retrieve context and generate response
+            with st.chat_message("assistant"):
+                with st.spinner("Thinking..."):
+                    context = st.session_state.vector_store.similarity_search(question)
+                    context_text = "\n".join([doc.page_content for doc in context])
+                    prompt = PromptTemplate.from_template("""
+                    You are a helpful and polite legal assistant at Status Law.
+                    You answer in the language in which the question was asked.
+                    Answer the question based on the context provided.
+                    If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
+                    - For all users: +32465594521 (landline phone).
+                    - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
+                    - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
+                    If the user has questions about specific services and their costs, suggest they visit the page https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/ for detailed information.
+                    Ask the user additional questions to understand which service to recommend and provide an estimated cost. For example, clarify their situation and needs to suggest the most appropriate options.
+                    Also, offer free consultations if they are available and suitable for the user's request.
+                    Answer professionally but in a friendly manner.
+                    Example:
+                    Q: How can I challenge the sanctions?
+                    A: To challenge the sanctions, you should consult with our legal team, who specialize in this area. Please contact us directly for detailed advice. You can fill out our contact form here: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
+                    Context: {context}
+                    Question: {question}
+                    """)
+                    chain = prompt | llm | StrOutputParser()
+                    response = chain.invoke({
+                        "context": context_text,
+                        "question": question
+                    })
+                    st.write(response)
+                    # В блоке генерации ответа (после st.write(response))
+                    log_interaction(question, response)
+                    # Save chat history
+                    st.session_state.messages.append({
+                        "question": question,
+                        "answer": response
+                    })
+if __name__ == "__main__":
+    main()

gitignore DELETED Viewed

@@ -1,4 +0,0 @@
-*.env
-venv
-.streamlit/secrets.toml

requirements.txt CHANGED Viewed

@@ -15,6 +15,8 @@ pydantic
 python-multipart
 pandas
 langchain

 python-multipart
 pandas
 langchain
+plotly