Spaces:
Brandon Toews committed
Commit · 156ed6f
1 Parent(s): 4c2fd83
Add secure application file
app.py CHANGED
@@ -1,164 +1,189 @@

Old version:

    import streamlit as st
    from openai import OpenAI

-   #
-
-
-
-
-
-
-
-
-
-
-
-           base_url=base_url
-       )
-   if "models" not in st.session_state:
-       # Fetch available models from the server
-       try:
-           models = st.session_state.client.models.list().data
-           st.session_state.models = [model.id for model in models]
-       except Exception as e:
-           st.session_state.models = ["neuralmagic/Mistral-7B-Instruct-v0.3-quantized.w8a16"]  # Fallback if fetch fails
-           st.warning(f"Failed to fetch models: {e}. Using default model.")
-
-
-   # Function to estimate token count (heuristic: ~4 chars per token)
-   def estimate_token_count(messages):
-       total_chars = sum(len(message["content"]) for message in messages)
-       return total_chars // 4  # Approximate: 4 characters per token
-
-
-   # Function to truncate messages if token count exceeds limit
-   def truncate_messages(messages, max_tokens=2048, keep_last_n=5):
-       # Always keep the system prompt (if present) and the last N messages
-       system_prompt = [msg for msg in messages if msg["role"] == "system"]
-       non_system_messages = [msg for msg in messages if msg["role"] != "system"]
-
-       # Estimate current token count
-       current_tokens = estimate_token_count(messages)
-
-       # If under the limit, no truncation needed
-       if current_tokens <= max_tokens:
-           return messages
-
-       # Truncate older non-system messages, keeping the last N
-       truncated_non_system_messages = non_system_messages[-keep_last_n:] if len(
-           non_system_messages) > keep_last_n else non_system_messages
-
-       # Reconstruct messages: system prompt (if any) + truncated non-system messages
-       return system_prompt + truncated_non_system_messages
-
-
-   # Function to get completion from vLLM server
-   def get_completion(client, model_id, messages, stream=True, temperature=0.2, top_p=0.85, max_tokens=512):
-       completion_args = {
-           "model": model_id,
-           "messages": messages,
-           "temperature": temperature,
-           "top_p": top_p,
-           "max_tokens": max_tokens,
-           "stream": stream,
-       }
-       try:
-           response = client.chat.completions.create(**completion_args)
-           return response
-       except Exception as e:
-           st.error(f"Error during API call: {e}")
-           return None
-
-
-   # Sidebar for configuration
-   with st.sidebar:
-       st.header("Chat Settings")
-
-       # Model selection dropdown
-       model_id = st.selectbox(
-           "Select Model",
-           options=st.session_state.models,
-           index=0,
-           help="Choose a model available on the vLLM server"
-       )
-
-       # System prompt input
-       system_prompt = st.text_area(
-           "System Prompt",
-           value="You are a finance expert, providing clear, accurate, and concise answers to financial questions.",
-           height=100,
-           help="Enter a system prompt to guide the model's behavior (optional)"
-       )
-       if st.button("Apply System Prompt"):
-           if st.session_state.messages and st.session_state.messages[0]["role"] == "system":
-               st.session_state.messages[0] = {"role": "system", "content": system_prompt}
            else:
-               st.
-
-
-
-
-
-
-
-
-
            )

-   #
-   st.
-
-
-
-
-       if message["role"] == "system":
-           with st.expander("System Prompt", expanded=False):
-               st.markdown(f"**System**: {message['content']}")
-       else:
-           with st.chat_message(message["role"]):
-               st.markdown(message["content"])
-
-   # User input
-   if prompt := st.chat_input("Type your message here..."):
-       # Add user message to history
-       st.session_state.messages.append({"role": "user", "content": prompt})
-       with st.chat_message("user"):
-           st.markdown(prompt)
-
-       # Truncate messages if necessary to stay under token limit
-       st.session_state.messages = truncate_messages(
-           st.session_state.messages,
-           max_tokens=2048 - max_tokens,  # Reserve space for the output
-           keep_last_n=5
-       )
-       # Debug: Log token count and messages
-       current_tokens = estimate_token_count(st.session_state.messages)
-       st.write(f"Debug: Current token count: {current_tokens}")
-       st.write(f"Debug: Messages sent to model: {st.session_state.messages}")
-
-       # Get and display assistant response
-       with st.chat_message("assistant"):
-           response = get_completion(
-               st.session_state.client,
-               model_id,
-               st.session_state.messages,
-               stream=True,
-               temperature=temperature,
-               top_p=top_p,
-               max_tokens=max_tokens
            )
-           if
-
-
-
-
-
-
-
-
            else:
-               st.
-
-
-

New version:

    import streamlit as st
    from openai import OpenAI

+   # Authentication function
+   def authenticate():
+       st.title("Financial Virtual Assistant")
+       st.subheader("Login")
+
+       username = st.text_input("Username")
+       password = st.text_input("Password", type="password")
+
+       if st.button("Login"):
+           if username == os.getenv('username') and password == os.getenv('password'):
+               st.session_state.authenticated = True
+               return True
            else:
+               st.error("Invalid username or password")
+       return False
+
+
+   # Check authentication state
+   if "authenticated" not in st.session_state:
+       st.session_state.authenticated = False
+
+   if not st.session_state.authenticated:
+       if authenticate():
+           st.rerun()
+   else:
+       # Streamlit page configuration
+       st.set_page_config(page_title="Financial Virtual Assistant", layout="wide")
+
+       # Initialize session state for chat history and API client
+       if "messages" not in st.session_state:
+           st.session_state.messages = []
+       if "client" not in st.session_state:
+           base_url = f"https://brandontoews--vllm-openai-compatible-serve.modal.run/v1"
+
+           # Initialize OpenAI client
+           st.session_state.client = OpenAI(
+               api_key=os.getenv('openai_api_key'),  # Replace with your API key or use modal.Secret
+               base_url=base_url
+           )
+       if "models" not in st.session_state:
+           # Fetch available models from the server
+           try:
+               models = st.session_state.client.models.list().data
+               st.session_state.models = [model.id for model in models]
+           except Exception as e:
+               st.session_state.models = ["neuralmagic/Mistral-7B-Instruct-v0.3-quantized.w8a16"]  # Fallback if fetch fails
+               st.warning(f"Failed to fetch models: {e}. Using default model.")
+
+
+       # Function to estimate token count (heuristic: ~4 chars per token)
+       def estimate_token_count(messages):
+           total_chars = sum(len(message["content"]) for message in messages)
+           return total_chars // 4  # Approximate: 4 characters per token
+
+
+       # Function to truncate messages if token count exceeds limit
+       def truncate_messages(messages, max_tokens=2048, keep_last_n=5):
+           # Always keep the system prompt (if present) and the last N messages
+           system_prompt = [msg for msg in messages if msg["role"] == "system"]
+           non_system_messages = [msg for msg in messages if msg["role"] != "system"]
+
+           # Estimate current token count
+           current_tokens = estimate_token_count(messages)
+
+           # If under the limit, no truncation needed
+           if current_tokens <= max_tokens:
+               return messages
+
+           # Truncate older non-system messages, keeping the last N
+           truncated_non_system_messages = non_system_messages[-keep_last_n:] if len(
+               non_system_messages) > keep_last_n else non_system_messages
+
+           # Reconstruct messages: system prompt (if any) + truncated non-system messages
+           return system_prompt + truncated_non_system_messages
+
+
+       # Function to get completion from vLLM server
+       def get_completion(client, model_id, messages, stream=True, temperature=0.2, top_p=0.85, max_tokens=512):
+           completion_args = {
+               "model": model_id,
+               "messages": messages,
+               "temperature": temperature,
+               "top_p": top_p,
+               "max_tokens": max_tokens,
+               "stream": stream,
+           }
+           try:
+               response = client.chat.completions.create(**completion_args)
+               return response
+           except Exception as e:
+               st.error(f"Error during API call: {e}")
+               return None
+
+
+       # Sidebar for configuration
+       with st.sidebar:
+           st.header("Chat Settings")
+
+           # Model selection dropdown
+           model_id = st.selectbox(
+               "Select Model",
+               options=st.session_state.models,
+               index=0,
+               help="Choose a model available on the vLLM server"
            )

+           # System prompt input
+           system_prompt = st.text_area(
+               "System Prompt",
+               value="You are a finance expert, providing clear, accurate, and concise answers to financial questions.",
+               height=100,
+               help="Enter a system prompt to guide the model's behavior (optional)"
            )
+           if st.button("Apply System Prompt"):
+               if st.session_state.messages and st.session_state.messages[0]["role"] == "system":
+                   st.session_state.messages[0] = {"role": "system", "content": system_prompt}
+               else:
+                   st.session_state.messages.insert(0, {"role": "system", "content": system_prompt})
+               st.success("System prompt updated!")
+
+           # Other settings
+           temperature = st.slider("Temperature", 0.0, 1.0, 0.2, help="Controls randomness of responses")
+           top_p = st.slider("Top P", 0.0, 1.0, 0.85, help="Controls diversity via nucleus sampling")
+           max_tokens = st.number_input("Max Tokens", min_value=1, value=512, help="Maximum length of response (optional)")
+           if st.button("Clear Chat"):
+               st.session_state.messages = (
+                   [{"role": "system", "content": system_prompt}] if system_prompt else []
+               )
+
+       # Main chat interface
+       st.title("Financial Virtual Assistant")
+       st.write("Chat with a finance-tuned LLM powered by vLLM on Modal. Select a model and customize your system prompt!")
+
+       # Display chat history
+       for message in st.session_state.messages:
+           if message["role"] == "system":
+               with st.expander("System Prompt", expanded=False):
+                   st.markdown(f"**System**: {message['content']}")
            else:
+               with st.chat_message(message["role"]):
+                   st.markdown(message["content"])
+
+       # User input
+       if prompt := st.chat_input("Type your message here..."):
+           # Add user message to history
+           st.session_state.messages.append({"role": "user", "content": prompt})
+           with st.chat_message("user"):
+               st.markdown(prompt)
+
+           # Truncate messages if necessary to stay under token limit
+           st.session_state.messages = truncate_messages(
+               st.session_state.messages,
+               max_tokens=2048 - max_tokens,  # Reserve space for the output
+               keep_last_n=5
+           )
+           # Debug: Log token count and messages
+           current_tokens = estimate_token_count(st.session_state.messages)
+           st.write(f"Debug: Current token count: {current_tokens}")
+           st.write(f"Debug: Messages sent to model: {st.session_state.messages}")
+
+           # Get and display assistant response
+           with st.chat_message("assistant"):
+               response = get_completion(
+                   st.session_state.client,
+                   model_id,
+                   st.session_state.messages,
+                   stream=True,
+                   temperature=temperature,
+                   top_p=top_p,
+                   max_tokens=max_tokens
+               )
+               if response:
+                   # Stream the response
+                   placeholder = st.empty()
+                   assistant_message = ""
+                   for chunk in response:
+                       if chunk.choices[0].delta.content:
+                           assistant_message += chunk.choices[0].delta.content
+                           placeholder.markdown(assistant_message + "▌")  # Cursor effect
+                   placeholder.markdown(assistant_message)  # Final message without cursor
+                   st.session_state.messages.append({"role": "assistant", "content": assistant_message})
+               else:
+                   st.error("Failed to get a response from the server.")
+
+       # Instructions
+       st.caption("Built with Streamlit and vLLM on Modal. Adjust settings in the sidebar and chat away!")
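
One detail worth flagging in the new version as shown in this hunk: authenticate() and the OpenAI client setup both call os.getenv(), but no import of the os module appears anywhere in the added lines. Assuming it is not imported elsewhere in the file, a minimal fix would be to add it alongside the existing imports, for example:

    import os  # assumption: required for the os.getenv() calls in authenticate() and the client setup
    import streamlit as st
    from openai import OpenAI

With that in place, the username, password, and openai_api_key values are expected as environment variables (for example, set as Space secrets) before launching the app with streamlit run app.py.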