import os
import re
import time

import streamlit as st
from openai import OpenAI

# Set up the API client from environment variables
API_KEY = os.getenv("API_KEY")
URL = os.getenv("URL")

client = OpenAI(
    api_key=API_KEY,
    base_url=URL
)
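# The client reads its credentials from the environment. A typical setup might
# look like the following (values are placeholders; the actual endpoint depends
# on your OpenAI-compatible provider):
#   export API_KEY="your-api-key"
#   export URL="https://your-openai-compatible-endpoint/v1"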
# Available models
MODELS = [
    "Meta-Llama-3.1-405B-Instruct",
    "Meta-Llama-3.1-70B-Instruct",
    "Meta-Llama-3.1-8B-Instruct"
]
# Available search strategies (defined for the UI but not yet wired into it)
SEARCH_STRATEGY = [
    "None",
    "Greedy-Best-Score",
    "Iterative-Refinement",
    "Monte-Carlo-Tree-Search"
]
def chat_with_ai(message, chat_history, system_prompt):
    """Build an OpenAI-style message list from the system prompt, history, and new message."""
    messages = [
        {"role": "system", "content": system_prompt},
    ]
    for human, ai, _ in chat_history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": message})
    return messages
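# As an illustration, a history with one prior exchange produces a message list
# shaped like this (values are hypothetical):
#   [{"role": "system", "content": "You are a helpful assistant..."},
#    {"role": "user", "content": "What is 2 + 2?"},
#    {"role": "assistant", "content": "4"},
#    {"role": "user", "content": "And multiplied by 3?"}]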
def respond(message, chat_history, model, system_prompt, thinking_budget):
    """Stream the model's reply, yielding (partial_response, elapsed_seconds) per chunk."""
    messages = chat_with_ai(message, chat_history, system_prompt.format(budget=thinking_budget))
    response = ""
    start_time = time.time()
    with st.spinner("AI is thinking..."):
        for chunk in client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True
        ):
            content = chunk.choices[0].delta.content or ""
            response += content
            yield response, time.time() - start_time
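# Because respond() is a generator, a caller can re-render the growing response
# on every chunk, for example (a sketch, assuming a Streamlit placeholder):
#   placeholder = st.empty()
#   for partial, elapsed in respond(msg, history, model, prompt, budget):
#       placeholder.markdown(partial)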
def parse_and_display_response(response):
    """Split a tagged model response into steps, answer, and reflection, and render each."""
    # Extract answer and reflection
    answer_match = re.search(r'<answer>(.*?)</answer>', response, re.DOTALL)
    reflection_match = re.search(r'<reflection>(.*?)</reflection>', response, re.DOTALL)

    answer = answer_match.group(1).strip() if answer_match else ""
    reflection = reflection_match.group(1).strip() if reflection_match else ""

    # Remove answer, reflection, and final reward from the main response
    response = re.sub(r'<answer>.*?</answer>', '', response, flags=re.DOTALL)
    response = re.sub(r'<reflection>.*?</reflection>', '', response, flags=re.DOTALL)
    response = re.sub(r'<reward>.*?</reward>\s*$', '', response, flags=re.DOTALL)

    # Extract and display steps
    steps = re.findall(r'<step>(.*?)</step>', response, re.DOTALL)
    with st.expander("Show thinking process", expanded=False):
        for i, step in enumerate(steps, 1):
            st.markdown(f"**Step {i}:**")
            st.write(step.strip())
            st.markdown("---")

    # Display answer and reflection
    if answer:
        st.markdown("### Answer:")
        st.write(answer)
    if reflection:
        st.markdown("### Reflection:")
        st.write(reflection)
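# A minimal tagged response this parser handles could look like (hypothetical):
#   <count>2</count>
#   <step>Add 2 and 2.</step>
#   <count>1</count>
#   <answer>4</answer>
#   <reflection>Straightforward arithmetic.</reflection>
#   <reward>1.0</reward>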
def display_message_with_code_blocks(message):
    """Render a message, routing tagged responses to the structured parser."""
    # First, check whether the message contains the special tags
    if '<step>' in message or '<answer>' in message or '<reflection>' in message:
        parse_and_display_response(message)
    else:
        # If not, fall back to plain rendering with fenced code blocks
        parts = re.split(r'(```[\s\S]*?```)', message)
        for part in parts:
            if part.startswith('```') and part.endswith('```'):
                # This is a code block: peel off the fences and the language hint
                code = part.strip('`').strip()
                lang = code.split('\n')[0] if '\n' in code else ''
                code = '\n'.join(code.split('\n')[1:]) if lang else code
                st.code(code, language=lang or None, line_numbers=True)
            else:
                # This is regular text
                st.write(part)
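# Example of the fallback path: a plain message with a fenced block is split
# into text and highlighted code (hypothetical input):
#   display_message_with_code_blocks("Here is code:\n```python\nprint('hi')\n```")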
def main():
    st.set_page_config(page_title="AI Chatbot", layout="wide")
    st.title("Llama3.1-Instruct-O1")
    st.markdown("<a href='https://sambanova.ai/fast-api?api_ref=907266' target='_blank'>Powered by Llama3.1 models through SN Cloud</a>", unsafe_allow_html=True)

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    col1, col2 = st.columns([1, 1])

    with col1:
        model = st.selectbox("Select Model", MODELS, index=0)
        thinking_budget = st.slider("Thinking Budget", 1, 100, 1, help="Controls how much the model thinks; pick a value between 1 and 100 inclusive")
    with col2:
        system_prompt = st.text_area(
            "System Prompt",
            value="""
You are a helpful assistant in normal conversation.
When given a problem to solve, you are an expert problem-solving assistant. Your task is to provide a detailed, step-by-step solution to a given question. Follow these instructions carefully:
1. Read the given question carefully and reset the counter between <count> and </count> to {budget}.
2. Generate a detailed, logical step-by-step solution.
3. Enclose each step of your solution within <step> and </step> tags.
4. You are allowed at most {budget} steps (the starting budget); keep track by counting down within <count> </count> tags, and STOP GENERATING MORE STEPS when the count hits 0. You do not have to use all of the steps.
5. When you are unsure how to proceed, do a self-reflection; based on the self-reflection and reward, decide whether you need to return to previous steps.
6. After completing the solution steps, reorganize and synthesize the steps into the final answer within <answer> and </answer> tags.
7. Provide a critical, honest and subjective self-evaluation of your reasoning process within <reflection> and </reflection> tags.
8. Assign a quality score to your solution as a float between 0.0 (lowest quality) and 1.0 (highest quality), enclosed in <reward> and </reward> tags.
Example format:
<count> [starting budget] </count>
<step> [Content of step 1] </step>
<count> [remaining budget] </count>
<step> [Content of step 2] </step>
<reflection> [Evaluation of the steps so far] </reflection>
<reward> [Float between 0.0 and 1.0] </reward>
<count> [remaining budget] </count>
<step> [Content of step 3 or Content of some previous step] </step>
<count> [remaining budget] </count>
...
<step> [Content of final step] </step>
<count> [remaining budget] </count>
<answer> [Final Answer] </answer>
<reflection> [Evaluation of the solution] </reflection>
<reward> [Float between 0.0 and 1.0] </reward>
""",
            height=200
        )
    st.markdown("---")

    # Replay the conversation so far
    for human, ai, thinking_time in st.session_state.chat_history:
        with st.chat_message("human"):
            st.write(human)
        with st.chat_message("ai"):
            display_message_with_code_blocks(ai)
            st.caption(f"Thinking time: {thinking_time:.2f} s")

    message = st.chat_input("Type your message here...")
    if message:
        with st.chat_message("human"):
            st.write(message)
        with st.chat_message("ai"):
            response_placeholder = st.empty()
            time_placeholder = st.empty()
            # Stream the raw text first, then re-render it with structured formatting
            for response, elapsed_time in respond(message, st.session_state.chat_history, model, system_prompt, thinking_budget):
                response_placeholder.markdown(response)
                time_placeholder.caption(f"Thinking time: {elapsed_time:.2f} s")
            response_placeholder.empty()
            time_placeholder.empty()
            display_message_with_code_blocks(response)
            time_placeholder.caption(f"Thinking time: {elapsed_time:.2f} s")
        st.session_state.chat_history.append((message, response, elapsed_time))

    if st.button("Clear Chat"):
        st.session_state.chat_history = []
        st.rerun()


if __name__ == "__main__":
    main()
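# To run locally (assuming this file is saved as app.py and the API_KEY and URL
# environment variables are set):
#   streamlit run app.py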