import streamlit as st
from huggingface_hub import InferenceClient, HfApi
import time
from requests.exceptions import RequestException
from gtts import gTTS # Google Text-to-Speech
import tempfile
import os
# Set page config at the very beginning
st.set_page_config(page_title="Phi-3.5 Chatbot", page_icon="🤖")
# Add a text input for the Hugging Face API token
hf_token = st.text_input("Enter your Hugging Face API token", type="password")
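
# st.cache_resource memoizes the client across Streamlit reruns, keyed on the
# token argument, so the InferenceClient is not rebuilt on every interaction.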
@st.cache_resource
def get_client(token):
    return InferenceClient("microsoft/Phi-3.5-mini-instruct", token=token)
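
# Cheap pre-flight check: HfApi.whoami() fails fast on an invalid token,
# avoiding a wasted generation request.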
def validate_token(token):
    try:
        api = HfApi(token=token)
        api.whoami()
        return True
    except Exception as e:
        st.error(f"Token validation failed: {str(e)}")
        return False
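
# Retry transient network failures with exponential backoff: with the defaults
# (max_retries=5, initial_delay=1) the waits are 1, 2, 4, and 8 seconds before
# the final attempt, after which the exception is re-raised.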
def make_request_with_retries(client, prompt, max_new_tokens, temperature, top_p, max_retries=5, initial_delay=1):
    for attempt in range(max_retries):
        try:
            response = client.text_generation(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
            )
            return response
        except RequestException as e:
            if attempt < max_retries - 1:
                delay = initial_delay * (2 ** attempt)  # Exponential backoff
                st.warning(f"Request failed. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
                time.sleep(delay)
            else:
                raise e
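
# Build a plain-text "Human:/Assistant:" transcript, since the model is called
# here through text_generation rather than a chat endpoint. respond() is a
# generator (it yields a single completion), so the caller's loop keeps
# working unchanged if token streaming is added later.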
def respond(message, history, system_message, max_tokens, temperature, top_p):
    if not hf_token:
        st.error("Please enter your Hugging Face API token.")
        return
    if not validate_token(hf_token):
        return
    client = get_client(hf_token)
    # Construct the prompt
    prompt = f"{system_message}\n\n"
    for user_msg, assistant_msg in history:
        prompt += f"Human: {user_msg}\nAssistant: {assistant_msg}\n\n"
    prompt += f"Human: {message}\nAssistant:"
    try:
        response = make_request_with_retries(client, prompt, max_tokens, temperature, top_p)
        yield response
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        yield "I'm sorry, but I encountered an error while processing your request."
def text_to_speech(text):
    # Create a named temporary file; close the handle first so gTTS can
    # reopen the path for writing (required on Windows).
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tmp_file.close()
    tts = gTTS(text=text, lang='en')
    tts.save(tmp_file.name)
    return tmp_file.name
st.title("Phi-3.5 Mini Chatbot")
if "messages" not in st.session_state:
st.session_state.messages = []
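
# Inference controls exposed in the UI: the system prompt plus sampling
# parameters (temperature scales randomness; top_p caps the cumulative
# probability mass considered, i.e. nucleus sampling).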
system_message = st.text_input("System message", value="You are a helpful AI assistant.")
max_tokens = st.slider("Max new tokens", min_value=1, max_value=1024, value=256, step=1)
temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.01)
top_p = st.slider("Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01)
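
# Streamlit reruns the whole script on each interaction, so the stored
# conversation is re-rendered from session state every time.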
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
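
# st.chat_input returns None until the user submits, so the walrus operator
# runs this block only when a new message arrives.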
if prompt := st.chat_input("What is your message?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Pair past turns as (user, assistant); the just-appended prompt is
        # excluded by [:-1]. The assistant reply for the user message in
        # pair i sits at index 2*i + 1 of the flat message list.
        history = [(msg["content"], st.session_state.messages[2 * i + 1]["content"])
                   for i, msg in enumerate(st.session_state.messages[:-1:2])]
        for response in respond(prompt, history, system_message, max_tokens, temperature, top_p):
            message_placeholder.markdown(response)
            full_response = response
        st.session_state.messages.append({"role": "assistant", "content": full_response})
        if full_response:
            # Generate audio from the assistant's response, then remove the
            # temporary file once its bytes have been handed to st.audio.
            audio_file = text_to_speech(full_response)
            with open(audio_file, "rb") as f:
                st.audio(f.read(), format='audio/mp3')
            os.unlink(audio_file)