File size: 4,160 Bytes
4861273
7bf24af
 
 
 
6fbe46c
 
 
4861273
7bf24af
 
 
 
 
4861273
 
7bf24af
6fbe46c
4861273
7bf24af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4861273
7bf24af
 
4861273
7bf24af
 
 
 
 
 
 
4861273
7bf24af
 
 
 
 
 
4861273
6fbe46c
 
 
 
 
 
 
7bf24af
4861273
7bf24af
 
4861273
7bf24af
 
 
 
4861273
7bf24af
 
 
4861273
7bf24af
 
 
 
4861273
7bf24af
 
 
 
 
 
 
 
 
 
 
 
4861273
6fbe46c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import streamlit as st
from huggingface_hub import InferenceClient, HfApi
import time
import requests
from requests.exceptions import RequestException
from gtts import gTTS  # Google Text-to-Speech
import tempfile
import os

# Set page config at the very beginning
# (Streamlit requires set_page_config to be the first st.* call in the script.)
st.set_page_config(page_title="Phi-3.5 Chatbot", page_icon="🤖")

# Add a text input for the Hugging Face API token.
# NOTE: hf_token is a module-level global that respond() reads directly.
hf_token = st.text_input("Enter your Hugging Face API token", type="password")

@st.cache_resource
def get_client(token):
    """Build an InferenceClient for Phi-3.5-mini, cached per token value."""
    client = InferenceClient("microsoft/Phi-3.5-mini-instruct", token=token)
    return client

def validate_token(token):
    """Return True when *token* authenticates against the Hugging Face Hub.

    On any failure the error is surfaced in the UI and False is returned,
    so callers can simply bail out of the request flow.
    """
    try:
        HfApi(token=token).whoami()
    except Exception as e:
        # Broad catch is intentional: any auth/network problem means "invalid".
        st.error(f"Token validation failed: {str(e)}")
        return False
    return True

def make_request_with_retries(client, prompt, max_new_tokens, temperature, top_p, max_retries=5, initial_delay=1):
    """Call client.text_generation with exponential-backoff retries.

    Args:
        client: an InferenceClient (or compatible) exposing text_generation().
        prompt: the fully assembled prompt string.
        max_new_tokens / temperature / top_p: generation parameters, passed through.
        max_retries: total attempts before giving up.
        initial_delay: base delay in seconds; doubled on each retry.

    Returns:
        The generated text from the first successful attempt.

    Raises:
        RequestException: re-raised (with original traceback) once all
        attempts are exhausted. Non-network exceptions propagate immediately.
    """
    for attempt in range(max_retries):
        try:
            response = client.text_generation(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
            )
            return response
        except RequestException:
            if attempt < max_retries - 1:
                delay = initial_delay * (2 ** attempt)  # Exponential backoff
                st.warning(f"Request failed. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
                time.sleep(delay)
            else:
                # Bare raise preserves the original traceback, unlike `raise e`.
                raise

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Yield the model's reply to *message* given the (user, assistant) *history*.

    Generator so the UI loop can iterate responses; yields exactly once.
    Reads the module-level hf_token; emits st.error and returns early when
    the token is missing or invalid.
    """
    # Guard clauses: no token, or a token the Hub rejects.
    if not hf_token:
        st.error("Please enter your Hugging Face API token.")
        return
    if not validate_token(hf_token):
        return

    client = get_client(hf_token)

    # Assemble the prompt: system message, prior turns, then the new message.
    turns = [f"Human: {user_msg}\nAssistant: {assistant_msg}\n\n"
             for user_msg, assistant_msg in history]
    prompt = f"{system_message}\n\n" + "".join(turns) + f"Human: {message}\nAssistant:"

    try:
        yield make_request_with_retries(client, prompt, max_tokens, temperature, top_p)
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        yield "I'm sorry, but I encountered an error while processing your request."

def text_to_speech(text):
    """Synthesize *text* to an MP3 via gTTS and return the file path.

    The caller is responsible for the file's lifetime (it is not auto-deleted).
    """
    # mkstemp + immediate close fixes two problems with the previous
    # NamedTemporaryFile(delete=False) approach: the open OS handle was
    # leaked, and on Windows that open handle prevents gTTS from writing
    # to the same path.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    gTTS(text=text, lang='en').save(path)
    return path

st.title("Phi-3.5 Mini Chatbot")

# Chat history persists across Streamlit reruns via session_state.
if "messages" not in st.session_state:
    st.session_state.messages = []

system_message = st.text_input("System message", value="You are a helpful AI assistant.")
max_tokens = st.slider("Max new tokens", min_value=1, max_value=1024, value=256, step=1)
temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.01)
top_p = st.slider("Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01)

# Replay the prior conversation on every rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("What is your message?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Build completed (user, assistant) turn pairs for the prompt.
    # The new user message was just appended, so exclude it; the remaining
    # messages alternate user/assistant. BUGFIX: the old code enumerated the
    # *sliced* list but indexed the *unsliced* one (messages[i+1] with i the
    # slice index), which from the second turn onward paired a user message
    # with the NEXT USER message instead of the assistant reply.
    past = st.session_state.messages[:-1]
    history = [(past[i]["content"], past[i + 1]["content"])
               for i in range(0, len(past) - 1, 2)]

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        for response in respond(prompt,
                                history,
                                system_message,
                                max_tokens,
                                temperature,
                                top_p):
            message_placeholder.markdown(response)
            full_response = response

    st.session_state.messages.append({"role": "assistant", "content": full_response})

    # Generate audio from the assistant's response
    audio_file = text_to_speech(full_response)
    st.audio(audio_file, format='audio/mp3')