import os

import gradio as gr
from huggingface_hub import InferenceClient


class XylariaChat:
    def __init__(self):
        # Securely load the HuggingFace token from the environment
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HuggingFace token not found in environment variables")

        # Initialize the inference client
        self.client = InferenceClient(
            model="Qwen/QwQ-32B-Preview",
            api_key=self.hf_token
        )

        # Initialize conversation history and persistent memory
        self.conversation_history = []
        self.persistent_memory = {}

        # System prompt with more detailed instructions
        self.system_prompt = """You are a helpful and harmless AI assistant. You are Xylaria 1.4 Senoa, made by Sk Md Saad Amin. You think step by step, and you should enclose your thinking in tags."""

    def store_information(self, key, value):
        """Store important information in persistent memory."""
        self.persistent_memory[key] = value

    def retrieve_information(self, key):
        """Retrieve information from persistent memory."""
        return self.persistent_memory.get(key)

    def reset_conversation(self):
        """
        Completely reset the conversation history and persistent memory,
        and recreate the API client for a clean session.
        """
        # Clear local memory
        self.conversation_history = []
        self.persistent_memory.clear()

        # Recreate the client; the Inference API itself is stateless, so this
        # simply guarantees a fresh client object with no cached state
        try:
            self.client = InferenceClient(
                model="Qwen/QwQ-32B-Preview",
                api_key=self.hf_token
            )
        except Exception as e:
            print(f"Error resetting API client: {e}")

        return None  # Returning None clears the chatbot component

    def get_response(self, user_input):
        # Prepare messages with conversation context and persistent memory
        messages = [
            {"role": "system", "content": self.system_prompt},
            *self.conversation_history,
            {"role": "user", "content": user_input}
        ]

        # Add persistent memory context if available
        if self.persistent_memory:
            memory_context = "Remembered Information:\n" + "\n".join(
                f"{k}: {v}" for k, v in self.persistent_memory.items()
            )
            messages.insert(1, {"role": "system", "content": memory_context})

        # Generate a streaming response
        try:
            stream = self.client.chat.completions.create(
                messages=messages,
                temperature=0.5,
                max_tokens=10240,
                top_p=0.7,
                stream=True
            )
            return stream
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def create_interface(self):
        def streaming_response(message, chat_history):
            response_stream = self.get_response(message)

            # If get_response returned an error string, show it and stop.
            # This must yield rather than return a value, because the
            # function is a generator.
            if isinstance(response_stream, str):
                yield "", chat_history + [[message, response_stream]]
                return

            # Prepare for the streaming response; the empty string yielded
            # for the textbox clears the input on each update
            full_response = ""
            updated_history = chat_history + [[message, ""]]

            # Stream the output, updating the last chat message as chunks arrive
            for chunk in response_stream:
                if chunk.choices[0].delta.content:
                    full_response += chunk.choices[0].delta.content
                    updated_history[-1][1] = full_response
                    yield "", updated_history

            # Update conversation history
            self.conversation_history.append(
                {"role": "user", "content": message}
            )
            self.conversation_history.append(
                {"role": "assistant", "content": full_response}
            )

            # Limit conversation history to prevent token overflow
            if len(self.conversation_history) > 10:
                self.conversation_history = self.conversation_history[-10:]

        # Custom CSS to apply the Inter font across the interface
        custom_css = """
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

        body, .gradio-container {
            font-family: 'Inter', sans-serif !important;
        }
        .chatbot-container .message {
            font-family: 'Inter', sans-serif !important;
        }
        .gradio-container input,
        .gradio-container textarea,
        .gradio-container button {
            font-family: 'Inter', sans-serif !important;
        }
        """

        with gr.Blocks(theme='soft', css=custom_css) as demo:
            # Chat interface with improved styling
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.4 Senoa",
                    height=500,
                    show_copy_button=True
                )

                # Input row with improved layout
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False,
                        placeholder="Type your message...",
                        container=False,
                        scale=4
                    )
                    btn = gr.Button("Send", scale=1)

                # Clear-conversation and clear-memory buttons
                clear = gr.Button("Clear Conversation")
                clear_memory = gr.Button("Clear Memory")

            # Submit functionality with streaming
            btn.click(
                fn=streaming_response,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot]
            )
            txt.submit(
                fn=streaming_response,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot]
            )

            # Clear both the visible conversation and the stored history,
            # so the model does not keep seeing the old context
            def clear_conversation():
                self.conversation_history = []
                return None

            clear.click(
                fn=clear_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False
            )

            # Clear persistent memory and reset the conversation
            clear_memory.click(
                fn=self.reset_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False
            )

            # Reset all state whenever the interface is (re)loaded
            demo.load(self.reset_conversation, None, None)

        return demo


# Launch the interface
def main():
    chat = XylariaChat()
    interface = chat.create_interface()
    interface.launch(
        share=True,  # Optional: create a public link
        debug=True   # Show detailed errors
    )


if __name__ == "__main__":
    main()
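
# ---------------------------------------------------------------------------
# Minimal usage sketch for the persistent-memory helpers, which the interface
# above never calls directly. The key and value below are hypothetical
# examples, and running this requires HF_TOKEN to be set in the environment:
#
#     chat = XylariaChat()
#     chat.store_information("user_name", "Saad")
#     chat.retrieve_information("user_name")  # -> "Saad"
#
# Stored pairs are injected as an extra system message inside get_response(),
# so the model sees them on every turn until reset_conversation() is called.
# ---------------------------------------------------------------------------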