import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel, GPT2LMHeadModel, GPT2Tokenizer

# Load the bi-encoder model and tokenizer
bi_encoder_model_name = "sentence-transformers/all-MiniLM-L6-v2"
bi_tokenizer = AutoTokenizer.from_pretrained(bi_encoder_model_name)
bi_model = AutoModel.from_pretrained(bi_encoder_model_name)

# Load the GPT-2 model and tokenizer for response generation
gpt2_model_name = "gpt2"
gpt2_tokenizer = GPT2Tokenizer.from_pretrained(gpt2_model_name)
gpt2_model = GPT2LMHeadModel.from_pretrained(gpt2_model_name)


def encode_text(text):
    """Encode text into a fixed-size embedding using the bi-encoder."""
    inputs = bi_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = bi_model(**inputs)
    # Mean-pool the last hidden state along the sequence dimension so each
    # input yields a single (batch, hidden_size) embedding vector.
    return outputs.last_hidden_state.mean(dim=1).numpy()


def generate_response(user_input, context=""):
    """Generate a GPT-2 response, prepending the context text if provided."""
    # GPT-2 consumes text, not embedding vectors, so the raw context string
    # is prepended to the user input rather than the bi-encoder embedding.
    combined_input = (context + " " + user_input).strip() if context else user_input

    # do_sample=True is required for temperature and top_p to take effect;
    # pad_token_id is set explicitly because GPT-2 has no padding token.
    gpt2_inputs = gpt2_tokenizer(combined_input, return_tensors="pt")
    gpt2_outputs = gpt2_model.generate(
        **gpt2_inputs,
        max_length=150,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.5,
        top_p=0.9,
        repetition_penalty=1.2,
        pad_token_id=gpt2_tokenizer.eos_token_id,
    )
    return gpt2_tokenizer.decode(gpt2_outputs[0], skip_special_tokens=True)


def chatbot(user_input, context=""):
    # The bi-encoder embedding is computed for context understanding (e.g.
    # similarity-based retrieval); generation itself uses the context text.
    context_embedding = encode_text(context) if context else None
    return generate_response(user_input, context)


# Create the Gradio interface
iface = gr.Interface(
    fn=chatbot,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your message here..."),
        gr.Textbox(lines=2, placeholder="Enter context here (optional)..."),
    ],
    outputs="text",
    title="Context-Aware Dynamic Response Chatbot",
    description="A chatbot using a bi-encoder model to understand the input context and GPT-2 to generate dynamic responses.",
)

# Launch the interface
iface.launch()
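# A minimal sketch of calling the chatbot function directly, bypassing the
# Gradio UI (kept commented out because iface.launch() above blocks the
# script; the prompt and context strings here are illustrative, not from
# the original app):
#
#   reply = chatbot(
#       "What are bi-encoders used for?",
#       context="Bi-encoders map sentences to dense vectors for semantic search.",
#   )
#   print(reply)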