Spaces:
Running
Running
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import tensorflow as tf | |
# --- Model / tokenizer setup (runs once at import time) ---------------------
print("Loading the model......")

# Identifier handed to `from_pretrained` for both the tokenizer and the model.
model_name = "WICKED4950/Irisonego5"

# The strategy has to exist before the model is built so that the model's
# variables are created inside its scope (see the `with` block below).
strategy = tf.distribute.MirroredStrategy()
tf.config.optimizer.set_jit(True)  # Enable XLA

tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): `AutoModelForCausalLM` is the PyTorch auto class in
# transformers, while this script sets up a TensorFlow strategy and predict()
# requests TensorFlow tensors from the tokenizer. Confirm whether
# `TFAutoModelForCausalLM` was intended for this checkpoint.
with strategy.scope():
    model = AutoModelForCausalLM.from_pretrained(model_name)

print("Interface getting done....")
# Define the chatbot function
def predict(user_input):
    """Generate a sampled chatbot reply for one user message.

    Args:
        user_input: The text entered by the user.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Tokenize input text
    inputs = tokenizer(user_input, return_tensors="tf", padding=True, truncation=True)

    # Generate the response using the model
    response_ids = model.generate(
        inputs["input_ids"],
        # Forward the tokenizer's mask (when it provides one) instead of
        # leaving generate() to infer which positions are padding.
        attention_mask=inputs.get("attention_mask"),
        max_length=128,   # Upper bound on prompt + generated tokens
        do_sample=True,   # Sampling for variability
        top_k=15,         # Sample only from the 15 most likely tokens
        top_p=0.95,       # Nucleus sampling
        temperature=0.8,  # Adjusts creativity of response
    )

    # Decode the response (the original read the undefined name
    # `response_id`, which raised NameError on every call).
    response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
    return response
# Gradio interface: a single text box in, a single text box out, backed by
# predict().
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="Your Chatbot",
)

print("Deploying")

# Start serving the interface.
iface.launch()