import gradio as gr
import torch
import asyncio
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load model and tokenizer
model_name = "hassaanik/grammar-correction-model"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use GPU if available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Use FP16 for faster inference on GPU
if torch.cuda.is_available():
    model.half()


# Async grammar correction function with batch processing
async def correct_grammar_async(texts):
    # Tokenize the batch of inputs and move it to the correct device (CPU/GPU)
    inputs = tokenizer(
        texts, return_tensors="pt", padding=True, truncation=True, max_length=512
    ).to(device)

    # Run generation in a worker thread so the event loop is not blocked.
    # The attention mask is passed alongside the input ids so that padding
    # tokens in a batch are ignored during generation.
    outputs = await asyncio.to_thread(
        model.generate,
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=512,
        num_beams=5,
        early_stopping=True,
    )

    # Decode the batch of outputs, dropping special tokens
    corrected_texts = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
    return corrected_texts


# Gradio interface function to handle input and output
def correct_grammar_interface(text):
    corrected_text = asyncio.run(correct_grammar_async([text]))[0]  # Single input for now
    return corrected_text


# Gradio interface with an async, batch-capable backend.
# NOTE: the source file is truncated at `gr.Markdown("`, so the Markdown text
# and the widgets below are a reconstruction, not the original layout.
with gr.Blocks() as grammar_app:
    gr.Markdown("# Grammar Correction")
    input_box = gr.Textbox(label="Input text", lines=4, placeholder="Enter text to correct...")
    output_box = gr.Textbox(label="Corrected text", lines=4)
    correct_button = gr.Button("Correct Grammar")
    correct_button.click(fn=correct_grammar_interface, inputs=input_box, outputs=output_box)
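
# The original file is cut off inside the Blocks context, so no launch call
# survives. A minimal sketch assuming default settings; options such as
# `share` or `server_port` are left unset because the original values are
# unknown.
if __name__ == "__main__":
    grammar_app.launch()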