Spaces:

aarohanverma
/

text2sql-demo

Sleeping

File size: 2,827 Bytes

816ccb1
 
 
 
9d0df30
816ccb1
 
9d0df30
 
816ccb1
 
 
9d0df30
 
 
 
 
 
 
 
 
 
816ccb1
 
451c534
 
9d0df30
816ccb1
 
 
 
 
 
 
9d0df30
27e057c
451c534
9d0df30
451c534
 
 
9d0df30
 
 
 
 
 
 
 
 
 
 
451c534
9d0df30
27e057c
9d0df30
27e057c
816ccb1
9d0df30
816ccb1
 
 
 
 
 
 
 
27e057c
 
 
816ccb1

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Set up device: use GPU if available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Choose the weight dtype *before* loading: bfloat16 on GPU, FP32 on CPU.
# Loading as bfloat16 and upcasting afterwards would round the weights to
# bfloat16 first, which needlessly discards precision (and costs an extra
# conversion) whenever the checkpoint is stored at higher precision.
model_dtype = torch.bfloat16 if device.type == "cuda" else torch.float32

# Load the fine-tuned model and tokenizer.
# The tokenizer is taken from the base FLAN-T5 checkpoint rather than from
# the fine-tuned repository.
model_name = "aarohanverma/text2sql-flan-t5-base-qlora-finetuned"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=model_dtype).to(device)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

# Optionally compile the model for speed improvements (requires PyTorch 2.0+).
# On failure (e.g. torch.compile is unavailable) the uncompiled model is kept.
# NOTE(review): torch.compile defers the actual compilation to the first
# forward call, so this handler presumably only catches setup-time errors --
# confirm whether runtime compile failures need handling as well.
try:
    model = torch.compile(model)
except Exception as e:
    print("torch.compile optimization failed:", e)

def generate_sql(context: str, query: str) -> str:
    """Generate a SQL statement from a schema context and a natural language query.

    Builds a "Context / Query / Response" prompt from the inputs, runs
    deterministic beam-search generation with a repetition penalty, and keeps
    only the first ``;``-terminated statement of the decoded output.

    Args:
        context: Free-form description of the database (schema, sample data).
        query: The natural language question to translate into SQL.

    Returns:
        The first generated SQL statement terminated with ``;``, or an empty
        string if the model produced no text before the first semicolon.
    """
    prompt = f"Context:\n{context}\nQuery:\n{query}\nResponse:\n"

    # Tokenize the prompt with truncation and max length; move to device.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)

    # Fall back to the pad token when the config defines no decoder start
    # token. A local value is used so the shared model config is not mutated
    # on every request.
    decoder_start_token_id = model.config.decoder_start_token_id
    if decoder_start_token_id is None:
        decoder_start_token_id = tokenizer.pad_token_id

    # Gradients are never needed for inference; disabling them saves memory.
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            # Forward the tokenizer's mask instead of letting generate() infer it.
            attention_mask=inputs["attention_mask"],
            decoder_start_token_id=decoder_start_token_id,
            max_new_tokens=100,
            # Beam search without sampling is already deterministic. A
            # temperature setting only applies when sampling (and 0.0 is not
            # a valid sampling temperature), so it is intentionally omitted.
            do_sample=False,
            num_beams=5,
            repetition_penalty=1.2,
            early_stopping=True,
        )

    # Decode, then keep only the text before the first semicolon.
    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    first_statement = decoded.partition(";")[0].strip()
    if not first_statement:
        # Avoid presenting a bare ";" as if it were a query.
        return ""
    return first_statement + ";"

# Build the Gradio UI: two text inputs (schema context and natural language
# query) feed generate_sql, and its return value is shown as plain text.
context_box = gr.Textbox(
    lines=8,
    label="Context",
    placeholder="Enter table schema, sample data, etc.",
)
query_box = gr.Textbox(
    lines=2,
    label="Query",
    placeholder="Enter your natural language query here...",
)
interface_description = "Enter your own context (e.g., database schema and sample data) and a natural language query. The model will generate the corresponding SQL statement."

iface = gr.Interface(
    fn=generate_sql,
    inputs=[context_box, query_box],
    outputs="text",
    title="Text-to-SQL Generator",
    description=interface_description,
    theme="compact",
    allow_flagging="never",  # flagging is turned off for this demo
)

# Start the web server for the demo.
iface.launch()