# Page header captured with this file:
#   Spaces: LeonceNsh / usgov-contracts-rag (Sleeping)
#   File size: 3,727 Bytes
#   5b4c268

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr

def generate_prompt(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
    """
    Build the model prompt from a template file and a table-metadata file.

    The template read from ``prompt_file`` is filled in with ``str.format``:
    ``{user_question}`` receives ``question`` and ``{table_metadata_string}``
    receives the full contents of ``metadata_file``.

    Args:
        question: The user's natural language question.
        prompt_file: Path of the prompt template file.
        metadata_file: Path of the file holding the table metadata.

    Returns:
        The formatted prompt. If either file is missing, or the template
        cannot be formatted, a message starting with "Error:" is returned
        instead of raising.
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(prompt_file, "r", encoding="utf-8") as f:
            template = f.read()
    except FileNotFoundError:
        # Name the path that was actually requested, not the default one.
        return f"Error: {prompt_file} file not found."

    try:
        with open(metadata_file, "r", encoding="utf-8") as f:
            table_metadata_string = f.read()
    except FileNotFoundError:
        return f"Error: {metadata_file} file not found."

    try:
        return template.format(
            user_question=question, table_metadata_string=table_metadata_string
        )
    except (KeyError, IndexError, ValueError) as exc:
        # Unknown placeholders or unbalanced braces in the template are
        # reported in the same "Error:" style as the missing-file cases.
        return f"Error: could not format {prompt_file}: {exc!r}"

def get_tokenizer_model(model_name):
    """
    Load the tokenizer and causal language model for ``model_name``.

    Args:
        model_name: Identifier of the model repository to load from.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    model_options = {
        # Lets the model repository supply custom code; only needed for
        # models that ship their own implementation.
        "trust_remote_code": True,
        "torch_dtype": torch.float16,
        # Automatically maps the model to available devices.
        "device_map": "auto",
        "use_cache": True,
    }
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForCausalLM.from_pretrained(model_name, **model_options)
    return loaded_tokenizer, loaded_model

# One-time start-up work: the tokenizer and model are loaded here, at module
# level, so that every later request reuses the same objects.
model_name = "defog/sqlcoder-7b-2"  # Replace with your model name
print("Loading model and tokenizer...")
tokenizer, model = get_tokenizer_model(model_name=model_name)
print("Model and tokenizer loaded successfully.")

# Shared text-generation pipeline built on the objects loaded above.
text_gen_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=300,
    num_beams=5,       # Beam search for better quality
    do_sample=False,   # No sampling, so output is deterministic
)

def _extract_sql(generated_text):
    """Return the first SQL statement in ``generated_text``, or "" if none."""
    # Keep only what precedes the first ';' and the first ``` fence, then
    # restore the terminating semicolon.
    statement = generated_text.partition(";")[0].partition("```")[0].strip()
    return f"{statement};" if statement else ""


def run_inference_gradio(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
    """
    Generate an SQL query from the user's natural language question.

    Args:
        question: The question typed by the user; ``None``, empty, or
            whitespace-only input is rejected.
        prompt_file: Path of the prompt template passed to ``generate_prompt``.
        metadata_file: Path of the table metadata passed to ``generate_prompt``.

    Returns:
        The generated SQL query terminated by ";", or a human-readable
        message when the input is empty, the prompt cannot be built, the
        model call fails, or the model produces no query text.
    """
    # Guard against None as well as blank text so .strip() cannot raise.
    if not question or not question.strip():
        return "Please enter a valid question."

    prompt = generate_prompt(question, prompt_file, metadata_file)

    # generate_prompt reports problems as a string starting with "Error:".
    if prompt.startswith("Error:"):
        return prompt

    eos_token_id = tokenizer.eos_token_id
    try:
        generated = text_gen_pipeline(
            prompt,
            num_return_sequences=1,
            eos_token_id=eos_token_id,
            # The EOS token id is reused as the padding token id.
            pad_token_id=eos_token_id,
        )
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"Error during model inference: {e}"

    sql_query = _extract_sql(generated[0]["generated_text"])
    if not sql_query:
        # Previously an empty generation was returned as a bare ";".
        return "Error: the model did not return a SQL query."
    return sql_query

# Gradio UI: one question textbox in, one generated-SQL textbox out, with
# run_inference_gradio as the handler.
iface = gr.Interface(
    title="Text-to-SQL Generator",
    description=(
        "Enter a natural language question related to your database, "
        "and this tool will generate the corresponding SQL query. "
        "Ensure that 'prompt.md' and 'metadata.sql' are correctly set up "
        "in the application directory."
    ),
    fn=run_inference_gradio,
    inputs=gr.Textbox(
        label="Question",
        placeholder="Enter your natural language question here...",
        lines=4,
    ),
    outputs=gr.Textbox(label="Generated SQL Query"),
    examples=[
        [
            "Do we get more sales from customers in New York compared to "
            "customers in San Francisco? Give me the total sales for each "
            "city, and the difference between the two."
        ],
    ],
    allow_flagging="never",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()