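# Hugging Face file-viewer header (page residue, not part of the program):
# uploaded by LeonceNsh · commit message "Create app.py" · commit 5b4c268 (verified)
# file size: 3.73 kB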
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
def generate_prompt(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
    """
    Build the model prompt for a user question.

    Reads the prompt template from ``prompt_file`` and the table metadata from
    ``metadata_file``, then fills the template's ``{user_question}`` and
    ``{table_metadata_string}`` placeholders with ``str.format``.

    Args:
        question: The user's natural language question.
        prompt_file: Path to the prompt template file.
        metadata_file: Path to the file containing the table metadata.

    Returns:
        The formatted prompt. If either file is missing, or the template
        cannot be formatted, a message starting with ``"Error:"`` is returned
        instead of raising, so callers can detect failure by that prefix.
    """
    try:
        # Explicit encoding so the result does not depend on the host locale.
        with open(prompt_file, "r", encoding="utf-8") as f:
            template = f.read()
    except FileNotFoundError:
        # Name the path that was actually requested, not the default one.
        return f"Error: {prompt_file} file not found."

    try:
        with open(metadata_file, "r", encoding="utf-8") as f:
            table_metadata_string = f.read()
    except FileNotFoundError:
        return f"Error: {metadata_file} file not found."

    try:
        return template.format(
            user_question=question,
            table_metadata_string=table_metadata_string,
        )
    except (KeyError, IndexError, ValueError) as exc:
        # str.format raises these for unknown fields or unbalanced braces in
        # the template; report them through the same "Error:" convention.
        return f"Error: could not format the prompt template {prompt_file}: {exc!r}"
def get_tokenizer_model(model_name):
    """
    Load the tokenizer and the causal language model for ``model_name``.

    Args:
        model_name: Model repository identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Options forwarded unchanged to AutoModelForCausalLM.from_pretrained.
    load_options = {
        "trust_remote_code": True,  # set to True if the model uses custom code
        "torch_dtype": torch.float16,
        "device_map": "auto",  # automatically maps the model to available devices
        "use_cache": True,
    }
    model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

    return tokenizer, model
# The tokenizer and model are loaded a single time, when the script starts.
model_name = "defog/sqlcoder-7b-2"  # replace with your model name
print("Loading model and tokenizer...")
tokenizer, model = get_tokenizer_model(model_name)
print("Model and tokenizer loaded successfully.")

# Text-generation pipeline built around the loaded model and tokenizer.
text_gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=300,
    num_beams=5,  # beam search for better quality
    do_sample=False,  # sampling disabled for deterministic output
)
def _extract_sql(generated_text):
    """Return the first SQL statement in the model output, without its ';'.

    An opening Markdown fence (``` or ```sql on its own line) is dropped
    first; the text is then cut at the first ';' and at the first remaining
    fence. Returns an empty string when nothing is left.
    """
    text = generated_text.strip()
    if text.startswith("```"):
        text = text[3:]
        tag, newline, remainder = text.partition("\n")
        # Only discard the first line when it is a bare fence or language tag;
        # otherwise it may already contain SQL.
        if newline and tag.strip().lower() in ("", "sql"):
            text = remainder
    # NOTE: a ';' inside a string literal still ends the statement early.
    return text.split(";")[0].split("```")[0].strip()


def run_inference_gradio(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
    """
    Generate an SQL query from the user's natural language question.

    Relies on the module-level ``tokenizer`` and ``text_gen_pipeline`` and on
    ``generate_prompt`` for building the prompt.

    Args:
        question: The user's question; ``None``, empty or whitespace-only
            input is rejected with a message.
        prompt_file: Path to the prompt template, forwarded to
            ``generate_prompt``.
        metadata_file: Path to the table metadata, forwarded to
            ``generate_prompt``.

    Returns:
        The generated SQL statement terminated by ``";"``, or a human-readable
        message when the input is empty, the prompt could not be built,
        inference failed, or the model produced no SQL.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not question or not question.strip():
        return "Please enter a valid question."

    prompt = generate_prompt(question, prompt_file, metadata_file)
    if prompt.startswith("Error:"):
        return prompt  # generate_prompt reports failures via this prefix

    eos_token_id = tokenizer.eos_token_id
    try:
        generated = text_gen_pipeline(
            prompt,
            num_return_sequences=1,
            eos_token_id=eos_token_id,
            pad_token_id=eos_token_id,
        )
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error during model inference: {str(e)}"

    sql_query = _extract_sql(generated[0]["generated_text"])
    if not sql_query:
        # Avoid presenting a lone ";" as if it were a query.
        return "Error: the model did not generate an SQL query."
    return sql_query + ";"
# Gradio UI: one question textbox in, one generated-SQL textbox out.
iface = gr.Interface(
    fn=run_inference_gradio,
    title="Text-to-SQL Generator",
    description=(
        "Enter a natural language question related to your database, and this tool "
        "will generate the corresponding SQL query. Ensure that 'prompt.md' and "
        "'metadata.sql' are correctly set up in the application directory."
    ),
    inputs=gr.Textbox(
        label="Question",
        placeholder="Enter your natural language question here...",
        lines=4,
    ),
    outputs=gr.Textbox(label="Generated SQL Query"),
    examples=[
        ["Do we get more sales from customers in New York compared to customers in San Francisco? Give me the total sales for each city, and the difference between the two."]
    ],
    allow_flagging="never",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()