Spaces:

LeonceNsh
/

usgov-contracts-rag

Running on CPU Upgrade

App Files Files Community

usgov-contracts-rag / app.py

LeonceNsh

Create app.py

5b4c268 verified 8 months ago

raw

history blame

3.73 kB

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	import gradio as gr

	def generate_prompt(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
	    """
	    Build the model prompt from a template file and a table-metadata file.

	    The template is read from ``prompt_file`` and must contain the
	    ``{user_question}`` and ``{table_metadata_string}`` placeholders, which
	    are filled in with ``str.format``.

	    Args:
	        question: The user's natural language question.
	        prompt_file: Path of the prompt template.
	        metadata_file: Path of the file holding the table metadata.

	    Returns:
	        The formatted prompt, or a string starting with ``"Error:"`` when
	        one of the two files does not exist (errors are reported in-band).
	    """
	    try:
	        # Explicit encoding so the result does not depend on the host locale.
	        with open(prompt_file, "r", encoding="utf-8") as f:
	            prompt = f.read()
	    except FileNotFoundError:
	        # Report the path that was actually requested, not a fixed name.
	        return f"Error: {prompt_file} file not found."

	    try:
	        with open(metadata_file, "r", encoding="utf-8") as f:
	            table_metadata_string = f.read()
	    except FileNotFoundError:
	        return f"Error: {metadata_file} file not found."

	    # str.format inserts the values verbatim: braces inside the question or
	    # the metadata are not re-interpreted as placeholders.
	    return prompt.format(
	        user_question=question,
	        table_metadata_string=table_metadata_string,
	    )

	def get_tokenizer_model(model_name, torch_dtype=torch.float16):
	    """
	    Load the tokenizer and causal language model for ``model_name``.

	    Args:
	        model_name: Model repository id (or local path) handed to
	            ``from_pretrained``.
	        torch_dtype: Precision the weights are loaded in. Defaults to
	            ``torch.float16``, the value this function always used.
	            NOTE(review): half precision on a CPU-only host may be slow or
	            unsupported for some ops depending on the installed PyTorch;
	            confirm, and pass ``torch.bfloat16`` or ``torch.float32`` if so.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForCausalLM.from_pretrained(
	        model_name,
	        # NOTE(review): this permits custom code shipped in the model repo to
	        # run locally; only needed if the model uses custom code. Confirm it
	        # is required for the configured model and that the repo is trusted.
	        trust_remote_code=True,
	        torch_dtype=torch_dtype,
	        device_map="auto",  # Automatically maps the model to available devices
	        use_cache=True,
	    )
	    return tokenizer, model

	# Model repository served by this app; replace with your own model name.
	model_name = "defog/sqlcoder-7b-2"

	# The weights are loaded a single time at start-up and reused afterwards.
	print("Loading model and tokenizer...")
	tokenizer, model = get_tokenizer_model(model_name)
	print("Model and tokenizer loaded successfully.")

	# Generation settings applied to every pipeline call.
	_generation_settings = {
	    "max_new_tokens": 300,
	    "do_sample": False,  # No sampling, for deterministic output
	    "return_full_text": False,
	    "num_beams": 5,  # Beam search for better quality
	}

	# Text generation pipeline built on the already-loaded model and tokenizer.
	text_gen_pipeline = pipeline(
	    "text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    **_generation_settings,
	)

	def _extract_sql_query(generated_text):
	    """Return the first SQL statement in the text ending in ';', or '' if none."""
	    text = generated_text.strip()
	    if text.startswith("```"):
	        # Skip an opening Markdown fence; otherwise everything before the
	        # first fence is empty and the query is lost.
	        fence_line, newline, remainder = text[3:].partition("\n")
	        if newline and fence_line.strip().lower() in ("", "sql"):
	            text = remainder  # the fence line held only an optional "sql" tag
	        else:
	            text = text[3:]
	    # Keep what precedes the first ';' and, within that, the first fence.
	    statement = text.split(";")[0].split("```")[0].strip()
	    return f"{statement};" if statement else ""


	def run_inference_gradio(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
	    """
	    Generate an SQL query from the user's natural language question.

	    Args:
	        question: The question typed by the user; ``None``, empty and
	            whitespace-only values are rejected.
	        prompt_file: Path of the prompt template passed to ``generate_prompt``.
	        metadata_file: Path of the table-metadata file passed to
	            ``generate_prompt``.

	    Returns:
	        The generated SQL statement terminated by ``;``, or a human-readable
	        message when the input is invalid, the prompt could not be built,
	        inference failed, or the model produced no SQL.
	    """
	    # Guard against None as well as blank text so the call never raises here.
	    if question is None or not question.strip():
	        return "Please enter a valid question."

	    prompt = generate_prompt(question, prompt_file, metadata_file)

	    # A prompt starting with "Error:" is treated as a failure message from
	    # generate_prompt (e.g. missing files) and is returned unchanged.
	    if prompt.startswith("Error:"):
	        return prompt

	    eos_token_id = tokenizer.eos_token_id
	    try:
	        generated = text_gen_pipeline(
	            prompt,
	            num_return_sequences=1,
	            eos_token_id=eos_token_id,
	            pad_token_id=eos_token_id,  # the EOS id doubles as the padding id
	        )
	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising.
	        return f"Error during model inference: {e}"

	    sql_query = _extract_sql_query(generated[0]["generated_text"])
	    if not sql_query:
	        # Previously a bare ";" was returned here.
	        return "Error: the model did not generate a SQL query."
	    return sql_query

	# Input component: a multi-line box for the natural language question.
	_question_box = gr.Textbox(
	    lines=4,
	    placeholder="Enter your natural language question here...",
	    label="Question",
	)

	# Output component: shows the SQL produced by run_inference_gradio.
	_sql_box = gr.Textbox(label="Generated SQL Query")

	# Help text displayed under the title.
	_description = (
	    "Enter a natural language question related to your database, and this tool "
	    "will generate the corresponding SQL query. Ensure that 'prompt.md' and "
	    "'metadata.sql' are correctly set up in the application directory."
	)

	# Sample question offered to the user, one row per example.
	_examples = [
	    ["Do we get more sales from customers in New York compared to customers in San Francisco? Give me the total sales for each city, and the difference between the two."],
	]

	# Gradio interface wiring the question box to the SQL generator.
	iface = gr.Interface(
	    fn=run_inference_gradio,
	    inputs=_question_box,
	    outputs=_sql_box,
	    title="Text-to-SQL Generator",
	    description=_description,
	    examples=_examples,
	    allow_flagging="never",
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()