aarohanverma committed on
Commit
32aace6
·
verified ·
1 Parent(s): c9b612a

Updated README.md

Browse files
Files changed (1) hide show
  1. README.md +36 -37
README.md CHANGED
@@ -203,51 +203,55 @@ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
203
  import logging
204
 
205
  # Set up logging
206
- logging.basicConfig(
207
- level=logging.INFO,
208
- format="%(asctime)s - %(levelname)s - %(message)s",
209
- )
210
  logger = logging.getLogger(__name__)
211
 
212
- # Set device (GPU if available)
213
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
214
 
215
  # Load the fine-tuned model and tokenizer
216
- model_name = "aarohanverma/text2sql-flan-t5-base-qlora-finetuned"
217
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
218
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
219
 
 
 
 
 
220
  def run_inference(prompt_text: str) -> str:
221
- """
222
- Runs inference using deterministic decoding with beam search.
223
- """
224
- inputs = tokenizer(prompt_text, return_tensors="pt").to(device)
225
- generated_ids = model.generate(
226
- input_ids=inputs["input_ids"],
227
- max_new_tokens=250,
228
- temperature=0.0,
229
- num_beams=3,
230
- early_stopping=True,
231
- )
232
- return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
233
 
234
  # Example usage:
235
  context = (
236
- "CREATE TABLE customers (id INT PRIMARY KEY, name VARCHAR(100), country VARCHAR(50)); "
237
- "CREATE TABLE orders (order_id INT PRIMARY KEY, customer_id INT, total_amount DECIMAL(10,2), "
238
- "order_date DATE, FOREIGN KEY (customer_id) REFERENCES customers(id)); "
239
- "INSERT INTO customers (id, name, country) VALUES (1, 'Alice', 'USA'), (2, 'Bob', 'UK'), "
240
- "(3, 'Charlie', 'Canada'), (4, 'David', 'USA'); "
241
- "INSERT INTO orders (order_id, customer_id, total_amount, order_date) VALUES "
242
- "(101, 1, 500, '2024-01-15'), (102, 2, 300, '2024-01-20'), "
243
- "(103, 1, 700, '2024-02-10'), (104, 3, 450, '2024-02-15'), "
244
- "(105, 4, 900, '2024-03-05');"
245
- )
246
- query = (
247
- "Retrieve the total order amount for each customer, showing only customers from the USA, "
248
- "and sort the result by total order amount in descending order."
249
  )
250
 
 
 
 
251
  # Construct the prompt
252
  sample_prompt = f"""Context:
253
  {context}
@@ -269,12 +273,7 @@ print(query)
269
  print("\nResponse:")
270
  print(generated_sql)
271
 
272
- # Expected Output:
273
- # SELECT customers.name, SUM(orders.total_amount) as total_amount FROM customers
274
- # INNER JOIN orders ON customers.id = orders.customer_id
275
- # WHERE customers.country = 'USA'
276
- # GROUP BY customers.name
277
- # ORDER BY total_amount DESC;
278
  ```
279
 
280
  ## Citation
 
203
  import logging
204
 
205
  # Set up logging
206
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
 
 
207
  logger = logging.getLogger(__name__)
208
 
209
+ # Ensure device is set (GPU if available)
210
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
211
 
212
  # Load the fine-tuned model and tokenizer
213
+ model_name = "aarohanverma/text2sql-flan-t5-base-qlora-finetuned"
214
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
215
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
216
 
217
+ # Ensure decoder start token is set
218
+ if model.config.decoder_start_token_id is None:
219
+ model.config.decoder_start_token_id = tokenizer.pad_token_id
220
+
221
  def run_inference(prompt_text: str) -> str:
222
+ """
223
+ Runs inference on the fine-tuned model using beam search with fixes for repetition.
224
+ """
225
+ inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=512).to(device)
226
+
227
+ generated_ids = model.generate(
228
+ input_ids=inputs["input_ids"],
229
+ decoder_start_token_id=model.config.decoder_start_token_id, # ✅ Ensure decoder start token
230
+ max_new_tokens=100, # ✅ Limit to prevent excessive output
231
+ temperature=0.1, # ✅ Adds slight randomness to avoid repetition
232
+ num_beams=5, # ✅ Increases quality
233
+ repetition_penalty=1.2, # ✅ Penalizes repetition
234
+ early_stopping=True, # ✅ Stops generation once complete
235
+ )
236
+
237
+ generated_sql = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
238
+
239
+ # ✅ Post-processing to remove repeated text
240
+ generated_sql = generated_sql.split(";")[0] + ";" # Keep only the first valid SQL query
241
+
242
+ return generated_sql
243
 
244
  # Example usage:
245
  context = (
246
+ "CREATE TABLE students (id INT PRIMARY KEY, name VARCHAR(100), age INT, grade CHAR(1)); "
247
+ "INSERT INTO students (id, name, age, grade) VALUES "
248
+ "(1, 'Alice', 14, 'A'), (2, 'Bob', 15, 'B'), "
249
+ "(3, 'Charlie', 14, 'A'), (4, 'David', 16, 'C'), (5, 'Eve', 15, 'B');"
 
 
 
 
 
 
 
 
 
250
  )
251
 
252
+ query = ("Retrieve the names of students who are 15 years old.")
253
+
254
+
255
  # Construct the prompt
256
  sample_prompt = f"""Context:
257
  {context}
 
273
  print("\nResponse:")
274
  print(generated_sql)
275
 
276
+ # EXPECTED RESPONSE: SELECT name FROM students WHERE age = 15;
 
 
 
 
 
277
  ```
278
 
279
  ## Citation