|
--- |
|
language: en |
|
widget: |
|
- text: >- |
|
convert question and table into SQL query. tables: people_name(id,name), |
|
people_age(people_id,age). question: how many people with name jui and age |
|
less than 25 |
|
license: cc-by-sa-4.0 |
|
pipeline_tag: text2text-generation |
|
inference: |
|
parameters: |
|
max_length: 512 |
|
num_beams: 10 |
|
top_k: 10 |
|
--- |
|
|
|
This is an upgraded version of [https://huggingface.co/juierror/flan-t5-text2sql-with-schema](https://huggingface.co/juierror/flan-t5-text2sql-with-schema). |
|
|
|
It supports the '<' sign and can handle multiple tables. |
|
|
|
# How to use |
|
```python |
|
from typing import Dict, List

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
# Load the tokenizer and the seq2seq model from the same published checkpoint.
MODEL_NAME = "juierror/flan-t5-text2sql-with-schema-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
|
|
|
def get_prompt(tables: str, question: str) -> str:
    """Build the text prompt that is fed to the model.

    Args:
        tables: The schema already serialised as text, e.g.
            ``"people_name(id,name), people_age(people_id,age)"``.
        question: The natural-language question to convert into SQL.

    Returns:
        The prompt string containing the instruction, the schema and the
        question.
    """
    return f"convert question and table into SQL query. tables: {tables}. question: {question}"
|
|
|
def prepare_input(question: str, tables: Dict[str, List[str]]):
    """Serialise the schema, build the prompt and tokenize it.

    Args:
        question: The natural-language question to convert into SQL.
        tables: Mapping of table name to its list of column names.

    Returns:
        The ``input_ids`` produced by the tokenizer for the prompt
        (requested as PyTorch tensors via ``return_tensors="pt"``).
    """
    # Render each table as ``name(col1,col2,...)`` and join them with ", ".
    schema = ", ".join(
        f"{table_name}({','.join(columns)})"
        for table_name, columns in tables.items()
    )
    prompt = get_prompt(schema, question)
    # Request truncation explicitly so that max_length is enforced without
    # relying on the tokenizer's implicit fallback (which emits a warning).
    encoded = tokenizer(
        prompt,
        max_length=512,
        truncation=True,
        return_tensors="pt",
    )
    return encoded.input_ids
|
|
|
def inference(question: str, tables: Dict[str, List[str]]) -> str:
    """Generate a SQL query for ``question`` over the given ``tables``.

    Args:
        question: The natural-language question to convert into SQL.
        tables: Mapping of table name to its list of column names.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    input_ids = prepare_input(question=question, tables=tables)
    # Move the inputs to whichever device the model weights live on.
    input_ids = input_ids.to(model.device)
    # NOTE(review): top_k is a sampling option; with beam search and
    # do_sample left unset it presumably has no effect -- confirm against the
    # transformers generation docs before removing it.
    outputs = model.generate(
        inputs=input_ids,
        num_beams=10,
        top_k=10,
        max_length=512,
    )
    return tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
|
|
|
# Example 1: the question spans two tables.
print(inference(
    "how many people with name jui and age less than 25",
    {
        "people_name": ["id", "name"],
        "people_age": ["people_id", "age"],
    },
))

# Example 2: a single table holds every column the question needs.
print(inference(
    "what is id with name jui and age less than 25",
    {
        "people_name": ["id", "name", "age"],
    },
))
|
``` |
|
|
|
# Dataset |
|
- [CoSQL](https://yale-lily.github.io/cosql) |
|
- [spider](https://yale-lily.github.io/spider) |
|
- [SParC](https://yale-lily.github.io/sparc) |