|
--- |
|
language: en |
|
datasets: |
|
- wikisql |
|
widget: |
|
- text: "question: get people name with age equal 25 table: id, name, age" |
|
--- |
|
|
|
# How to use |
|
```python |
|
from typing import List |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
# The tokenizer and the seq2seq model are loaded from the same Hub checkpoint.
checkpoint = "juierror/text-to-sql-with-table-schema"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
|
|
|
def prepare_input(question: str, table: List[str]):
    """Build the model prompt and tokenize it.

    The prompt has the form ``question: <question> table: <col1>,<col2>,...``,
    where the entries of ``table`` are joined with commas (no spaces).

    Args:
        question: Natural-language question to translate.
        table: Column names describing the table schema.

    Returns:
        The ``input_ids`` of the tokenizer output, requested as PyTorch
        tensors via ``return_tensors="pt"``.
    """
    columns = ",".join(table)
    prompt = f"question: {question} table: {columns}"
    encoded = tokenizer(prompt, max_length=700, return_tensors="pt")
    return encoded.input_ids
|
|
|
def inference(question: str, table: List[str]) -> str:
    """Generate the model's output text for a question and a table schema.

    Args:
        question: Natural-language question to translate.
        table: Column names describing the table schema.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # Move the token ids to whatever device the model currently lives on.
    token_ids = prepare_input(question=question, table=table).to(model.device)
    generated = model.generate(inputs=token_ids, num_beams=10, top_k=10, max_length=700)
    # Only the first returned sequence is decoded.
    return tokenizer.decode(token_ids=generated[0], skip_special_tokens=True)
|
|
|
# Example: the column names must be passed through the `table` keyword, which
# is the parameter name `inference` declares; any other keyword raises TypeError.
print(inference(question="get xml id from json cdr table", table=["id", "extensions", "age"]))
|
``` |
|
|
|
There is a newer version of this model that uses Flan-T5 as its base model. You can check it out [here](https://huggingface.co/juierror/flan-t5-text2sql-with-schema). |
|
|
|
PS. From this [discussion](https://huggingface.co/juierror/flan-t5-text2sql-with-schema/discussions/5), I think the base model that I used for fine-tuning does not support the token `<`, so this might not be a good model for this task. |