File size: 3,886 Bytes
47796ca 8347fc4 4e2df07 8347fc4 b884f75 c3ced67 b884f75 c3ced67 b6bd3b8 c3ced67 8347fc4 c3ced67 b6bd3b8 c3ced67 8347fc4 c3ced67 b884f75 c3ced67 b884f75 c3ced67 b884f75 c3ced67 8347fc4 4e2df07 b884f75 8347fc4 4e2df07 b884f75 8347fc4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
# Markdown (with inline HTML) rendered at the top of the Gradio page.
title = """# 🙋🏻♂️Welcome to 🌟Tonic's Defog 🌬️🌁🌫️SqlCoder-2
You can use this Space to test out the current model [defog/sqlcoder2](https://huggingface.co/defog/sqlcoder2). [defog/sqlcoder2](https://huggingface.co/defog/sqlcoder2) is a 15B parameter model that doesn't outperform gpt-4 and gpt-4-turbo for natural language to SQL generation tasks on our sql-eval framework, and significantly outperforms all popular open-source models.
You can also use Defog 🌬️🌁🌫️SqlCoder by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/sqlcoder2?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻[![Let's build the future of AI together! 🚀🤖](https://discordapp.com/api/guilds/1109943800132010065/widget.png)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [Poly](https://github.com/tonic-ai/poly) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
# Module-level handles: assigned by load_tokenizer_model(), read by run_inference().
global_tokenizer = None
global_model = None
def load_tokenizer_model(model_name):
    """Load the tokenizer and causal-LM for *model_name* into the module globals.

    Args:
        model_name: Identifier handed to both ``from_pretrained`` calls.

    Returns:
        None. The loaded objects are stored in ``global_tokenizer`` and
        ``global_model``.
    """
    global global_tokenizer, global_model

    # The tokenizer is assigned before the model load starts, so it remains
    # set even if loading the model raises.
    global_tokenizer = AutoTokenizer.from_pretrained(model_name)

    model_options = {
        "trust_remote_code": True,
        "torch_dtype": torch.float16,
        "device_map": "auto",
        "use_cache": True,
    }
    global_model = AutoModelForCausalLM.from_pretrained(model_name, **model_options)
def generate_prompt(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
    """Build the model prompt by filling the template file with the question and schema.

    Args:
        question: Text substituted for the ``{user_question}`` field.
        prompt_file: Path to the template; its full contents are passed
            through ``str.format``.
        metadata_file: Path to the file whose full contents replace the
            ``{table_metadata_string}`` field.

    Returns:
        The formatted prompt string.

    Raises:
        OSError: If either file cannot be opened.
        KeyError, IndexError, ValueError: Propagated from ``str.format`` when
            the template contains other replacement fields or unbalanced
            braces.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(prompt_file, "r", encoding="utf-8") as template_fh:
        template = template_fh.read()
    with open(metadata_file, "r", encoding="utf-8") as metadata_fh:
        table_metadata_string = metadata_fh.read()

    return template.format(
        user_question=question,
        table_metadata_string=table_metadata_string,
    )
@spaces.GPU
def run_inference(question):
    """Generate a SQL query for *question* using the loaded tokenizer and model.

    Reads ``global_tokenizer`` and ``global_model``, so
    ``load_tokenizer_model`` must have been called first.

    Args:
        question: Natural-language question forwarded to ``generate_prompt``.

    Returns:
        The extracted SQL text, stripped of surrounding whitespace and
        terminated with a single ``;``.
    """
    prompt = generate_prompt(question)
    eos_id = global_tokenizer.eos_token_id

    text_generator = pipeline(
        "text-generation",
        model=global_model,
        tokenizer=global_tokenizer,
        max_new_tokens=300,
        do_sample=False,
        num_beams=5,
    )
    # The EOS id doubles as the padding id for generation.
    results = text_generator(
        prompt,
        num_return_sequences=1,
        eos_token_id=eos_id,
        pad_token_id=eos_id,
    )
    generated_text = results[0]["generated_text"]

    # Keep what follows the last ```sql fence, cut at the next ``` fence,
    # then keep only the text before the first semicolon.
    after_sql_fence = generated_text.rpartition("```sql")[2]
    fenced_body = after_sql_fence.partition("```")[0]
    first_statement = fenced_body.partition(";")[0]

    return first_statement.strip() + ";"
def main():
    """Load the defog/sqlcoder2 model, build the Gradio interface, and launch it."""
    load_tokenizer_model("defog/sqlcoder2")

    with gr.Blocks() as demo:
        gr.Markdown(title)
        question_box = gr.Textbox(label="Enter your question")
        generate_button = gr.Button("Generate SQL Query")
        sql_box = gr.Textbox(label="🌬️🌁🌫️SqlCoder-2")
        # Clicking the button sends the question text to run_inference and
        # shows the returned string in the output box.
        generate_button.click(
            fn=run_inference,
            inputs=question_box,
            outputs=sql_box,
        )

    # NOTE(review): the original indentation was lost; launch() is placed
    # after the Blocks context closes — confirm this matches the intent.
    demo.launch()
# Start the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()