Update app.py
app.py
CHANGED
```diff
@@ -1,17 +1,22 @@
+from collections import namedtuple
+
 import spaces
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-title = """# Minitron
+title = """# Minitron Story Generator"""
 description = """
 # Minitron
 
-Minitron is a family of small language models (SLMs) obtained by pruning [NVIDIA's](https://huggingface.co/nvidia) Nemotron-4 15B model
+Minitron is a family of small language models (SLMs) obtained by pruning [NVIDIA's](https://huggingface.co/nvidia) Nemotron-4 15B, Llama-3.1-8B, or Mistral-NeMo models.
+We prune the number of transformer blocks, the embedding size, the attention heads, and the MLP intermediate dimension, and then perform continued training with distillation to arrive at the final models.
 
 # Short Story Generator
 Welcome to the Short Story Generator! This application helps you create unique short stories based on your inputs.
 
+This application will show you the output of several models in the Minitron family. Outputs are shown side by side so you can compare them.
+
 **Instructions:**
 1. **Main Character:** Describe the main character of your story. For example, "a brave knight" or "a curious cat".
 2. **Setting:** Describe the setting where your story takes place. For example, "in an enchanted forest" or "in a bustling city".
@@ -29,55 +34,51 @@ inputs = [
     gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 ]
 
-
+Model = namedtuple('Model', ['name', 'llm', 'tokenizer'])
 
-
-
-
+model_paths = [
+    "nvidia/Llama-3.1-Minitron-4B-Width-Base",
+    "nvidia/Llama-3.1-Minitron-4B-Depth-Base",
+    "nvidia/Mistral-NeMo-Minitron-8B-Base",
+]
 
 device='cuda'
 dtype=torch.bfloat16
-
+
+# Load the tokenizers and models.
+models = [
+    Model(
+        name=p.split("/")[-1],
+        llm=AutoModelForCausalLM.from_pretrained(p, torch_dtype=dtype, device_map=device),
+        tokenizer=AutoTokenizer.from_pretrained(p),
+    ) for p in model_paths
+]
+
+outputs = [
+    gr.Textbox(label=f"Generated Story ({model.name})") for model in models
+]
 
 # Define the prompt format
 def create_prompt(instruction):
     PROMPT = '''Below is an instruction that describes a task.\n\nWrite a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:'''
     return PROMPT.format(instruction=instruction)
 
-@spaces.GPU
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    prompt = create_prompt(message)
-
-    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
-
-    output_ids = model.generate(input_ids, max_length=50, num_return_sequences=1)
-
-    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-    return output_text
 
 @spaces.GPU
 def generate_story(character, setting, plot_twist, max_tokens, temperature, top_p):
     """Define the function to generate the story."""
     prompt = f"Write a short story with the following details:\nMain character: {character}\nSetting: {setting}\nPlot twist: {plot_twist}\n\nStory:"
-
-
-
-
-
+
+    output_texts = []
+
+    for model in models:
+        input_ids = model.tokenizer.encode(prompt, return_tensors="pt").to(model.llm.device)
+        output_ids = model.llm.generate(input_ids, max_length=max_tokens, num_return_sequences=1, temperature=temperature, top_p=top_p)
+        output_text = model.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        output_texts.append(output_text[len(prompt):])
 
-    return
-
-#demo = gr.ChatInterface(
-#    title=gr.Markdown(title),
-#    description=gr.Markdown(description),
-#    fn=generate_story,
-#    additional_inputs=[
-#        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-#        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-#        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
-#    ],
-#)
+    return output_texts
+
 
 # Create the Gradio interface
 demo = gr.Interface(
```