Spaces:
Sleeping
Sleeping
File size: 2,396 Bytes
4f83ec0 6447366 4f83ec0 6447366 4f83ec0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import time
import gradio as gr
import io
import pandas as pd
import spaces
from generate import stream_file
@spaces.GPU(duration=120)
def stream_output(filename: str):
    """Generate a dataset for *filename* and stream it to the UI chunk by chunk.

    A trailing ``.jsonl`` extension is stripped before the name is handed to
    ``stream_file``; names without the extension are passed through unchanged.

    Args:
        filename: Requested dataset file name, with or without ``.jsonl``.

    Yields:
        ``(df, markdown, state_msg)`` after every received chunk: a DataFrame
        parsed from all text received so far, that same text wrapped in a
        fenced ``json`` Markdown block, and a progress / completion message.
    """
    if filename.endswith(".jsonl"):
        filename = filename[:-len(".jsonl")]
    content = ""
    size = 3
    start_time = time.time()
    chunks = stream_file(
        filename=filename,
        prompt="",
        columns=[],
        seed=42,
        size=size,
    )
    # NOTE(review): the progress logic treats every chunk as exactly one
    # complete JSON line (one sample) -- confirm against generate.stream_file.
    for generated, chunk in enumerate(chunks, start=1):
        content += chunk
        # Re-parse everything received so far so the table mirrors `content`.
        df = pd.read_json(io.StringIO(content), lines=True)
        if generated == size:
            state_msg = f"✅ Done generating {size} samples in {time.time() - start_time:.2f}s"
        else:
            # Report the number of samples already received (1-based); the
            # previous 0-based index lagged the displayed table by one row.
            state_msg = f"⚙️ Generating... [{generated}/{size}]"
        yield df, "```json\n" + content + "\n```", state_msg
def test(filename: str):
    """Simulate a streamed generation with dummy rows, without any model call.

    Produces ten JSON Lines records of the form
    ``{"i": <index>, "filename": <filename>}``, pausing 0.1 s after each one.

    Args:
        filename: Requested dataset file name; must end with ``.jsonl``.

    Yields:
        ``(df, markdown, state_msg)`` in the same order as ``stream_output``:
        a DataFrame parsed from the records so far, the raw text wrapped in a
        fenced ``json`` Markdown block, and a progress / completion message.
        For an invalid file name a single ``(None, "", error_message)`` tuple
        is yielded instead.
    """
    import json  # local import: only this function needs it

    if not filename.endswith(".jsonl"):
        # Keep the (dataframe, file content, status) slot order of the normal
        # yields so the error text lands in the status slot, not the table.
        yield None, "", "❌ 404: File name must end with .jsonl"
        return
    content = ""
    size = 10
    start_time = time.time()
    for generated in range(1, size + 1):
        # Serialise with json.dumps so quotes/backslashes in the file name are
        # escaped and the accumulated text stays valid JSON Lines.
        record = {"i": generated - 1, "filename": filename}
        content += json.dumps(record, ensure_ascii=False) + "\n"
        df = pd.read_json(io.StringIO(content), lines=True)
        if generated == size:
            state_msg = f"✅ Done generating {size} samples in {time.time() - start_time:.2f}s"
        else:
            # 1-based count of rows already produced, matching the table.
            state_msg = f"⚙️ Generating... [{generated}/{size}]"
        yield df, "```json\n" + content + "\n```", state_msg
        time.sleep(0.1)
# Page heading rendered at the top of the app.
title = "LLM DataGen"

# Short summary rendered directly below the heading.
description = "Generate and stream synthetic dataset files in JSON Lines format"

# Sample file names offered as one-click inputs; the first entry is also used
# as the textbox's initial value and placeholder.
examples = [
    "movies_data.jsonl",
    "common_first_names.jsonl",
    "bad_amazon_reviews_on_defunct_products_that_people_hate.jsonl",
    "dungeon_and_dragon_characters.jsonl",
]
# Page layout: heading, file-name input with examples, a generate button, a
# status line, and two tabs showing the result as a table and as raw text.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    # File-name input, pre-filled with the first example.
    filename_comp = gr.Textbox(value=examples[0], placeholder=examples[0])
    gr.Examples(examples=examples, inputs=filename_comp)

    generate_button = gr.Button("Generate dataset")
    state_msg_comp = gr.Markdown("🔥 Ready to generate")

    with gr.Tab("Dataset"):
        dataframe_comp = gr.DataFrame()

    with gr.Tab("File content"):
        with gr.Blocks(fill_height=True):
            with gr.Row():
                file_content_comp = gr.Markdown()

    # stream_output yields (dataframe, file content, status); the outputs list
    # must stay in that order.
    generate_button.click(
        fn=stream_output,
        inputs=filename_comp,
        outputs=[dataframe_comp, file_content_comp, state_msg_comp],
    )

demo.launch()
|