Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import HfApi
|
3 |
+
import duckdb
|
4 |
+
from datasets import load_dataset
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
# CSS injected into the page: every element plays a single 3-second
# scale-up animation (from 10% to 100% size) and keeps the final state.
custom_css = """
* { animation: gow 3s 1 forwards; } @keyframes gow { from { transform: scale(0.1); } to { transform: scale(1.0); } }
"""

# Markup injected into the page <head>. The script tracks the pointer
# position, and on every touchstart it cancels the default touch action and
# paints all elements with one random HSLA background colour. It also defines
# the global `bkd` (a CSS url(...) string) used by the on-load script.
head_js = """
<script>var mouse = {x: undefined,y: undefined};var newX;var newY;window.addEventListener('mousemove',function (event) {mouse.x = event.x;mouse.y = event.y;});window.addEventListener('touchstart', function (event) {let touchtart = event.touches[0];event.preventDefault();mouse.x = touchtart.clientX;mouse.y = touchtart.clientY;newX = mouse.x;newY = mouse.y;var colr = 'hsla('+Math.floor(Math.random() * 360)+','+Math.floor(Math.random() * 100)+'%,'+Math.floor(Math.random() * 50)+'%,'+(Math.random() * 1)+')';document.querySelectorAll('*').forEach(item =>{ item.style.backgroundColor=colr; });}, false);var bkd = 'url('+String("https://huggingface.co/front/assets/huggingface_logo-noborder.svg")+')';</script>
"""

# Shared Hub client used by the module-level listing and by the search callback.
api = HfApi()

# Initial choices for the dataset selector: up to 100 English, non-gated
# datasets tagged for text generation.
datasets = api.list_datasets(
    filter="task_categories:text-generation",
    language="en",
    gated=False,
    limit=100,
)

# Module-level defaults; the functions visible in this file rebind their own
# local `outf` / `lst` rather than reading these.
outf = "./output.csv"
lst = []
|
18 |
+
|
19 |
+
def looky(value):
    """Search the Hub for datasets matching *value* and rebuild the selector.

    Args:
        value: Search text taken from the search box; it is formatted into a
            string before being sent as the ``search`` argument.

    Returns:
        A ``gr.CheckboxGroup`` labelled "Select Datasets" whose choices are
        the ids of up to 100 English, non-gated matching datasets.
    """
    matches = api.list_datasets(
        search=f"{value}",
        language="en",
        gated=False,
        limit=100,
    )
    dataset_ids = [entry.id for entry in matches]
    return gr.CheckboxGroup(dataset_ids, label="Select Datasets")
|
22 |
+
def preview(selected):
    """Build a preview table with the first three rows of each selected dataset.

    Args:
        selected: Iterable of dataset ids to sample from.

    Returns:
        A ``gr.Dataframe`` whose value is a pandas DataFrame holding, in
        selection order, the first three streamed ``train`` rows of every
        dataset.
    """
    # Streaming avoids downloading a whole dataset just to show three rows.
    # NOTE(review): original indentation was lost; the DataFrame is assumed
    # to be built once, after all datasets have been sampled.
    sample_rows = [
        row
        for dataset_id in selected
        for row in load_dataset(dataset_id, split='train', streaming=True).take(3)
    ]
    frame = pd.DataFrame(sample_rows)
    return gr.Dataframe(headers=["Dataset", "Sample"], value=frame)
|
30 |
+
|
31 |
+
def build_dataset(selected_datasets, num_samples):
    """Combine samples from the selected datasets and export them as CSV.

    The first ``num_samples`` streamed ``train`` rows of every selected
    dataset are concatenated, loaded into an in-memory DuckDB table and
    written to ``output.csv`` in the current working directory.

    Args:
        selected_datasets: Dataset ids to sample from. ``None`` or an empty
            collection yields an empty result without touching the Hub.
        num_samples: Rows to take per dataset. Coerced to a non-negative
            integer because the UI number field may deliver a float or None.

    Returns:
        A ``(rows, path)`` tuple: ``rows`` is the list of row tuples fetched
        back from DuckDB and ``path`` is the CSV file path. When there is
        nothing to export the result is ``([], None)``.
    """
    if not selected_datasets:
        return [], None

    # `.take()` needs a non-negative integer count.
    sample_count = max(int(num_samples or 0), 0)

    combined_data = []
    for dataset in selected_datasets:
        data = load_dataset(dataset, split='train', streaming=True).take(sample_count)
        combined_data.extend(data)

    # A frame without columns cannot back a CREATE TABLE ... AS SELECT.
    if not combined_data:
        return [], None

    # `df` is referenced by name inside the SQL below (DuckDB resolves local
    # pandas DataFrames), so this variable must keep this exact name.
    df = pd.DataFrame(combined_data)

    outf = './output.csv'
    con = duckdb.connect(database=':memory:')
    try:
        con.execute("CREATE TABLE dataset AS SELECT * FROM df")
        result = con.execute("SELECT * FROM dataset").fetchall()
        # Relative target: resolves to the same file as `outf`.
        con.execute("COPY (SELECT * FROM dataset) TO 'output.csv' (HEADER, DELIMITER ',');")
    finally:
        # Release the in-memory database even if a statement fails.
        con.close()
    return result, outf
|
45 |
+
|
46 |
+
# Build the UI. NOTE(review): the original indentation was lost, so the
# nesting of components inside the Accordion/Row below is a reconstruction;
# confirm the intended layout.
with gr.Blocks(head=head_js, css=custom_css) as iface:
    # Preview table filled by `preview` whenever the selection changes.
    frst_sample = gr.Dataframe(value=None, label="View 3 Samples per selected dataset")

    srchbx = gr.Textbox(label="Search datasets", placeholder="Search Datasets on the Hub. Type query..hit Enter.. this will update the dataset list below..")
    with gr.Accordion("Multi-Select Datasets", open=False):
        with gr.Row():
            dataset_selector = gr.CheckboxGroup([d.id for d in datasets], label="Multi-Select Datasets")
            num_samples_input = gr.Number(value=10, label="Number of Samples to retrieve per Dataset")
            build_button = gr.Button("Build Dataset")

    # Outputs of `build_dataset`: the downloadable CSV and the fetched rows.
    out_way = gr.File()
    output_display = gr.Dataframe(headers=["Dataset", "Sample"])

    build_button.click(fn=build_dataset, inputs=[dataset_selector, num_samples_input], outputs=[output_display, out_way])
    dataset_selector.change(preview, dataset_selector, frst_sample)
    # Search on Enter, as the placeholder instructs, rather than issuing a
    # Hub query on every keystroke.
    srchbx.submit(looky, srchbx, dataset_selector)

    # Client-side script run on page load (no Python callback).
    # NOTE(review): it looks up elements with ids 'moish' and 'toish', but no
    # component visible in this file declares those ids; if they are absent
    # the script will throw right after recolouring the page — verify.
    iface.load(None,None,None,js="""() =>{var colr = 'rgba('+Math.floor(Math.random() * 256)+','+Math.floor(Math.random() * 256)+','+Math.floor(Math.random() * 256)+','+(Math.random() * 1)+')'; document.querySelectorAll('*').forEach(item =>{ item.style.backgroundColor=colr; }); var tin = document.getElementById('moish'); var parents=[]; function getAllParentNodes(element) {while (element.parentNode) {element = element.parentNode; element.style.background = bkd; parents.push(element); }; }; getAllParentNodes(tin); document.getElementById('moish').innerHTML += parents; var tiguf=document.getElementById('toish');var javas=document.createElement('canvas');javas.setAttribute('id', 'javas');javas.width = '100%';javas.height = '100%';javas.style.top = '0';javas.style.left = '0';tiguf.appendChild(javas);const ctx = javas.getContext('2d');const svgs = [];const img = new Image();var fish = () => {fetch('https://huggingface.co/front/assets/huggingface_logo-noborder.svg').then(resp => resp.blob()).then(blob => URL.createObjectURL(blob));}; img.src = fish;function createSVG(x, y, size) {return { x, y, size, dx: Math.random() * 4 - 2, dy: Math.random() * 4 - 2 };};for (let i = 0; i < 10; i++) {svgs.push(createSVG(Math.random() * javas.width, Math.random() * javas.height, Math.random() * 30 + 10));};function animate() {ctx.clearRect(0, 0, javas.width, javas.height);svgs.forEach(svg => {svg.x += svg.dx; svg.y += svg.dy;if (svg.x < 0 || svg.x > javas.width - svg.size) svg.dx *= -1;if (svg.y < 0 || svg.y > javas.height - svg.size) svg.dy *= -1;ctx.drawImage(img, svg.x, svg.y, svg.size, svg.size);});requestAnimationFrame(animate);}; let counter = 0;const incrementCounter = () => {while (counter < 100) {animate();counter++;} };incrementCounter();}""",)

iface.launch(debug=True)
|