collection_dataset_viewer

Sleeping

App Files Files Community

collection_dataset_viewer / app.py

davanstrien HF staff

add desc

a4421c2 4 months ago

raw

history blame

3.95 kB

	import os
	import re

	import gradio as gr
	from huggingface_hub import get_collection


	def extract_collection_id(input_text):
	    """Return the ``owner/slug`` collection ID found in *input_text*, or ``None``.

	    Accepts either a full ``https://huggingface.co/collections/<owner>/<slug>``
	    URL or a bare ``<owner>/<slug>`` string. Surrounding whitespace is ignored,
	    and for URLs any query string, fragment, or trailing slash is dropped.

	    Args:
	        input_text: Collection URL or slug as provided by the user.

	    Returns:
	        The ``owner/slug`` string, or ``None`` when the input does not have
	        that shape.
	    """
	    candidate = input_text.strip()

	    # For URLs keep only the path after /collections/, stopping at the first
	    # "?" or "#" so a copied link with tracking parameters still works.
	    if url_match := re.match(
	        r"https://huggingface\.co/collections/([^?#]+)", candidate
	    ):
	        candidate = url_match[1].rstrip("/")

	    # fullmatch anchors both ends: "owner/slug plus junk" must be rejected
	    # rather than passed through unchanged.
	    return candidate if re.fullmatch(r"[\w-]+/[\w-]+", candidate) else None


	def load_collection():
	    """Look up the configured collection and list the datasets it contains.

	    The collection is taken from the ``COLLECTION_SLUG_OR_URL`` environment
	    variable, which may hold either a collection URL or a collection ID.

	    Returns:
	        A ``(dataset_ids, collection_id)`` tuple, where ``dataset_ids`` is the
	        list of item IDs of every collection item whose type is ``"dataset"``.

	    Raises:
	        ValueError: If the variable is unset or empty, if its value cannot be
	            turned into a collection ID, or if the collection has no datasets.
	    """
	    raw_value = os.getenv("COLLECTION_SLUG_OR_URL")
	    if not raw_value:
	        raise ValueError("COLLECTION_SLUG_OR_URL environment variable is not set.")

	    collection_id = extract_collection_id(raw_value)
	    if not collection_id:
	        raise ValueError(
	            "Invalid collection ID or URL in COLLECTION_SLUG_OR_URL environment variable."
	        )

	    # Collections can mix item types; only datasets are kept.
	    entries = get_collection(collection_id).items
	    dataset_ids = [entry.item_id for entry in entries if entry.item_type == "dataset"]
	    if not dataset_ids:
	        raise ValueError("No datasets found in this collection.")
	    return dataset_ids, collection_id


	def display_dataset(dataset_ids, index):
	    """Build the HTML component that embeds the viewer for one dataset.

	    Args:
	        dataset_ids: Sequence of dataset IDs.
	        index: Position in ``dataset_ids`` of the dataset to show.

	    Returns:
	        A ``gr.HTML`` component wrapping an ``<iframe>`` that points at the
	        dataset's ``/embed/viewer`` page on huggingface.co.
	    """
	    selected = dataset_ids[index]
	    embed_markup = f"""<iframe
	src="https://huggingface.co/datasets/{selected}/embed/viewer"
	frameborder="0"
	width="100%"
	height="560px"
	></iframe>"""
	    return gr.HTML(embed_markup)


	def navigate_dataset(dataset_ids, index, direction):
	    """Move the selection by ``direction`` steps, wrapping around at both ends.

	    Args:
	        dataset_ids: Non-empty sequence of dataset IDs.
	        index: Currently selected position.
	        direction: Step to apply, e.g. ``-1`` for previous or ``1`` for next.
	            Integral floats such as ``-1.0`` are accepted as well.

	    Returns:
	        A ``(new_index, label)`` tuple, where ``new_index`` is an ``int`` and
	        ``label`` reads ``"Dataset <n> of <total>: <dataset id>"``.
	    """
	    total = len(dataset_ids)
	    # The step reaches this function through a numeric input component and
	    # may therefore arrive as a float; a float cannot index a list and would
	    # render as "2.0" in the label, so coerce before wrapping.
	    new_index = int(index + direction) % total
	    return (
	        new_index,
	        f"Dataset {new_index + 1} of {total}: {dataset_ids[new_index]}",
	    )


	def get_display_name(collection_id):
	    """Return ``collection_id`` without its trailing hexadecimal ID part.

	    A suffix of the form ``-<hex>`` with 24 to 32 lowercase hex characters at
	    the very end of the string is removed; anything else is returned as is.

	    Args:
	        collection_id: Collection slug such as ``owner/title-<hex id>``.

	    Returns:
	        The slug with the ID suffix stripped.
	    """
	    # Hub collection slugs end in a 24-character hex ID, which the previous
	    # 32-only pattern never matched; 32 stays accepted for compatibility.
	    return re.sub(r"-[a-f0-9]{24,32}$", "", collection_id)


	# Application entry point: resolve the collection, then build the UI.
	#
	# Only the collection lookup is guarded. It is the step that depends on the
	# COLLECTION_SLUG_OR_URL setting, so it is the only place where the
	# "please set the variable" hint is accurate. Errors raised while building or
	# launching the interface propagate with their traceback instead of being
	# reported as a configuration problem.
	try:
	    dataset_ids, collection_id = load_collection()
	except Exception as e:
	    print(f"Error: {str(e)}")
	    print(
	        "Please set the COLLECTION_SLUG_OR_URL environment variable with a valid collection ID or URL."
	    )
	else:
	    display_name = get_display_name(collection_id)

	    with gr.Blocks() as demo:
	        gr.Markdown(f"<h1>Dataset Viewer for Collection: {display_name}</h1>")
	        gr.Markdown(
	            f"[View full collection on Hugging Face](https://huggingface.co/collections/{collection_id})"
	        )

	        gr.Markdown("""
	This app allows you to browse and view datasets from a specific Hugging Face collection.
	Use the 'Previous' and 'Next' buttons to navigate through the datasets in the collection.

	Note: This space is currently set up to display datasets from a specific collection.
	If you'd like to use it for a different collection:
	1. Duplicate this space
	2. In your duplicated space, set the `COLLECTION_SLUG_OR_URL` environment variable to your desired collection ID or URL
	3. Your new space will then display datasets from your chosen collection!
	""")

	        # Position of the dataset currently shown, starting at the first one.
	        index_state = gr.State(value=0)

	        with gr.Row():
	            left_btn = gr.Button("Previous")
	            right_btn = gr.Button("Next")

	        dataset_info = gr.Markdown(f"Dataset 1 of {len(dataset_ids)}: {dataset_ids[0]}")
	        iframe_output = gr.HTML()

	        # Both buttons share navigate_dataset; the hidden Number component
	        # supplies the step (-1 for "Previous", +1 for "Next").
	        left_btn.click(
	            navigate_dataset,
	            inputs=[gr.State(dataset_ids), index_state, gr.Number(-1, visible=False)],
	            outputs=[index_state, dataset_info],
	        )
	        right_btn.click(
	            navigate_dataset,
	            inputs=[gr.State(dataset_ids), index_state, gr.Number(1, visible=False)],
	            outputs=[index_state, dataset_info],
	        )

	        # Refresh the embedded viewer whenever the selected index changes.
	        index_state.change(
	            display_dataset,
	            inputs=[gr.State(dataset_ids), index_state],
	            outputs=[iframe_output],
	        )

	        # Initialize the display with the first dataset
	        demo.load(
	            fn=lambda: display_dataset(dataset_ids, 0),
	            inputs=None,
	            outputs=[iframe_output],
	        )

	    if __name__ == "__main__":
	        demo.launch()