import json
import os
from io import BytesIO

import gradio as gr
from huggingface_hub import upload_file

default_question = """ | |
We're going to use the <a href="https://huggingface.co/datasets/wikitext" target="_blank"><code>wikitext (link)</a></code> dataset with the <code><a href="https://huggingface.co/distilbert-base-cased" target="_blank">distilbert-base-cased (link)</a></code> model checkpoint. | |
<br/><br/> | |
Start by loading the <code>wikitext-2-raw-v1</code> version of that dataset, and take the 11th example (index 10) of the <code>train</code> split.<br/> | |
We'll tokenize this using the appropriate tokenizer, and we'll mask the sixth token (index 5) the sequence. | |
<br/><br/> | |
When using the <code>distilbert-base-cased</code> checkpoint to unmask that (sixth token, index 5) token, what is the most probable predicted token (please provide the decoded token, and not the ID)? | |
<br/> | |
<br/> | |
Tips: | |
<br/> | |
- You might find the <a href="https://huggingface.co/docs/transformers/index" target="_blank">transformers docs (link)</a> useful. | |
<br/> | |
- You might find the <a href="https://huggingface.co/docs/datasets/index" target="_blank">datasets docs (link)</a> useful. | |
<br/> | |
- You might also be interested in the <a href="https://huggingface.co/course" target="_blank">Hugging Face course (link)</a>. | |
""" | |
skops_question = """ | |
1. Create a python environment[1] and install `scikit-learn` version `1.0` in that environment. | |
<br/> | |
2. Using that environment, create a `LogisticRegression` model[2] and fit it on the Iris dataset[3]. | |
<br/> | |
3. Save the trained model using `pickle`[4] or `joblib`[5]. | |
<br/> | |
4. Create a second environment, and install `scikit-learn` version `1.1` in it. | |
<br/> | |
5. Try loading the model you saved in step 3 in this second environment. | |
<br/> | |
<br/> | |
Question: | |
<br/> | |
Is there a warning or error you receive while trying to load the model? If yes, what exactly is it. | |
<br/> | |
<br/> | |
References | |
<br/> | |
- [1] You can use any tool you want to create the environment. Two of the options are: | |
<br/> | |
- `venv`: https://docs.python.org/3/library/venv.html | |
<br/> | |
- `mamba`: https://github.com/mamba-org/mamba | |
<br/> | |
- [2] `LogisticRegression` API guide: https://scikit-learn.org/dev/modules/generated/sklearn.linear_model.LogisticRegression.html | |
<br/> | |
- [3] `load_iris` API guide: https://scikit-learn.org/dev/modules/generated/sklearn.datasets.load_iris.html | |
<br/> | |
- [4] `pickle`: https://docs.python.org/3/library/pickle.html | |
<br/> | |
- [5] - `joblib`: https://joblib.readthedocs.io/en/latest/ | |
""" | |
code_question = """ | |
You are probing your code generation model on a program synthesis benchmark and | |
1 out of 4 the candidate solutions produced by your model pass the unit tests of a coding challenge. | |
<br/> | |
<br/> | |
What’s the pass@2 metric (in percent) as introduced in the | |
Codex paper (see section 2.1)? | |
<br/> | |
<br/> | |
References | |
<br/> | |
- Codex paper: https://arxiv.org/abs/2107.03374 | |
""" | |
evaluate_question = """ | |
Use the `evaluate` library to compute the BLEU score of the model generation `"Evaluate is a library to evaluate Machine Learning models"` and the reference solution `"Evaluate is a library to evaluate ML models"`. Round the result to two digits after the comma. | |
<br/> | |
<br/> | |
References | |
<br/> | |
- `evaluate` library: https://huggingface.co/docs/evaluate/index | |
- BLEU score: https://en.wikipedia.org/wiki/BLEU | |
""" | |
embodied_question = """ | |
We are going to use <a href="https://github.com/huggingface/simulate"> Simulate </a> to create a basic RL environment. | |
<br/><br/> | |
Instructions: | |
<br/> | |
pip install simulate | |
<br/> | |
create a scene with the unity engine | |
<br/> | |
add a box to the scene at position [0, 0, 1], add a camera named "cam" at default position | |
<br/> | |
show the scene, step the scene once | |
<br/> | |
what is the mean pixel value from the frames from "cam". | |
<br/><br/> | |
For some resources, you may want to check out: | |
* <a href="https://huggingface.co/docs/simulate/main/en/quicktour"> Simulate quick start </a> for installation, | |
* <a href="https://huggingface.co/docs/simulate/main/en/tutorials/running_the_simulation#running-the-simulation" simulation stepping <a> for running the simulation. | |
""" | |
fast_distributed_framework_question = """ | |
We are going to understand how many operations does a matrix multiplication hold using the simplest algorithm. | |
<br/> | |
<br/> | |
Let A,B two matrices of size 256x64 and 128x64 respectively. When computing the matrix multiplication of A and (B^T), how many scalar multiplications are done? How many scalar additions are done? Please answer in the following format: | |
<br/> | |
multiplications: {YOUR_ANSWER_AS_A_SINGLE_NUMBER} | |
<br/> | |
additions: {YOUR_ANSWER_AS_A_SINGLE_NUMBER} | |
<br/> | |
""" | |
internships = {
    'Accelerate': default_question,
    # 'Skops & Scikit-Learn': skops_question,
    # "Evaluate": evaluate_question,
    "Speech": default_question,
    # "ML for Code/Code Generation": code_question,
    # "Model forgetting": default_question,
    # "Multimodal AI": default_question,
    # "OCR": default_question,
    # "Efficient video pretraining": default_question,
    # "Retrieval augmentation as prompting": default_question,
    "Embodied AI": embodied_question,
    # "Toolkit for detecting distribution shift/Robustness": default_question,
    "Social impact evaluations": default_question,
    # "Gradio as an ecosystem": default_question,
    # "Benchmarking transformers on various AI hardware accelerators": default_question,
    "AI Art Tooling Residency": default_question,
    "Datasets for Large Language Models": default_question,
    "Fast Distributed Training Framework": fast_distributed_framework_question,
}

with gr.Blocks() as demo:
    gr.Markdown(
        """
# Internship introduction
Please select the internship you would like to apply to and answer the question asked in the Answer box.
"""
    )
    internship_choice = gr.Dropdown(label='Internship', choices=list(internships.keys()))

    with gr.Column(visible=False) as details_col:
        summary = gr.HTML(label='Question')
        details = gr.Textbox(label="Answer")
        username = gr.Textbox(label="Hugging Face Username")
        comment = gr.Textbox(label="Any comment?")
        generate_btn = gr.Button("Submit")
        output = gr.Label()

    def show_question(internship):
        # Display the question for the selected internship and reveal the answer form.
        return internships[internship], gr.update(visible=True)

    internship_choice.change(show_question, internship_choice, [summary, details_col])

    def on_click(_details, _username, _internship_choice, _comment):
        # Store the submission as a JSON payload under the applicant's username in the
        # `internships/internships-2023` dataset repository.
        response = {'response': _details, "internship": _internship_choice, "comment": _comment}
        upload_file(
            path_or_fileobj=BytesIO(bytes(json.dumps(response), 'utf-8')),
            path_in_repo=_username,
            repo_id='internships/internships-2023',
            repo_type='dataset',
            token=os.environ['HF_TOKEN'],
        )
        return f"Submitted: '{_details}' for user '{_username}'"

    generate_btn.click(on_click, inputs=[details, username, internship_choice, comment], outputs=[output])

if __name__ == "__main__":
    demo.launch()