import gradio as gr import pandas as pd from accelerate.utils import convert_bytes from hub_utils import check_for_discussion, report_results from huggingface_hub.utils import HfHubHTTPError from model_utils import calculate_memory, get_model def get_results(model_name: str, library: str, options: list, access_token: str): model = get_model(model_name, library, access_token) # try: # has_discussion = check_for_discussion(model_name) # except HfHubHTTPError: # has_discussion = True title = f"## Memory usage for '{model_name}'" data = calculate_memory(model, options) stages = {"model": [], "gradients": [], "optimizer": [], "step": []} for i, option in enumerate(data): for stage in stages: stages[stage].append(option["Training using Adam (Peak vRAM)"][stage]) value = max(data[i]["Training using Adam (Peak vRAM)"].values()) if value == -1: value = "N/A" else: value = convert_bytes(value) data[i]["Training using Adam (Peak vRAM)"] = value if any(value != -1 for value in stages["model"]): out_explain = "## Training using Adam explained:\n" out_explain += "When training on a batch size of 1, each stage of the training process is expected to have near the following memory results for each precision you selected:\n" memory_values = pd.DataFrame( columns=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"] ) for i, dtype in enumerate(options): if stages["model"][i] != -1: memory_values.loc[len(memory_values.index)] = [ dtype, convert_bytes(stages["model"][i]), convert_bytes(stages["gradients"][i]), convert_bytes(stages["optimizer"][i]), convert_bytes(stages["step"][i]), ] return [ title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=True, value=out_explain), gr.update(visible=True, value=memory_values), ] else: return [ title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=False, value=""), gr.update(visible=False, value=pd.DataFrame()), ] with gr.Blocks() as demo: with gr.Column(): gr.Markdown( """