"""Gradio demo: predict DNN inference latency on edge devices with nn-Meter.

Loads one nn-Meter latency predictor per supported hardware backend, exposes a
single-page UI where the user uploads a model file (.onnx / .pb / nn-Meter IR
json), selects a device, and gets a per-kernel latency breakdown rendered as
HTML.

NOTE(review): the HTML template strings in this copy of the file appear to
have had their markup stripped by a text-extraction step; only the visible
text survives. The templates below reproduce that visible text — confirm
against the original file before relying on the rendered output.
"""
import json

import gradio as gr
from nn_meter import load_latency_predictor

# Predictors are loaded once at import time because loading is expensive.
cortexA76cpu_predictor = load_latency_predictor("cortexA76cpu_tflite21")
adreno640gpu_predictor = load_latency_predictor("adreno640gpu_tflite21")
adreno630gpu = load_latency_predictor("adreno630gpu_tflite21")
myriadvpu_predictor = load_latency_predictor("myriadvpu_openvino2019r2")

# Device name (as shown in the UI radio) -> predictor instance.
predictor_map = {
    "cortexA76cpu_tflite21": cortexA76cpu_predictor,
    "adreno640gpu_tflite21": adreno640gpu_predictor,
    "adreno630gpu_tflite21": adreno630gpu,
    "myriadvpu_openvino2019r2": myriadvpu_predictor,
}

# Ordered feature names reported for each kernel type; used to label the raw
# configuration values returned by detailed_predict.
feature_for_kernel = {
    # remove the last two float
    "conv": ["HW", "CIN", "COUT", "KERNEL_SIZE", "STRIDES"],
    "dwconv": ["HW", "CIN", "COUT", "KERNEL_SIZE", "STRIDES"],
    "fc": ["CIN", "COUT"],
    # support up to 4 cin, if less than 4, the latter cin will be set to 0
    "concat": ["HW", "CIN1", "CIN2", "CIN3", "CIN4"],
    # "maxpool": ["HW", "CIN", "COUT", "KERNEL_SIZE", "POOL_STRIDES"],
    "avgpool": ["HW", "CIN", "COUT", "KERNEL_SIZE", "POOL_STRIDES"],
    "split": ["HW", "CIN"],
    "channelshuffle": ["HW", "CIN"],
    "se": ["HW", "CIN"],
    "global-avgpool": ["HW", "CIN"],
    "bnrelu": ["HW", "CIN"],
    "bn": ["HW", "CIN"],
    "hswish": ["HW", "CIN"],
    "relu": ["HW", "CIN"],
    "addrelu": ["HW", "CIN1", "CIN2"],
    "add": ["HW", "CIN1", "CIN2"],
}


def get_type(kernel_name):
    """Map a raw kernel name like ``"conv-bn-relu"`` to a feature_for_kernel key.

    The kernel type is the first dash-separated segment; "global-avgpool"
    itself contains a dash (and "gap" is an alias), so both map explicitly.
    """
    operate_type = kernel_name.split("-")[0]
    if operate_type in ("global", "gap"):
        operate_type = "global-avgpool"
    return operate_type


def get_configuration(operate_type, value_arr):
    """Render kernel configuration values as ``"NAME=value, ..."``.

    For ``concat``, unused inputs are zero-padded (see feature_for_kernel
    comment), so rendering stops at the first zero value. ``zip`` bounds the
    iteration by the shorter of the two lists, avoiding an IndexError when
    fewer values than feature names are supplied (the original concat branch
    indexed value_arr by the feature list's length).
    """
    feature_arr = feature_for_kernel[operate_type]
    if operate_type == 'concat':
        configuration_arr = []
        for feature, value in zip(feature_arr, value_arr):
            if value == 0:
                break
            configuration_arr.append(f"{feature}={value}")
    else:
        configuration_arr = [
            f"{feature}={value}" for feature, value in zip(feature_arr, value_arr)
        ]
    return ', '.join(configuration_arr)


def data_process(data):
    """Convert detailed_predict kernel records into dicts for the HTML table.

    Each ``item`` is indexed as: 0 = execution order, 1 = kernel name,
    2 = configuration values, 3 = latency (ms), 4 = operator name.
    """
    new_data = []
    for item in data:
        operate_type = get_type(item[1])
        new_data.append({
            "order": item[0],
            "type": operate_type,
            "configuration": get_configuration(operate_type, item[2]),
            "latency": item[3],
            "name": item[4],
        })
    return new_data


def generate_html(hardware, latency, block_detail):
    """Build the result page for a successful prediction.

    NOTE(review): the template markup was stripped from this copy of the file;
    ``doc`` holds only the surviving visible text and, as written, the function
    returns an empty f-string without interpolating ``doc`` or ``data``.
    Behavior is preserved here — restore the original template to fix.
    """
    data = data_process(block_detail)
    doc = """

Latency Analysis

Group By:
Execution Order Operator Type Configuration Latency (ms) Detail Operator
"""
    return f""""""


def generate_error_html(massage):
    """Build the error page shown in place of a prediction result.

    NOTE(review): parameter name "massage" (sic) is kept for backward
    compatibility; it is the error-message text to display.
    """
    return f"""
nn-meter meets an error in latency prediction: {massage}
If you have any questions about the result, you can open new issues in nn-meter Git repository.
"""


def get_latency(model, hardware_name):
    """Gradio callback: predict latency of ``model`` on ``hardware_name``.

    ``model`` is the gradio File component value (None when nothing was
    uploaded); its ``.name`` is the temp-file path on disk. Returns an HTML
    string — either the latency breakdown or an error page.
    """
    if model is None:
        return generate_error_html("Please upload a model file or select one example below.")
    model = model.name
    if hardware_name == '':
        return generate_error_html("Please select a device.")
    predictor = predictor_map[hardware_name]
    # Infer the model format from the file extension; anything else is
    # treated as nn-Meter IR (json).
    if model.endswith("onnx"):
        model_type = "onnx"
    elif model.endswith("pb"):
        model_type = "pb"
    else:
        model_type = "nnmeter-ir"
    try:
        model_latency, block_detail = predictor.detailed_predict(model, model_type)
        return generate_html(hardware_name, model_latency, block_detail)
    except Exception as e:
        # Surface any predictor failure in the UI instead of crashing the app.
        return generate_error_html(repr(e))


title = "Interactive demo: nn-Meter (Draft Version)"
description = (
    "Demo for Microsoft's nn-Meter, a novel and efficient system to accurately "
    "predict the inference latency of DNN models on diverse edge devices. To "
    "use it, simply upload a model file, or use one of the example below and "
    "click ‘submit’. Results will show up in a few seconds."
)
# NOTE(review): the article markup (paper link / Github Repo anchor) was
# stripped from this copy; only the visible text remains.
article = """
nn-Meter: towards accurate latency prediction of deep-learning model inference on diverse edge devices | Github Repo
"""

examples = [
    ["samples/mobilenetv3small_0.pb", "cortexA76cpu_tflite21"],
    ["samples/mobilenetv3small_0.onnx", "adreno640gpu_tflite21"],
    ["samples/mobilenetv3small_0.json", "adreno630gpu_tflite21"],
]
inputs = [
    gr.inputs.File(label="Model File"),
    gr.inputs.Radio(
        choices=[
            "cortexA76cpu_tflite21",
            "adreno640gpu_tflite21",
            "adreno630gpu_tflite21",
            "myriadvpu_openvino2019r2",
        ],
        label="Device",
    ),
]
outputs = gr.outputs.HTML()

iface = gr.Interface(
    fn=get_latency,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging="auto",
    css="""
    div[id="6"] { flex-direction: column; }
    div[id="12"] { margin-left: 0px !important; margin-top: 0.75em !important; }
    div[id="12"] iframe{ height: 80vh !important; }
    """,
)
iface.launch()