File size: 3,328 Bytes
9b61800
 
 
 
 
 
 
 
 
 
 
e20929a
 
996d8f7
 
5eb46a1
996d8f7
 
 
 
 
5eb46a1
996d8f7
 
 
 
330338a
 
996d8f7
330338a
996d8f7
 
e20929a
996d8f7
 
e20929a
9b61800
 
 
b994a2b
d9a70f5
85aee05
d9a70f5
 
 
85aee05
d9a70f5
85aee05
 
1e1d532
e20929a
85aee05
9b61800
996d8f7
 
642315a
9b61800
b994a2b
e20929a
76c896a
 
 
 
 
9b61800
29734be
9b61800
 
 
29734be
9b61800
 
 
 
 
 
 
29734be
9b61800
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
'''
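 Description  : Gradio demo that predicts the optimal vocabulary size from the non-vocabulary parameter count (and an optional FLOPs budget).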
 Version      : 1.0
 Author       : Chaofan Tao
 Mail         : [email protected]
 Github       : https://github.com/sail-sg/scaling-with-vocab
 Date         : 2024-08-09 00:25
 Copyright (C) 2024 Chaofan Tao. All rights reserved.
'''
import gradio as gr
from utils import approach1_isoflops, approach2_derivative, approach3_isoloss


def _parse_number(text):
    """Parse a user-supplied number or simple arithmetic expression.

    Accepts plain literals such as ``7000000000`` or ``7.05e21`` and basic
    arithmetic such as ``7*10**9``. Unlike ``eval``, it rejects names, calls,
    attribute access and every other construct, so text typed into the web
    form is never executed as code.

    Arithmetic is carried out in floating point, so an absurdly large power
    overflows immediately instead of building a huge integer.

    Args:
        text: The raw input; normally a string, but an already numeric value
            is accepted as well.

    Returns:
        The value as a finite ``float``.

    Raises:
        ValueError: If the input is empty, malformed, uses an unsupported
            construct, or does not evaluate to a finite real number.
    """
    import ast
    import math
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def evaluate(node):
        # Only numeric literals and whitelisted operators are allowed.
        if isinstance(node, ast.Constant):
            literal = node.value
            # bool is a subclass of int; "True" must not be read as 1.
            if isinstance(literal, (int, float)) and not isinstance(literal, bool):
                return float(literal)
        elif isinstance(node, ast.BinOp):
            apply = binary_ops.get(type(node.op))
            if apply is not None:
                return apply(evaluate(node.left), evaluate(node.right))
        elif isinstance(node, ast.UnaryOp):
            apply = unary_ops.get(type(node.op))
            if apply is not None:
                return apply(evaluate(node.operand))
        raise ValueError("unsupported expression")

    try:
        if isinstance(text, str):
            value = evaluate(ast.parse(text.strip(), mode="eval").body)
        else:
            value = float(text)
    except (
        SyntaxError,
        TypeError,
        OverflowError,
        ZeroDivisionError,
        RecursionError,  # pathologically nested input
        MemoryError,  # some parser versions report deep nesting this way
    ) as err:
        raise ValueError(f"invalid numeric input: {text!r}") from err

    # A negative base with a fractional exponent yields a complex number.
    if not isinstance(value, float) or not math.isfinite(value):
        raise ValueError(f"invalid numeric input: {text!r}")
    return value


def compute_optimal_vocab(Nnv, flops):
    """Predict the optimal vocabulary size and format it for display.

    Args:
        Nnv: Number of non-vocabulary parameters, as text. May be a literal
            or a simple arithmetic expression (e.g. ``"7*10**9"``).
        flops: Optional FLOPs budget, as text in the same format. ``None``,
            an empty string or whitespace means "not specified".

    Returns:
        A multi-line string with the result of each approach, or an error
        message naming the field that could not be parsed. When a FLOPs
        budget is given only approach 3 is evaluated and approaches 1 and 2
        are reported as ``None``.
    """
    try:
        Nnv = _parse_number(Nnv)
    except ValueError:
        return "Invalid input for Non-vocabulary Parameters."

    # A blank FLOPs box means no explicit compute budget was requested.
    flops_given = flops is not None and str(flops).strip() != ""

    if flops_given:
        try:
            flops = _parse_number(flops)
        except ValueError:
            return "Invalid input for FLOPs."
        Vopt_app1, Vopt_app2 = None, None
        Vopt_app3 = approach3_isoloss(Nnv, flops)
    else:
        Vopt_app1 = approach1_isoflops(Nnv)
        Vopt_app2 = approach2_derivative(Nnv)
        Vopt_app3 = approach3_isoloss(Nnv)

    results = f"The optimal vocabulary size is:\nApproach 1: {Vopt_app1}\nApproach 2: {Vopt_app2}\nApproach 3: {Vopt_app3}"
    return results

# Build the web UI. Components are created inside nested context managers
# (Blocks > Column > Row), so the order of the statements below is what
# determines the layout.
with gr.Blocks() as demo:
    with gr.Column():
        # Introductory text describing the three prediction approaches.
        gr.Markdown(
            """<h1>The Optimal Vocabulary Size Predictor</h1>
            This tool is used to predict the optimal vocabulary size given the non-vocabulary parameters. We provide 3 ways for prediction:
            
            - **Approach 1: Build the relationship between studied attributes and FLOPs**: Build the relationship between the optimal data points (the points that reach the lowest loss under the same FLOPs budget) and the FLOPs.
            - **Approach 2: Derivative-Based Estimation**: Fast calculation method using the derivative of FLOPs with respect to the vocabulary size.
            - **Approach 3: Parametric Fit of Loss Formula**: Design a loss formula that considers the effect of vocabulary size and utilizes the loss to make prediction.
            
            Approach 1 and 2 can only be used to compute the optimal vocabulary size when the compute is optimally allocated to non-vocabulary parameters, vocabulary parameters and data jointly. Approach 3 will not only consider the case above, but also consider the case when the amount of data does not satisfy the optimal compute allocation, and can calculate the optimal vocabulary size with specified FLOPs.
            
            **Thanks for trying** 🌟🌟🌟!
            """)
        

        # Two inputs and the output box share one row.
        with gr.Row():
            # Pre-filled with 7000000000 (7 * 10**9).
            Nnv = gr.Textbox(label="Non-vocabulary Parameters", value=str(7*10**9))
            # No default value; the placeholder marks this field as optional.
            flops = gr.Textbox(label="FLOPs", placeholder="Optional (e.g. 7.05e21)")
            output_text = gr.Textbox(label="Prediction")
        with gr.Row():
            btn = gr.Button("Press it to compute the optimal vocabulary size")

        # On click, pass both textbox values to compute_optimal_vocab and show
        # the returned string in the prediction box.
        btn.click(
            compute_optimal_vocab,
            inputs=[Nnv, flops],
            outputs=output_text
        )
# NOTE: launched unconditionally at module level (no __main__ guard), so
# importing this module also starts the app.
demo.launch()

# import gradio as gr
# def update(name):
#     return f"Welcome to Gradio, {name}!"

# with gr.Blocks() as demo:
#     gr.Markdown("Start typing below and then click **Run** to see the output.")
#     with gr.Row():
#         inp = gr.Textbox(placeholder="What is your name?")
#         out = gr.Textbox()
#     btn = gr.Button("Run")
#     btn.click(fn=update, inputs=inp, outputs=out)

# demo.launch()