Spaces:
Runtime error
Runtime error
now definition should render
Browse files- README.md +1 -1
- local_app.py +27 -2
README.md
CHANGED
@@ -22,7 +22,7 @@ pinned: false
|
|
22 |
Expected Calibration Error *ECE* is a popular metric to evaluate top-1 prediction miscalibration.
|
23 |
It measures the L^p norm difference between a model’s posterior and the true likelihood of being correct.
|
24 |
|
25 |
-
![ECE definition](
|
26 |
|
27 |
It is generally implemented as a binned estimator that discretizes predicted probabilities into ranges of possible values (bins) for which conditional expectation can be estimated.
|
28 |
|
|
|
22 |
Expected Calibration Error *ECE* is a popular metric to evaluate top-1 prediction miscalibration.
|
23 |
It measures the L^p norm difference between a model’s posterior and the true likelihood of being correct.
|
24 |
|
25 |
+
![ECE definition](https://huggingface.co/spaces/jordyvl/ece/resolve/main/ECE_definition.jpg)
|
26 |
|
27 |
It is generally implemented as a binned estimator that discretizes predicted probabilities into ranges of possible values (bins) for which conditional expectation can be estimated.
|
28 |
|
local_app.py
CHANGED
@@ -61,6 +61,29 @@ metric = ECE()
|
|
61 |
Switch inputs and compute_fn
|
62 |
"""
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
def reliability_plot(results):
|
66 |
fig = plt.figure()
|
@@ -97,7 +120,8 @@ def reliability_plot(results):
|
|
97 |
if np.isnan(empirical):
|
98 |
continue
|
99 |
|
100 |
-
|
|
|
101 |
"""
|
102 |
if perfect == empirical:
|
103 |
continue
|
@@ -145,10 +169,11 @@ def compute_and_plot(data, n_bins, bin_range, scheme, proxy, p):
|
|
145 |
)
|
146 |
|
147 |
plot = reliability_plot(results)
|
148 |
-
return results["ECE"], plot
|
149 |
|
150 |
|
151 |
outputs = [gr.outputs.Textbox(label="ECE"), gr.Plot(label="Reliability diagram")]
|
|
|
152 |
|
153 |
iface = gr.Interface(
|
154 |
fn=compute_and_plot,
|
|
|
61 |
Switch inputs and compute_fn
|
62 |
"""
|
63 |
|
64 |
+
def default_plot():
    """Build an empty placeholder reliability diagram.

    The figure has two stacked axes on a 3x1 grid: the upper one spans
    two rows and shows only the dotted "Perfect" diagonal, the lower one
    is an empty axes labelled for the per-bin counts.

    Returns:
        The matplotlib figure holding both axes.
    """
    figure = plt.figure()
    grid_shape = (3, 1)
    calibration_ax = plt.subplot2grid(grid_shape, (0, 0), rowspan=2)
    histogram_ax = plt.subplot2grid(grid_shape, (2, 0))

    # Identity line: 10 evenly spaced points on [0, 1] plotted against
    # themselves.
    diagonal = np.linspace(0, 1, 10)
    calibration_ax.plot(
        diagonal,
        diagonal,
        color="darkgreen",
        ls="dotted",
        label="Perfect",
    )
    calibration_ax.set_title("Reliability Diagram")
    calibration_ax.set_ylabel("Conditional Expectation")
    # Small margin around the [0, 1] range.
    calibration_ax.set_ylim([-0.05, 1.05])
    calibration_ax.legend(loc="lower right")

    # Bin-frequency panel: nothing is drawn on it in this function, so the
    # legend call below has no labelled artists to list.
    histogram_ax.set_xlabel("Confidence")
    histogram_ax.set_ylabel("Count")
    histogram_ax.legend(loc="upper left")

    plt.tight_layout()
    return figure
|
87 |
|
88 |
def reliability_plot(results):
|
89 |
fig = plt.figure()
|
|
|
120 |
if np.isnan(empirical):
|
121 |
continue
|
122 |
|
123 |
+
#width=-ranged[j],
|
124 |
+
ax1.bar([perfect], height=[empirical], align="edge", color="lightblue")
|
125 |
"""
|
126 |
if perfect == empirical:
|
127 |
continue
|
|
|
169 |
)
|
170 |
|
171 |
plot = reliability_plot(results)
|
172 |
+
return results["ECE"], plot
|
173 |
|
174 |
|
175 |
outputs = [gr.outputs.Textbox(label="ECE"), gr.Plot(label="Reliability diagram")]
|
176 |
+
#outputs[1].value = default_plot().__dict__
|
177 |
|
178 |
iface = gr.Interface(
|
179 |
fn=compute_and_plot,
|