hidden-bias / source /private-and-fair /accuracy-v-privacy-dataset_size.js
mervenoyan's picture
commit files to HF hub
7341022
!(async function(){
var data = await util.getFile('cns-cache/model_grid_test_accuracy.json')
data = data
.filter(d => util.epsilonExtent[1] <= d.epsilon && d.epsilon <= util.epsilonExtent[0])
.filter(d => d.dataset_size > 1000)
// .filter(d => d.dataset_size > 4000)
// console.log(data)
var bySize = d3.nestBy(data, d => d.dataset_size)
bySize.forEach((d, i) => {
d.dataset_size = d.key
d.color = d3.interpolatePlasma(.84- i/6)
if (d.key == 60000){
d3.selectAll('.tp60').st({background: d.color, padding: 2})
}
if (d.key == 7500){
d3.selectAll('.tp75').st({background: d.color, color: '#fff', padding: 2})
}
d.label = {
60000: {pos: [7, 11], textAnchor: 'middle', text: '60,000'},
30000: {pos: [7, 11], textAnchor: 'middle', text: '30,000'},
15000: {pos: [7, -5], textAnchor: 'start', text: '15,000'},
7500: {pos: [0, 8], textAnchor: 'start', text: '7,500'},
// 3750: {pos: [0, 14], textAnchor: 'end', text: '3,750 training points'},
3750: {pos: [-34, 10], textAnchor: 'start', text: '3,750'},
2000: {pos: [-50, 10], textAnchor: 'end', text: '2,000 training points'},
}[d.key]
d.forEach(e => e.size = d)
})
var sel = d3.select('.accuracy-v-privacy-dataset_size').html('')
.at({role: 'graphics-document', 'aria-label': `High privacy and accuracy requires more training data. Line chart showing too much differential privacy without enough data decreases accuracy.`})
sel.append('div.chart-title').text('High privacy and accuracy requires more training data')
var c = d3.conventions({
sel,
height: 400,
margin: {bottom: 125, top: 5},
layers: 'sd',
})
c.x = d3.scaleLog().domain(util.epsilonExtent).range(c.x.range())
c.xAxis = d3.axisBottom(c.x).tickFormat(d => {
var rv = d + ''
if (rv.split('').filter(d => d !=0 && d != '.')[0] == 1) return rv
})
c.yAxis.tickFormat(d => d3.format('.0%')(d))//.ticks(8)
d3.drawAxis(c)
util.addAxisLabel(c, 'Higher Privacy →', 'Test Accuracy')
util.ggPlotBg(c, false)
c.layers[1].append('div')
.st({fontSize: 12, color: '#555', width: 120*2, textAlign: 'center', lineHeight: '1.3em'})
.translate([c.width/2 - 120, c.height + 70])
.html('in ε, a <a href="https://desfontain.es/privacy/differential-privacy-in-more-detail.html">measure</a> of how much modifying a single training point can change the model (models with a lower ε are more private)')
c.svg.selectAll('.y .tick').filter(d => d == .9)
.select('text').st({fontWeight: 600}).parent()
.append('path')
.at({stroke: '#000', strokeDasharray: '2 2', d: 'M 0 0 H ' + c.width})
var line = d3.line()
.x(d => c.x(d.epsilon))
.y(d => c.y(d.accuracy))
.curve(d3.curveMonotoneX)
var lineSel = c.svg.append('g').appendMany('path.accuracy-line', bySize)
.at({
d: line,
fill: 'none',
})
.st({ stroke: d => d.color, })
.on('mousemove', setActiveDigit)
var circleSel = c.svg.append('g')
.appendMany('g.accuracy-circle', data)
.translate(d => [c.x(d.epsilon), c.y(d.accuracy)])
.on('mousemove', setActiveDigit)
// .call(d3.attachTooltip)
circleSel.append('circle')
.at({r: 4, stroke: '#fff'})
.st({fill: d => d.size.color })
var labelSel = c.svg.appendMany('g.accuracy-label', bySize)
.translate(d => [c.x(d[0].epsilon), c.y(d[0].accuracy)])
labelSel.append('text')
.filter(d => d.label)
.translate(d => d.label.pos)
.st({fill: d => d.color, fontWeight: 400})
.at({textAnchor: d => d.label.textAnchor, fontSize: 14, fill: '#000', dy: '.66em'})
.text(d => d.label.text)
.filter(d => d.key == 2000)
.text('')
.tspans(d => d.label.text.split(' '))
c.svg.append('text.annotation')
.translate([225, 106])
.tspans(d3.wordwrap('With limited data, adding more differential privacy improves accuracy...', 25), 12)
c.svg.append('text.annotation')
.translate([490, 230])
.tspans(d3.wordwrap(`...until it doesn't`, 20))
// setActiveDigit({dataset_size: 60000})
function setActiveDigit({dataset_size}){
lineSel
.classed('active', 0)
.filter(d => d.dataset_size == dataset_size)
.classed('active', 1)
.raise()
circleSel
.classed('active', 0)
.filter(d => d.dataset_size == dataset_size)
.classed('active', 1)
.raise()
labelSel
.classed('active', 0)
.filter(d => d.dataset_size == dataset_size)
.classed('active', 1)
}
})()
// aVal: 0.5
// accuracy: 0.8936
// accuracy_0: 0.9663265306122449
// accuracy_1: 0.9806167400881057
// accuracy_2: 0.9011627906976745
// accuracy_3: 0.8633663366336634
// accuracy_4: 0.8859470468431772
// accuracy_5: 0.8733183856502242
// accuracy_6: 0.9384133611691023
// accuracy_7: 0.8657587548638133
// accuracy_8: 0.8059548254620124
// accuracy_9: 0.8434093161546086
// dataset_size: 60000
// epochs: 4
// epsilon: 0.19034890168775565
// l2_norm_clip: 0.75
// noise_multiplier: 2.6