# Front-end assets for a "share" button.
#
# community_icon_html / loading_icon_html: markup strings for the two button
# icons. Both are empty strings in this file.
#
# share_js: source text of a JavaScript async arrow function meant to run in
# the browser. It reads the page's <audio> element and the transcription
# textarea, re-encodes the audio as a WAV file, uploads it, and opens a
# pre-filled "new discussion" page.
#
# NOTE: share_js is a normal (non-raw) Python string, so the JavaScript below
# deliberately contains no backslash escapes.

community_icon_html = """"""

loading_icon_html = """"""

share_js = """async () => {
  // POSTs the WAV file to the upload endpoint and resolves to the response
  // body text, which the caller treats as the URL of the uploaded file.
  // Throws when the server answers with a non-2xx status.
  async function uploadFile(file) {
    const UPLOAD_URL = 'https://huggingface.co/uploads';
    const response = await fetch(UPLOAD_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'audio/wav',
        'X-Requested-With': 'XMLHttpRequest',
      },
      body: file, // File inherits from Blob
    });
    if (!response.ok) {
      throw new Error(`Audio upload failed with HTTP status ${response.status}`);
    }
    return response.text();
  }

  // Renders the buffer through a 2-channel OfflineAudioContext running at
  // the requested sample rate. Resolves to the rendered AudioBuffer.
  function audioResample(buffer, sampleRate) {
    const offlineCtx = new OfflineAudioContext(
      2,
      (buffer.length / buffer.sampleRate) * sampleRate,
      sampleRate
    );
    const source = offlineCtx.createBufferSource();
    source.buffer = buffer;
    source.connect(offlineCtx.destination);
    source.start();
    return offlineCtx.startRendering();
  }

  // Collapses a multi-channel buffer to one channel.
  // targetChannelOpt: 'both' keeps the buffer untouched, 'left' or 'right'
  // keeps that channel, any other value averages channels 0 and 1.
  function audioReduceChannels(buffer, targetChannelOpt) {
    if (targetChannelOpt === 'both' || buffer.numberOfChannels < 2) {
      return buffer;
    }
    const outBuffer = new AudioBuffer({
      sampleRate: buffer.sampleRate,
      length: buffer.length,
      numberOfChannels: 1,
    });
    const left = buffer.getChannelData(0);
    const right = buffer.getChannelData(1);
    const newData = new Float32Array(buffer.length);
    for (let i = 0; i < buffer.length; ++i) {
      if (targetChannelOpt === 'left') {
        newData[i] = left[i];
      } else if (targetChannelOpt === 'right') {
        newData[i] = right[i];
      } else {
        newData[i] = (left[i] + right[i]) / 2;
      }
    }
    outBuffer.copyToChannel(newData, 0);
    return outBuffer;
  }

  // Scales every channel by one shared gain so the loudest sample reaches
  // full scale. The buffer is modified in place and returned.
  function audioNormalize(buffer) {
    const data = Array.from({ length: buffer.numberOfChannels }, (_, idx) =>
      buffer.getChannelData(idx)
    );
    const maxAmplitude = Math.max(
      ...data.map((chan) => chan.reduce((acc, cur) => Math.max(acc, Math.abs(cur)), 0))
    );
    // A silent buffer would give an infinite gain and turn every sample into
    // NaN, and a buffer already at full scale needs no gain.
    if (maxAmplitude === 0 || maxAmplitude >= 1.0) {
      return buffer;
    }
    const coeff = 1.0 / maxAmplitude;
    data.forEach((chan, channelIdx) => {
      const scaled = chan.map((v) => v * coeff);
      // Write each channel back to its own index. Writing all of them to
      // index 0 would replace channel 0 with the last channel.
      buffer.copyToChannel(scaled, channelIdx);
    });
    return buffer;
  }

  // Resample, then reduce channels, then normalize.
  async function processAudioFile(audioBufferIn, targetChannelOpt, targetSampleRate) {
    const resampled = await audioResample(audioBufferIn, targetSampleRate);
    const reduced = audioReduceChannels(resampled, targetChannelOpt);
    return audioNormalize(reduced);
  }

  // Converts float channel data into interleaved PCM bytes.
  // audioChannels: array of Float32Array, one per channel.
  // bytesPerSample: 1 for unsigned 8-bit, 2 for little-endian 16-bit.
  // mixChannels: when true, all channels are averaged into one mono stream.
  // Returns a Uint8Array. Throws an Error for any other bytesPerSample.
  function audioToRawWave(audioChannels, bytesPerSample, mixChannels = false) {
    if (bytesPerSample !== 1 && bytesPerSample !== 2) {
      throw new Error("Only 8, 16 bits per sample are supported");
    }
    const bufferLength = audioChannels[0].length;
    // Mixing yields a single channel, so the output is sized and strided
    // for one channel in that case instead of leaving a second one unwritten.
    const outChannels = (mixChannels || audioChannels.length === 1) ? 1 : 2;
    const reducedData = new Uint8Array(bufferLength * outChannels * bytesPerSample);
    for (let i = 0; i < bufferLength; ++i) {
      for (let channel = 0; channel < outChannels; ++channel) {
        const outputIndex = (i * outChannels + channel) * bytesPerSample;
        let sample = mixChannels
          ? audioChannels.reduce((prv, cur) => prv + cur[i], 0) / audioChannels.length
          : audioChannels[channel][i];
        // Clamp to the valid range to avoid wrap-around on clipping.
        sample = sample > 1 ? 1 : sample < -1 ? -1 : sample;
        if (bytesPerSample === 2) {
          const scaled = sample * 32767;
          // The Uint8Array store keeps only the low 8 bits of each value.
          reducedData[outputIndex] = scaled;
          reducedData[outputIndex + 1] = scaled >> 8;
        } else {
          reducedData[outputIndex] = (sample + 1) * 127;
        }
      }
    }
    return reducedData;
  }

  // Prepends a 44-byte RIFF/WAVE header to the PCM bytes and returns the
  // result as an audio/wav Blob.
  function makeWav(data, channels, sampleRate, bytesPerSample) {
    const headerLength = 44;
    const wav = new Uint8Array(headerLength + data.length);
    const view = new DataView(wav.buffer);

    view.setUint32(0, 1380533830, false); // RIFF identifier 'RIFF'
    view.setUint32(4, 36 + data.length, true); // file length minus RIFF identifier length and file description length
    view.setUint32(8, 1463899717, false); // RIFF type 'WAVE'
    view.setUint32(12, 1718449184, false); // format chunk identifier 'fmt '
    view.setUint32(16, 16, true); // format chunk length
    view.setUint16(20, 1, true); // sample format (raw)
    view.setUint16(22, channels, true); // channel count
    view.setUint32(24, sampleRate, true); // sample rate
    view.setUint32(28, sampleRate * bytesPerSample * channels, true); // byte rate (sample rate * block align)
    view.setUint16(32, bytesPerSample * channels, true); // block align (channel count * bytes per sample)
    view.setUint16(34, bytesPerSample * 8, true); // bits per sample
    view.setUint32(36, 1684108385, false); // data chunk identifier 'data'
    view.setUint32(40, data.length, true); // data chunk length

    wav.set(data, headerLength);
    return new Blob([wav.buffer], { type: "audio/wav" });
  }

  const gradioEl = document.querySelector('body > gradio-app');
  const audioEl = gradioEl.querySelector('audio');
  const resultTxt = gradioEl.querySelector('#result-textarea textarea').value;
  const shareBtnEl = gradioEl.querySelector('#share-btn');
  const shareIconEl = gradioEl.querySelector('#share-btn-share-icon');
  const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon');

  if (!audioEl) {
    return;
  }

  // Disable the button and show the loading icon while the work is running.
  shareBtnEl.style.pointerEvents = 'none';
  shareIconEl.style.display = 'none';
  loadingIconEl.style.removeProperty('display');

  try {
    const res = await fetch(audioEl.src);
    if (!res.ok) {
      throw new Error(`Could not read the input audio, HTTP status ${res.status}`);
    }
    const blob = await res.blob();

    const channelOpt = "both";
    const sampleRate = 48000;
    const bytesPerSample = 1; // or 2

    // The context is only needed for decoding, so it is closed right after
    // instead of leaving one open context behind per click.
    const audioCtx = new AudioContext();
    let audioBufferIn;
    try {
      audioBufferIn = await audioCtx.decodeAudioData(await blob.arrayBuffer());
    } finally {
      await audioCtx.close();
    }

    const audioBuffer = await processAudioFile(audioBufferIn, channelOpt, sampleRate);
    const rawData = audioToRawWave(
      channelOpt === "both"
        ? [audioBuffer.getChannelData(0), audioBuffer.getChannelData(1)]
        : [audioBuffer.getChannelData(0)],
      bytesPerSample
    );
    const blobWav = makeWav(
      rawData,
      channelOpt === "both" ? 2 : 1,
      sampleRate,
      bytesPerSample
    );

    const fileName = `whisper-demo-input.wav`;
    const audioFile = new File([blobWav], fileName, { type: 'audio/wav' });

    const url = await uploadFile(audioFile);

    // The continuation lines of this template literal start at column 0 on
    // purpose: leading spaces would become part of the markdown text.
    const descriptionMd = `#### Input audio:
<audio controls src='${url}'></audio>

#### Transcription:
> ${resultTxt}`;

    const params = new URLSearchParams({
      description: descriptionMd,
    });
    const paramsStr = params.toString();
    window.open(`https://huggingface.co/spaces/openai/whisper/discussions/new?${paramsStr}`, '_blank');
  } finally {
    // Always give the button back, even when a step above failed.
    shareBtnEl.style.removeProperty('pointer-events');
    shareIconEl.style.removeProperty('display');
    loadingIconEl.style.display = 'none';
  }
}"""