File size: 43,992 Bytes
a1de0a7 |
1 2 3 4 5 6 7 8 9 10 11 12 |
"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[165],{9046:function(e,t,s){s.d(t,{t2:function(){return e9}});var n=s(20761),i=s(40911),r=s(68709),o=s(62414),a=s(81510),l=s(24087);async function c(e,t){let s=await Promise.all([(0,i.yM)(e,"tokenizer.json",!0,t),(0,i.yM)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(s[1].legacy=t.legacy),s}function h(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(let[e,s]of g)t=t.replaceAll(e,s);return RegExp(t,"gu")}if(void 0===e.String)return console.warn("Unknown pattern type:",e),null;{let s=(0,n.hr)(e.String);return RegExp(t?s:`(${s})`,"gu")}}function u(e){return new Map(Object.entries(e))}function d(e){let t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw Error("Unable to decode tensor with `batch size !== 1`. Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function _(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function p(e){return e.replace(/[\u0300-\u036f]/g,"")}let f="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",g=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"]]);class m{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class k extends n.Ag{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new x(e);case"Unigram":return new w(e,...t);case"BPE":return new v(e);default:if(e.vocab)return new z(e,...t);throw Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){let t=this.encode(e);return this.fuse_unk&&(t=function(e,t,s){let n=[],i=0;for(;i<e.length;){if(n.push(e[i]),(s.get(e[i])??t)!==t){++i;continue}for(;i<e.length&&(s.get(e[i])??t)===t;)++i}return n}(t,this.unk_token_id,this.tokens_to_ids)),t}encode(e){throw Error("encode should be implemented in subclass.")}convert_tokens_to_ids(e){return e.map(e=>this.tokens_to_ids.get(e)??this.unk_token_id)}convert_ids_to_tokens(e){return e.map(e=>this.vocab[e]??this.unk_token)}}class x extends k{constructor(e){for(let[t,s]of(super(e),this.tokens_to_ids=u(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[s]=t}encode(e){let t=[];for(let s of e){let e=[...s];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let n=!1,i=0,r=[];for(;i<e.length;){let t=e.length,s=null;for(;i<t;){let n=e.slice(i,t).join("");if(i>0&&(n=this.config.continuing_subword_prefix+n),this.tokens_to_ids.has(n)){s=n;break}--t}if(null===s){n=!0;break}r.push(s),i=t}n?t.push(this.unk_token):t.push(...r)}return t}}class w extends k{constructor(e,t){super(e);let s=e.vocab.length;this.vocab=Array(s),this.scores=Array(s);for(let t=0;t<s;++t){let s=e.vocab[t];this.vocab[t]=s[0],this.scores[t]=s[1]}this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map((e,t)=>[e,t])),this.bosToken=" ",this.bosTokenId=this.tokens_to_ids.get(this.bosToken),this.eosToken=t.eos_token,this.eosTokenId=this.tokens_to_ids.get(this.eosToken),this.unkToken=this.vocab[this.unk_token_id],this.minScore=(0,r.VV)(this.scores)[0],this.unkScore=this.minScore-10,this.scores[this.unk_token_id]=this.unkScore,this.trie=new a.GA,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){let t=e.sentence,s=t.length,n=0;for(;n<s;){let s=!1,i=[];for(let r of this.trie.commonPrefixSearch(t.slice(n))){i.push(r);let t=this.tokens_to_ids.get(r),o=this.scores[t],a=r.length;e.insert(n,a,o,t),s||1!==a||(s=!0)}s||e.insert(n,1,this.unkScore,this.unk_token_id),n+=1}}tokenize(e){let t=new a.pQ(e,this.bosTokenId,this.eosTokenId);return this.populateNodes(t),t.tokens()}encode(e){let t=[];for(let s of e){let e=this.tokenize(s);t.push(...e)}return t}}let y=(()=>{let e=[...Array.from({length:94},(e,t)=>t+33),...Array.from({length:12},(e,t)=>t+161),...Array.from({length:82},(e,t)=>t+174)],t=e.slice(),s=0;for(let n=0;n<256;++n)e.includes(n)||(e.push(n),t.push(256+s),s+=1);let n=t.map(e=>String.fromCharCode(e));return Object.fromEntries(e.map((e,t)=>[e,n[t]]))})(),b=(0,n.$2)(y);class v extends k{constructor(e){for(let[t,s]of(super(e),this.BPE_SPLIT_TOKEN=" ",this.tokens_to_ids=u(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[s]=t;this.bpe_ranks=new Map(e.merges.map((e,t)=>[e,t])),this.merges=e.merges.map(e=>e.split(this.BPE_SPLIT_TOKEN)),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];let t=this.cache.get(e);if(void 0!==t)return t;let s=Array.from(e);this.end_of_word_suffix&&(s[s.length-1]+=this.end_of_word_suffix);let n=[];if(s.length>1){let e=new a.Z3((e,t)=>e.score<t.score),t={token:s[0],bias:0,prev:null,next:null},i=t;for(let t=1;t<s.length;++t){let n={bias:t/s.length,token:s[t],prev:i,next:null};i.next=n,this._add_node(e,i),i=n}for(;!e.isEmpty();){let s=e.pop();if(s.deleted||!s.next||s.next.deleted)continue;if(s.deleted=!0,s.next.deleted=!0,s.prev){let e={...s.prev};s.prev.deleted=!0,s.prev=e,e.prev?e.prev.next=e:t=e}let n={token:s.token+s.next.token,bias:s.bias,prev:s.prev,next:s.next.next};n.prev?(n.prev.next=n,this._add_node(e,n.prev)):t=n,n.next&&(n.next.prev=n,this._add_node(e,n))}for(let e=t;null!==e;e=e.next)n.push(e.token)}else n=s;if(this.continuing_subword_suffix)for(let e=0;e<n.length-1;++e)n[e]+=this.continuing_subword_suffix;return this.cache.set(e,n),n}_add_node(e,t){let s=this.bpe_ranks.get(t.token+this.BPE_SPLIT_TOKEN+t.next.token);void 0!==s&&(t.score=s+t.bias,e.push(t))}encode(e){let t=[];for(let s of e){if(this.ignore_merges&&this.tokens_to_ids.has(s)){t.push(s);continue}for(let e of this.bpe(s))this.tokens_to_ids.has(e)?t.push(e):this.byte_fallback?t.push(...Array.from(this.text_encoder.encode(e)).map(e=>`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`)):t.push(this.unk_token)}return t}}class z extends k{constructor(e,t){for(let[s,n]of(super(e),this.tokens_to_ids=u(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=Array(this.tokens_to_ids.size),this.tokens_to_ids))this.vocab[n]=s}encode(e){return e}}class A extends n.Ag{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new N(e);case"Precompiled":return new eo(e);case"Sequence":return new R(e);case"Replace":return new S(e);case"NFC":return new E(e);case"NFKC":return new T(e);case"NFKD":return new C(e);case"Strip":return new j(e);case"StripAccents":return new M(e);case"Lowercase":return new P(e);case"Prepend":return new $(e);default:throw Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class S extends A{normalize(e){let t=h(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class E extends A{normalize(e){return e=e.normalize("NFC")}}class T extends A{normalize(e){return e=e.normalize("NFKC")}}class C extends A{normalize(e){return e=e.normalize("NFKD")}}class j extends A{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class M extends A{normalize(e){return e=p(e)}}class P extends A{normalize(e){return e=e.toLowerCase()}}class $ extends A{normalize(e){return e=this.config.prepend+e}}class R extends A{constructor(e){super(e),this.normalizers=e.normalizers.map(e=>A.fromConfig(e))}normalize(e){return this.normalizers.reduce((e,t)=>t.normalize(e),e)}}class N extends A{_tokenize_chinese_chars(e){let t=[];for(let s=0;s<e.length;++s){let n=e[s],i=n.charCodeAt(0);this._is_chinese_char(i)?(t.push(" "),t.push(n),t.push(" ")):t.push(n)}return t.join("")}_is_chinese_char(e){return e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}stripAccents(e){return e.normalize("NFD").replace(/[\u0300-\u036f]/g,"")}_is_control(e){switch(e){case" ":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){let t=[];for(let s of e){let e=s.charCodeAt(0);0===e||65533===e||this._is_control(s)||(/^\s$/.test(s)?t.push(" "):t.push(s))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class F extends n.Ag{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new L(e);case"Sequence":return new ea(e);case"Whitespace":return new el(e);case"WhitespaceSplit":return new ec(e);case"Metaspace":return new ei(e);case"ByteLevel":return new O(e);case"Split":return new U(e);case"Punctuation":return new W(e);case"Digits":return new G(e);case"Replace":return new eh(e);default:throw Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map(e=>this.pre_tokenize_text(e,t)):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class L extends F{constructor(e){super(),this.pattern=RegExp(`[^\\s${f}]+|[${f}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class O extends F{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=y,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){return this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e),(this.use_regex?e.match(this.pattern)||[]:[e]).map(e=>Array.from(this.text_encoder.encode(e),e=>this.byte_encoder[e]).join(""))}}class U extends F{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){let s=[],n=0;for(let i of e.matchAll(t)){let t=i[0];n<i.index&&s.push(e.slice(n,i.index)),t.length>0&&s.push(t),n=i.index+t.length}return n<e.length&&s.push(e.slice(n)),s}(e,this.pattern)}}class W extends F{constructor(e){super(),this.config=e,this.pattern=RegExp(`[^${f}]+|[${f}]+`,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class G extends F{constructor(e){super(),this.config=e;let t=`[^\\d]+|\\d${this.config.individual_digits?"":"+"}`;this.pattern=RegExp(t,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class I extends n.Ag{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"TemplateProcessing":return new q(e);case"ByteLevel":return new Y(e);case"RobertaProcessing":return new D(e);case"BertProcessing":return new B(e);default:throw Error(`Unknown PostProcessor type: ${e.type}`)}}post_process(e,...t){throw Error("post_process should be implemented in subclass.")}_call(e,...t){return this.post_process(e,...t)}}class B extends I{constructor(e){super(e),this.cls=e.cls[0],this.sep=e.sep[0]}post_process(e,t=null,{add_special_tokens:s=!0}={}){s&&(e=(0,n.eG)([this.cls],e,[this.sep]));let i=Array(e.length).fill(0);if(null!==t){let r=s&&this instanceof D?[this.sep]:[],o=s?[this.sep]:[];e=(0,n.eG)(e,r,t,o),i=(0,n.eG)(i,Array(t.length+r.length+o.length).fill(1))}return{tokens:e,token_type_ids:i}}}class D extends B{}class q extends I{constructor(e){super(e),this.single=e.single,this.pair=e.pair}post_process(e,t=null,{add_special_tokens:s=!0}={}){let i=null===t?this.single:this.pair,r=[],o=[];for(let a of i)"SpecialToken"in a?s&&(r.push(a.SpecialToken.id),o.push(a.SpecialToken.type_id)):"Sequence"in a&&("A"===a.Sequence.id?(r=(0,n.eG)(r,e),o=(0,n.eG)(o,Array(e.length).fill(a.Sequence.type_id))):"B"===a.Sequence.id&&(r=(0,n.eG)(r,t),o=(0,n.eG)(o,Array(t.length).fill(a.Sequence.type_id))));return{tokens:r,token_type_ids:o}}}class Y extends I{post_process(e,t=null){return t&&(e=(0,n.eG)(e,t)),{tokens:e}}}class K extends n.Ag{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return new Q(e);case"Metaspace":return new er(e);case"ByteLevel":return new X(e);case"Replace":return new Z(e);case"ByteFallback":return new V(e);case"Fuse":return new H(e);case"Strip":return new J(e);case"Sequence":return new et(e);case"CTC":return new ee(e);case"BPEDecoder":return new es(e);default:throw Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class Z extends K{decode_chain(e){let t=h(this.config.pattern);return null===t?e:e.map(e=>e.replaceAll(t,this.config.content))}}class V extends K{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){let t=[],s=[];for(let n of e){let e=null;if(6===n.length&&n.startsWith("<0x")&&n.endsWith(">")){let t=parseInt(n.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)s.push(e);else{if(s.length>0){let e=this.text_decoder.decode(Uint8Array.from(s));t.push(e),s=[]}t.push(n)}}if(s.length>0){let e=this.text_decoder.decode(Uint8Array.from(s));t.push(e),s=[]}return t}}class H extends K{decode_chain(e){return[e.join("")]}}class J extends K{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map(e=>{let t=0;for(let s=0;s<this.start;++s){if(e[s]===this.content){t=s+1;continue}break}let s=e.length;for(let t=0;t<this.stop;++t){let n=e.length-t-1;if(e[n]===this.content){s=n;continue}break}return e.slice(t,s)})}}class Q extends K{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map((e,t)=>(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=_(e)),e))}}class X extends K{constructor(e){super(e),this.byte_decoder=b,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){let t=new Uint8Array([...e.join("")].map(e=>this.byte_decoder[e]));return this.text_decoder.decode(t)}decode_chain(e){let t=[],s=[];for(let n of e)void 0!==this.added_tokens.find(e=>e.content===n)?(s.length>0&&(t.push(this.convert_tokens_to_string(s)),s=[]),t.push(n)):s.push(n);return s.length>0&&t.push(this.convert_tokens_to_string(s)),t}}class ee extends K{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";let t=[e[0]];for(let s=1;s<e.length;++s)e[s]!==t.at(-1)&&t.push(e[s]);let s=t.filter(e=>e!==this.pad_token).join("");return this.cleanup&&(s=_(s).replaceAll(this.word_delimiter_token," ").trim()),s}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class et extends K{constructor(e){super(e),this.decoders=e.decoders.map(e=>K.fromConfig(e))}decode_chain(e){return this.decoders.reduce((e,t)=>t.decode_chain(e),e)}}class es extends K{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map((t,s)=>t.replaceAll(this.suffix,s===e.length-1?"":" "))}}class en extends K{decode_chain(e){let t="";for(let s=1;s<e.length;s+=2)t+=e[s];return[t]}}class ei extends F{constructor(e){super(),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement,this.strRep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,{section_index:t}={}){let s=e.replaceAll(" ",this.strRep);return this.addPrefixSpace&&!s.startsWith(this.replacement)&&("always"===this.prepend_scheme||"first"===this.prepend_scheme&&0===t)&&(s=this.strRep+s),[s]}}class er extends K{constructor(e){super(e),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement}decode_chain(e){let t=[];for(let s=0;s<e.length;++s){let n=e[s].replaceAll(this.replacement," ");this.addPrefixSpace&&0==s&&n.startsWith(" ")&&(n=n.substring(1)),t.push(n)}return t}}class eo extends A{constructor(e){super(e),this.charsmap=e.precompiled_charsmap}normalize(e){return e=(e=(e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,"")).replace(/[\u0009\u000A\u000C\u000D\u1680\u200B\u200C\u200E\u200F\u2028\u2029\u2581\uFEFF\uFFFD]/gm," ")).includes("~")?e.split("~").map(e=>e.normalize("NFKC")).join("~"):e.normalize("NFKC")}}class ea extends F{constructor(e){super(),this.tokenizers=e.pretokenizers.map(e=>F.fromConfig(e))}pre_tokenize_text(e,t){return this.tokenizers.reduce((e,s)=>s.pre_tokenize(e,t),[e])}}class el extends F{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class ec extends F{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\S+/g)||[]}}class eh extends F{constructor(e){super(),this.config=e,this.pattern=h(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}let eu=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];class ed extends n.Ag{return_token_type_ids=!1;_default_chat_template=`{% for message in messages %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}`;constructor(e,t){for(let s of(super(),this._tokenizer_config=t,this.normalizer=A.fromConfig(e.normalizer),this.pre_tokenizer=F.fromConfig(e.pre_tokenizer),this.model=k.fromConfig(e.model,t),this.post_processor=I.fromConfig(e.post_processor),this.decoder=K.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[],e.added_tokens)){let e=new m(s);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new RegExp(this.added_tokens.map(e=>`${e.lstrip?"\\s*":""}(${(0,n.hr)(e.content)})${e.rstrip?"\\s*":""}`).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,this.padding_side="right",this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){let e=Object.create(null);for(let{name:t,template:s}of this.chat_template){if("string"!=typeof t||"string"!=typeof s)throw Error('Chat template must be a list of objects with "name" and "template" properties');e[t]=s}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(let t of e){let e=this._tokenizer_config[t];if(e){if("object"!=typeof e)return e;if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}}return null}static async from_pretrained(e,{progress_callback:t=null,config:s=null,cache_dir:n=null,local_files_only:i=!1,revision:r="main",legacy:o=null}={}){return new this(...await c(e,{progress_callback:t,config:s,cache_dir:n,local_files_only:i,revision:r,legacy:o}))}_call(e,{text_pair:t=null,add_special_tokens:s=!0,padding:i=!1,truncation:a=null,max_length:l=null,return_tensor:c=!0}={}){let h;let u=Array.isArray(e);if(u){if(0===e.length)throw Error("text array must be non-empty");if(null!==t){if(Array.isArray(t)){if(e.length!==t.length)throw Error("text and text_pair must have the same length")}else throw Error("text_pair must also be an array");h=e.map((e,n)=>this._encode_plus(e,t[n],{add_special_tokens:s}))}else h=e.map(e=>this._encode_plus(e,null,{add_special_tokens:s}))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");h=[this._encode_plus(e,t,{add_special_tokens:s})]}if(null===l?l="max_length"===i?this.model_max_length:(0,r.Fp)(h.map(e=>e.input_ids.length))[0]:a||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),l=Math.min(l,this.model_max_length),i||a)for(let e=0;e<h.length;++e)h[e].input_ids.length!==l&&(h[e].input_ids.length>l?a&&function(e,t){for(let s of Object.keys(e))e[s].length=t}(h[e],l):i&&function(e,t,s,i){for(let r of Object.keys(e)){let o=t-e[r].length,a=s(r),l=Array(o).fill(a);e[r]="right"===i?(0,n.eG)(e[r],l):(0,n.eG)(l,e[r])}}(h[e],l,e=>"input_ids"===e?this.pad_token_id:0,this.padding_side));let d={};if(c){if(!(i&&a)&&h.some(e=>{for(let t of Object.keys(e))if(e[t].length!==h[0][t]?.length)return!0;return!1}))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");let e=[h.length,h[0].input_ids.length];for(let t of Object.keys(h[0]))d[t]=new o.es("int64",BigInt64Array.from(h.flatMap(e=>e[t]).map(BigInt)),e)}else{for(let e of Object.keys(h[0]))d[e]=h.map(t=>t[e]);if(!u)for(let e of Object.keys(d))d[e]=d[e][0]}return d}_encode_text(e){return null===e?null:(this.added_tokens_regex?e.split(this.added_tokens_regex).filter(e=>e):[e]).map((e,t)=>{if(void 0!==this.added_tokens.find(t=>t.content===e))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(e=p(e.toLowerCase())),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];let s=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(s)}}).flat()}_encode_plus(e,t=null,{add_special_tokens:s=!0}={}){let i=this._encode_text(e),r=this._encode_text(t),o=this.post_processor?this.post_processor(i,r,{add_special_tokens:s}):{tokens:(0,n.eG)(i??[],r??[])},a=this.model.convert_tokens_to_ids(o.tokens),l={input_ids:a,attention_mask:Array(a.length).fill(1)};return this.return_token_type_ids&&o.token_type_ids&&(l.token_type_ids=o.token_type_ids),l}encode(e,t=null,{add_special_tokens:s=!0}={}){let{input_ids:n}=this._encode_plus(e,t,{add_special_tokens:s});return n}batch_decode(e,t={}){return e instanceof o.es&&(e=e.tolist()),e.map(e=>this.decode(e,t))}decode(e,t={}){if(e instanceof o.es&&(e=d(e)),!Array.isArray(e)||0===e.length||!(0,n.Wy)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:s=null}){let n=this.model.convert_ids_to_tokens(e);t&&(n=n.filter(e=>!this.special_tokens.includes(e)));let i=this.decoder?this.decoder(n):n.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(i=i.replaceAll(this.decoder.end_of_word_suffix," "),t&&(i=i.trim())),(s??this.clean_up_tokenization_spaces)&&(i=_(i)),i}get default_chat_template(){return this._warned_about_chat_template||(console.warn("No chat template is defined for this tokenizer - using a default chat template that implements the ChatML format. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information."),this._warned_about_chat_template=!0),this._default_chat_template}apply_chat_template(e,{chat_template:t=null,add_generation_prompt:s=!1,tokenize:n=!0,padding:i=!1,truncation:r=!1,max_length:o=null,return_tensor:a=!0,tokenizer_kwargs:c={},...h}={}){if(this.chat_template&&"object"==typeof this.chat_template||null===this.chat_template&&this.default_chat_template&&"object"==typeof this.default_chat_template){let e=this.chat_template??this.default_chat_template;if(null!==t&&Object.hasOwn(e,t))t=e[t];else if(null===t&&"default"in e)t=e.default;else if(null===t)throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(e).sort()}.`)}else t??=this.chat_template??this.default_chat_template;if("string"!=typeof t)throw Error(`chat_template must be a string, but got ${typeof t}`);let u=this._compiled_template_cache.get(t);void 0===u&&(u=new l.YS(t),this._compiled_template_cache.set(t,u));let d=Object.create(null);for(let e of eu){let t=this.getToken(e);t&&(d[e]=t)}let _=u.render({messages:e,add_generation_prompt:s,...d,...h});return n?this._call(_,{add_special_tokens:!1,padding:i,truncation:r,max_length:o,return_tensor:a,...c}).input_ids:_}}class e_ extends ed{return_token_type_ids=!0}class ep extends ed{return_token_type_ids=!0}class ef extends ed{return_token_type_ids=!0}class eg extends ed{return_token_type_ids=!0}class em extends ed{return_token_type_ids=!0}class ek extends ed{return_token_type_ids=!0}class ex extends ed{return_token_type_ids=!0}class ew extends ed{return_token_type_ids=!0}class ey extends ed{return_token_type_ids=!0}class eb extends ed{}class ev extends ed{}class ez extends ed{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}}class eA extends ed{return_token_type_ids=!0}class eS extends ed{}class eE extends ed{_default_chat_template='{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}'}class eT extends ed{}class eC extends ed{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)),this.lang_to_token=e=>e}_build_translation_inputs(e,t,s){return eB(this,e,t,s)}}class ej extends eC{}class eM extends ed{}class eP extends eE{constructor(e,t){let s=".,!?…。,、।۔،",n=e.pre_tokenizer?.pretokenizers[0]?.pattern;n&&n.Regex===` ?[^(\\s|[${s}])]+`&&(n.Regex=` ?[^\\s${s}]+`),super(e,t)}}class e$ extends ed{_default_chat_template=`{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>
' + system_message + '
<</SYS>>
' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>
' + content.strip() + '
<</SYS>>
' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}`;DEFAULT_SYSTEM_PROMPT="You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.";constructor(e,t){super(e,t),this.use_default_system_prompt=t.use_default_system_prompt??!1,this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new ei({replacement:"▁",add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text("▁"+e.replaceAll("▁"," "));return t.length>1&&"▁"===t[0]&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}get default_chat_template(){return super.default_chat_template.replaceAll("USE_DEFAULT_PROMPT",this.use_default_system_prompt?"true":"false").replaceAll("DEFAULT_SYSTEM_MESSAGE",this.DEFAULT_SYSTEM_PROMPT.replaceAll("\n","\\n").replaceAll("'","\\'"))}}class eR extends e${}class eN extends ed{}class eF extends ed{}class eL extends ed{}class eO extends ed{}class eU extends ed{}class eW extends ed{}class eG extends ed{_default_chat_template="{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}"}class eI extends ed{}function eB(e,t,s,n){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e)||!(e.languageRegex instanceof RegExp))throw Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw Error("Tokenizer must have `lang_to_token` attribute set and it should be a function.");let i=n.src_lang,r=n.tgt_lang;if(!e.language_codes.includes(r))throw Error(`Target language code "${r}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==i){if(!e.language_codes.includes(i))throw Error(`Source language code "${i}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);for(let t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(i);break}}return n.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(r)])[0],e._call(t,s)}class eD extends ed{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)),this.lang_to_token=e=>e}_build_translation_inputs(e,t,s){return eB(this,e,t,s)}}class eq extends ed{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter(e=>this.languageRegex.test(e)).map(e=>e.slice(2,-2)),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,s){return eB(this,e,t,s)}}let eY=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],eK=new Map(eY),eZ=new Map([...eY.map(([e,t])=>[t,e]),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);class eV extends ed{_default_chat_template='{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}';_decode_asr(e,{return_timestamps:t=!1,return_language:s=!1,time_precision:n=null,force_full_sequences:i=!0}={}){if(null===n)throw Error("Must specify time_precision");let o=null,a="word"===t;function l(){return{language:o,timestamp:[null,null],text:""}}let c=[],h=l(),u=0,d=this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1,_=[],p=[],f=!1,g=null,m=new Set(this.all_special_ids);for(let s of e){let e=s.tokens,i=a?s.token_timestamps:null,k=null,x=d;if("stride"in s){let[t,i,r]=s.stride;if(u-=i,g=t-r,i&&(x=i/n+d),r)for(let t=e.length-1;t>=0;--t){let s=e[t];if(s>=d){if(null!==k&&(s-d)*n<g)break;k=s}}}let w=[],y=[];for(let s=0;s<e.length;++s){let g=e[s];if(m.has(g)){let e=this.decode([g]),s=eK.get(e.slice(2,-2));if(void 0!==s){if(null!==o&&s!==o&&!t){_.push(w);let e=this.findLongestCommonSequence(_)[0],t=this.decode(e);h.text=t,c.push(h),_=[],w=[],h=l()}o=h.language=s}}else if(g>=d){let e=(g-d)*n+u,t=(0,r.NM)(e,2);if(null!==k&&g>=k)f=!0;else if(f||_.length>0&&g<x)f=!1;else if(null===h.timestamp[0])h.timestamp[0]=t;else if(t===h.timestamp[0]);else{h.timestamp[1]=t,_.push(w),a&&p.push(y);let[e,s]=this.findLongestCommonSequence(_,p),n=this.decode(e);h.text=n,a&&(h.words=this.collateWordTimestamps(e,s,o)),c.push(h),_=[],w=[],p=[],y=[],h=l()}}else if(w.push(g),a){let e,t=(0,r.NM)(i[s]+u,2);e=s+1<i.length?(0,r.NM)(i[s+1]+u,2):null,y.push([t,e])}}if("stride"in s){let[e,t,n]=s.stride;u+=e-n}w.length>0?(_.push(w),a&&p.push(y)):_.every(e=>0===e.length)&&(h=l(),_=[],w=[],p=[],y=[])}if(_.length>0){if(i&&t)throw Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.");let[e,s]=this.findLongestCommonSequence(_,p),n=this.decode(e);h.text=n,a&&(h.words=this.collateWordTimestamps(e,s,o)),c.push(h)}let k=Object.create(null),x=c.map(e=>e.text).join("");if(t||s){for(let e=0;e<c.length;++e){let n=c[e];t||delete n.timestamp,s||delete n.language}if(a){let e=[];for(let t of c)for(let s of t.words)e.push(s);k={chunks:e}}else k={chunks:c}}return[x,k]}findLongestCommonSequence(e,t=null){let s=e[0],n=s.length,i=[],r=Array.isArray(t)&&t.length>0,o=r?[]:null,a=r?t[0]:null;for(let l=1;l<e.length;++l){let c=e[l],h=0,u=[n,n,0,0],d=c.length;for(let e=1;e<n+d;++e){let t=e/1e4,i=Math.max(0,n-e),r=Math.min(n,n+d-e),o=s.slice(i,r),a=Math.max(0,e-n),l=Math.min(d,e),_=c.slice(a,l);if(o.length!==_.length)throw Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let p=o.filter((e,t)=>e===_[t]).length,f=p/e+t;p>1&&f>h&&(h=f,u=[i,r,a,l])}let[_,p,f,g]=u,m=Math.floor((p+_)/2),k=Math.floor((g+f)/2);i.push(...s.slice(0,m)),n=(s=c.slice(k)).length,r&&(o.push(...a.slice(0,m)),a=t[l].slice(k))}return(i.push(...s),r)?(o.push(...a),[i,o]):[i,[]]}collateWordTimestamps(e,t,s){let[n,i,r]=this.combineTokensIntoWords(e,s),o=[];for(let e=0;e<n.length;++e){let s=r[e];o.push({text:n[e],timestamp:[t[s.at(0)][0],t[s.at(-1)][1]]})}return o}combineTokensIntoWords(e,t,s="\"'“\xa1\xbf([{-",n="\"'.。,,!!??::”)]}、"){let i,r,o;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[i,r,o]=this.splitTokensOnUnicode(e):[i,r,o]=this.splitTokensOnSpaces(e),this.mergePunctuations(i,r,o,s,n)}decode(e,t){let s;return t&&t.decode_with_timestamps?(e instanceof o.es&&(e=d(e)),s=this.decodeWithTimestamps(e,t)):s=super.decode(e,t),s}decodeWithTimestamps(e,t){let s=t?.time_precision??.02,n=Array.from(this.all_special_ids).at(-1)+1,i=[[]];for(let t of e)if(t>=n){let e=(0,r.NM)((t-n)*s,2);i.push(`<|${e}|>`),i.push([])}else i[i.length-1].push(t);return(i=i.map(e=>"string"==typeof e?e:super.decode(e,t))).join("")}splitTokensOnUnicode(e){let t=this.decode(e,{decode_with_timestamps:!0}),s=[],n=[],i=[],r=[],o=[],a=0;for(let l=0;l<e.length;++l){let c=e[l];r.push(c),o.push(l);let h=this.decode(r,{decode_with_timestamps:!0});h.includes("�")&&"�"!==t[a+h.indexOf("�")]||(s.push(h),n.push(r),i.push(o),r=[],o=[],a+=h.length)}return[s,n,i]}splitTokensOnSpaces(e){let[t,s,n]=this.splitTokensOnUnicode(e),i=[],r=[],o=[],a=RegExp(`^[${f}]$`,"gu");for(let e=0;e<t.length;++e){let l=t[e],c=s[e],h=n[e],u=c[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),d=l.startsWith(" "),_=l.trim(),p=a.test(_);if(u||d||p||0===i.length)i.push(l),r.push(c),o.push(h);else{let e=i.length-1;i[e]+=l,r[e].push(...c),o[e].push(...h)}}return[i,r,o]}mergePunctuations(e,t,s,i,r){let o=structuredClone(e),a=structuredClone(t),l=structuredClone(s),c=o.length-2,h=o.length-1;for(;c>=0;)o[c].startsWith(" ")&&i.includes(o[c].trim())?(o[h]=o[c]+o[h],a[h]=(0,n.eG)(a[c],a[h]),l[h]=(0,n.eG)(l[c],l[h]),o[c]="",a[c]=[],l[c]=[]):h=c,--c;for(c=0,h=1;h<o.length;)!o[c].endsWith(" ")&&r.includes(o[h])?(o[c]+=o[h],a[c]=(0,n.eG)(a[c],a[h]),l[c]=(0,n.eG)(l[c],l[h]),o[h]="",a[h]=[],l[h]=[]):c=h,++h;return[o.filter(e=>e),a.filter(e=>e.length>0),l.filter(e=>e.length>0)]}get_decoder_prompt_ids({language:e=null,task:t=null,no_timestamps:s=!0}={}){let n=[];if(e){e=e.toLowerCase();let t=eZ.get(e);if(void 0===t){if(eK.has(e))t=e;else{let t=2===e.length?eK.keys():eK.values();throw Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}}let s=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===s)throw Error(`Unable to find language "${t}" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`);n.push(s)}else n.push(null);if(t){if("transcribe"!==(t=t.toLowerCase())&&"translate"!==t)throw Error(`Task "${t}" is not supported. Must be one of: ["transcribe", "translate"]`);let e=this.model.tokens_to_ids.get(`<|${t}|>`);if(void 0===e)throw Error(`Unable to find task "${t}" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`);n.push(e)}else n.push(null);if(s){let e=this.model.tokens_to_ids.get("<|notimestamps|>");if(void 0===e)throw Error('Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.');n.push(e)}return n.map((e,t)=>[t+1,e]).filter(e=>null!==e[1])}}class eH extends ed{}class eJ extends ed{}class eQ extends ed{}class eX extends ed{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter(e=>this.languageRegex.test(e)),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;let[t,...s]=e.trim().split(this.languageRegex);if(0===s.length)return super._encode_text(t);if(2===s.length){let[e,t]=s;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,n.eG)([e],super._encode_text(t))}}}class e0 extends ed{}class e1 extends ed{_default_chat_template="{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}"}class e2 extends e1{}class e3 extends ed{}class e6 extends ed{}class e8 extends ed{constructor(e,t){super(e,t),this.decoder=new en({})}}class e7 extends ed{}class e9{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:eS,DistilBertTokenizer:eb,CamembertTokenizer:ev,DebertaTokenizer:em,DebertaV2Tokenizer:ek,BertTokenizer:e_,HerbertTokenizer:ex,ConvBertTokenizer:ew,RoFormerTokenizer:ey,XLMTokenizer:ez,ElectraTokenizer:eA,MobileBertTokenizer:ef,SqueezeBertTokenizer:eg,AlbertTokenizer:ep,GPT2Tokenizer:eE,BartTokenizer:eT,MBartTokenizer:eC,MBart50Tokenizer:ej,RobertaTokenizer:eM,WhisperTokenizer:eV,CodeGenTokenizer:eH,CLIPTokenizer:eJ,SiglipTokenizer:eQ,MarianTokenizer:eX,BloomTokenizer:eP,NllbTokenizer:eD,M2M100Tokenizer:eq,LlamaTokenizer:e$,CodeLlamaTokenizer:eR,XLMRobertaTokenizer:eN,MPNetTokenizer:eF,FalconTokenizer:eL,GPTNeoXTokenizer:eO,EsmTokenizer:eU,Wav2Vec2CTCTokenizer:e0,BlenderbotTokenizer:e1,BlenderbotSmallTokenizer:e2,SpeechT5Tokenizer:e3,NougatTokenizer:e6,VitsTokenizer:e8,Qwen2Tokenizer:eW,GemmaTokenizer:eG,Grok1Tokenizer:eI,CohereTokenizer:e7,PreTrainedTokenizer:ed};static async from_pretrained(e,{quantized:t=!0,progress_callback:s=null,config:n=null,cache_dir:i=null,local_files_only:r=!1,revision:o="main",legacy:a=null}={}){let[l,h]=await c(e,{quantized:t,progress_callback:s,config:n,cache_dir:i,local_files_only:r,revision:o,legacy:a}),u=h.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer",d=this.TOKENIZER_CLASS_MAPPING[u];return d||(console.warn(`Unknown tokenizer class "${u}", attempting to construct from base class.`),d=ed),new d(l,h)}}}}]); |