Xenova HF staff committed on
Commit
bbc60eb
1 Parent(s): 5939bcf

Upload tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +1 -1
tokenizer.json CHANGED
@@ -90,7 +90,7 @@
90
  {
91
  "type": "Split",
92
  "pattern": {
93
- "Regex": "(?i:\\b[cmnjstdl]|qu|puisqu|lorsqu|quelqu|presqu|quoiqu|jusqu)['´’]|['´’](?i:[sdmt]|ll|ve|re)\\b|\\p{P}{1,4}|[^\\p{L}\\p{N}\\p{Z}]"
94
  },
95
  "behavior": "Isolated",
96
  "invert": false
 
90
  {
91
  "type": "Split",
92
  "pattern": {
93
+ "Regex": "(?:\\b[cCmMnNjJsStTdDlL]|[qQ][uU]|[pP][uU][iI][sS][qQ][uU]|[lL][oO][rR][sS][qQ][uU]|[qQ][uU][eE][lL][qQ][uU]|[pP][rR][eE][sS][qQ][uU]|[qQ][uU][oO][iI][qQ][uU]|[jJ][uU][sS][qQ][uU])['´’]|['´’](?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])\\b|\\p{P}{1,4}|[^\\p{L}\\p{N}\\p{Z}]"
94
  },
95
  "behavior": "Isolated",
96
  "invert": false