khalidhabiburahmanBoon committed on
Commit
2be443f
1 Parent(s): ed73e8d

Upload 136 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.gitattributes CHANGED
@@ -33,3 +33,33 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ambiguous_words.exe filter=lfs diff=lfs merge=lfs -text
37
+ classifier_tester.exe filter=lfs diff=lfs merge=lfs -text
38
+ cntraining.exe filter=lfs diff=lfs merge=lfs -text
39
+ combine_lang_model.exe filter=lfs diff=lfs merge=lfs -text
40
+ combine_tessdata.exe filter=lfs diff=lfs merge=lfs -text
41
+ libcairo-2.dll filter=lfs diff=lfs merge=lfs -text
42
+ libcrypto-3-x64.dll filter=lfs diff=lfs merge=lfs -text
43
+ libgio-2.0-0.dll filter=lfs diff=lfs merge=lfs -text
44
+ libglib-2.0-0.dll filter=lfs diff=lfs merge=lfs -text
45
+ libharfbuzz-0.dll filter=lfs diff=lfs merge=lfs -text
46
+ libiconv-2.dll filter=lfs diff=lfs merge=lfs -text
47
+ libicudt74.dll filter=lfs diff=lfs merge=lfs -text
48
+ libicuin74.dll filter=lfs diff=lfs merge=lfs -text
49
+ libicuuc74.dll filter=lfs diff=lfs merge=lfs -text
50
+ libleptonica-6.dll filter=lfs diff=lfs merge=lfs -text
51
+ libstdc++-6.dll filter=lfs diff=lfs merge=lfs -text
52
+ libtesseract-5.dll filter=lfs diff=lfs merge=lfs -text
53
+ libunistring-5.dll filter=lfs diff=lfs merge=lfs -text
54
+ libzstd.dll filter=lfs diff=lfs merge=lfs -text
55
+ lstmeval.exe filter=lfs diff=lfs merge=lfs -text
56
+ lstmtraining.exe filter=lfs diff=lfs merge=lfs -text
57
+ mftraining.exe filter=lfs diff=lfs merge=lfs -text
58
+ set_unicharset_properties.exe filter=lfs diff=lfs merge=lfs -text
59
+ shapeclustering.exe filter=lfs diff=lfs merge=lfs -text
60
+ tessdata/eng.traineddata filter=lfs diff=lfs merge=lfs -text
61
+ tessdata/osd.traineddata filter=lfs diff=lfs merge=lfs -text
62
+ tesseract.exe filter=lfs diff=lfs merge=lfs -text
63
+ text2image.exe filter=lfs diff=lfs merge=lfs -text
64
+ unicharset_extractor.exe filter=lfs diff=lfs merge=lfs -text
65
+ wordlist2dawg.exe filter=lfs diff=lfs merge=lfs -text
ambiguous_words.1.html ADDED
@@ -0,0 +1,790 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta name="generator" content="AsciiDoc 10.2.0">
6
+ <title>AMBIGUOUS_WORDS(1)</title>
7
+ <style type="text/css">
8
+ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
9
+
10
+ /* Default font. */
11
+ body {
12
+ font-family: Georgia,serif;
13
+ }
14
+
15
+ /* Title font. */
16
+ h1, h2, h3, h4, h5, h6,
17
+ div.title, caption.title,
18
+ thead, p.table.header,
19
+ #toctitle,
20
+ #author, #revnumber, #revdate, #revremark,
21
+ #footer {
22
+ font-family: Arial,Helvetica,sans-serif;
23
+ }
24
+
25
+ body {
26
+ margin: 1em 5% 1em 5%;
27
+ }
28
+
29
+ a {
30
+ color: blue;
31
+ text-decoration: underline;
32
+ }
33
+ a:visited {
34
+ color: fuchsia;
35
+ }
36
+
37
+ em {
38
+ font-style: italic;
39
+ color: navy;
40
+ }
41
+
42
+ strong {
43
+ font-weight: bold;
44
+ color: #083194;
45
+ }
46
+
47
+ h1, h2, h3, h4, h5, h6 {
48
+ color: #527bbd;
49
+ margin-top: 1.2em;
50
+ margin-bottom: 0.5em;
51
+ line-height: 1.3;
52
+ }
53
+
54
+ h1, h2, h3 {
55
+ border-bottom: 2px solid silver;
56
+ }
57
+ h2 {
58
+ padding-top: 0.5em;
59
+ }
60
+ h3 {
61
+ float: left;
62
+ }
63
+ h3 + * {
64
+ clear: left;
65
+ }
66
+ h5 {
67
+ font-size: 1.0em;
68
+ }
69
+
70
+ div.sectionbody {
71
+ margin-left: 0;
72
+ }
73
+
74
+ hr {
75
+ border: 1px solid silver;
76
+ }
77
+
78
+ p {
79
+ margin-top: 0.5em;
80
+ margin-bottom: 0.5em;
81
+ }
82
+
83
+ ul, ol, li > p {
84
+ margin-top: 0;
85
+ }
86
+ ul > li { color: #aaa; }
87
+ ul > li > * { color: black; }
88
+
89
+ .monospaced, code, pre {
90
+ font-family: "Courier New", Courier, monospace;
91
+ font-size: inherit;
92
+ color: navy;
93
+ padding: 0;
94
+ margin: 0;
95
+ }
96
+ pre {
97
+ white-space: pre-wrap;
98
+ }
99
+
100
+ #author {
101
+ color: #527bbd;
102
+ font-weight: bold;
103
+ font-size: 1.1em;
104
+ }
105
+ #email {
106
+ }
107
+ #revnumber, #revdate, #revremark {
108
+ }
109
+
110
+ #footer {
111
+ font-size: small;
112
+ border-top: 2px solid silver;
113
+ padding-top: 0.5em;
114
+ margin-top: 4.0em;
115
+ }
116
+ #footer-text {
117
+ float: left;
118
+ padding-bottom: 0.5em;
119
+ }
120
+ #footer-badges {
121
+ float: right;
122
+ padding-bottom: 0.5em;
123
+ }
124
+
125
+ #preamble {
126
+ margin-top: 1.5em;
127
+ margin-bottom: 1.5em;
128
+ }
129
+ div.imageblock, div.exampleblock, div.verseblock,
130
+ div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
131
+ div.admonitionblock {
132
+ margin-top: 1.0em;
133
+ margin-bottom: 1.5em;
134
+ }
135
+ div.admonitionblock {
136
+ margin-top: 2.0em;
137
+ margin-bottom: 2.0em;
138
+ margin-right: 10%;
139
+ color: #606060;
140
+ }
141
+
142
+ div.content { /* Block element content. */
143
+ padding: 0;
144
+ }
145
+
146
+ /* Block element titles. */
147
+ div.title, caption.title {
148
+ color: #527bbd;
149
+ font-weight: bold;
150
+ text-align: left;
151
+ margin-top: 1.0em;
152
+ margin-bottom: 0.5em;
153
+ }
154
+ div.title + * {
155
+ margin-top: 0;
156
+ }
157
+
158
+ td div.title:first-child {
159
+ margin-top: 0.0em;
160
+ }
161
+ div.content div.title:first-child {
162
+ margin-top: 0.0em;
163
+ }
164
+ div.content + div.title {
165
+ margin-top: 0.0em;
166
+ }
167
+
168
+ div.sidebarblock > div.content {
169
+ background: #ffffee;
170
+ border: 1px solid #dddddd;
171
+ border-left: 4px solid #f0f0f0;
172
+ padding: 0.5em;
173
+ }
174
+
175
+ div.listingblock > div.content {
176
+ border: 1px solid #dddddd;
177
+ border-left: 5px solid #f0f0f0;
178
+ background: #f8f8f8;
179
+ padding: 0.5em;
180
+ }
181
+
182
+ div.quoteblock, div.verseblock {
183
+ padding-left: 1.0em;
184
+ margin-left: 1.0em;
185
+ margin-right: 10%;
186
+ border-left: 5px solid #f0f0f0;
187
+ color: #888;
188
+ }
189
+
190
+ div.quoteblock > div.attribution {
191
+ padding-top: 0.5em;
192
+ text-align: right;
193
+ }
194
+
195
+ div.verseblock > pre.content {
196
+ font-family: inherit;
197
+ font-size: inherit;
198
+ }
199
+ div.verseblock > div.attribution {
200
+ padding-top: 0.75em;
201
+ text-align: left;
202
+ }
203
+ /* DEPRECATED: Pre version 8.2.7 verse style literal block. */
204
+ div.verseblock + div.attribution {
205
+ text-align: left;
206
+ }
207
+
208
+ div.admonitionblock .icon {
209
+ vertical-align: top;
210
+ font-size: 1.1em;
211
+ font-weight: bold;
212
+ text-decoration: underline;
213
+ color: #527bbd;
214
+ padding-right: 0.5em;
215
+ }
216
+ div.admonitionblock td.content {
217
+ padding-left: 0.5em;
218
+ border-left: 3px solid #dddddd;
219
+ }
220
+
221
+ div.exampleblock > div.content {
222
+ border-left: 3px solid #dddddd;
223
+ padding-left: 0.5em;
224
+ }
225
+
226
+ div.imageblock div.content { padding-left: 0; }
227
+ span.image img { border-style: none; vertical-align: text-bottom; }
228
+ a.image:visited { color: white; }
229
+
230
+ dl {
231
+ margin-top: 0.8em;
232
+ margin-bottom: 0.8em;
233
+ }
234
+ dt {
235
+ margin-top: 0.5em;
236
+ margin-bottom: 0;
237
+ font-style: normal;
238
+ color: navy;
239
+ }
240
+ dd > *:first-child {
241
+ margin-top: 0.1em;
242
+ }
243
+
244
+ ul, ol {
245
+ list-style-position: outside;
246
+ }
247
+ ol.arabic {
248
+ list-style-type: decimal;
249
+ }
250
+ ol.loweralpha {
251
+ list-style-type: lower-alpha;
252
+ }
253
+ ol.upperalpha {
254
+ list-style-type: upper-alpha;
255
+ }
256
+ ol.lowerroman {
257
+ list-style-type: lower-roman;
258
+ }
259
+ ol.upperroman {
260
+ list-style-type: upper-roman;
261
+ }
262
+
263
+ div.compact ul, div.compact ol,
264
+ div.compact p, div.compact p,
265
+ div.compact div, div.compact div {
266
+ margin-top: 0.1em;
267
+ margin-bottom: 0.1em;
268
+ }
269
+
270
+ tfoot {
271
+ font-weight: bold;
272
+ }
273
+ td > div.verse {
274
+ white-space: pre;
275
+ }
276
+
277
+ div.hdlist {
278
+ margin-top: 0.8em;
279
+ margin-bottom: 0.8em;
280
+ }
281
+ div.hdlist tr {
282
+ padding-bottom: 15px;
283
+ }
284
+ dt.hdlist1.strong, td.hdlist1.strong {
285
+ font-weight: bold;
286
+ }
287
+ td.hdlist1 {
288
+ vertical-align: top;
289
+ font-style: normal;
290
+ padding-right: 0.8em;
291
+ color: navy;
292
+ }
293
+ td.hdlist2 {
294
+ vertical-align: top;
295
+ }
296
+ div.hdlist.compact tr {
297
+ margin: 0;
298
+ padding-bottom: 0;
299
+ }
300
+
301
+ .comment {
302
+ background: yellow;
303
+ }
304
+
305
+ .footnote, .footnoteref {
306
+ font-size: 0.8em;
307
+ }
308
+
309
+ span.footnote, span.footnoteref {
310
+ vertical-align: super;
311
+ }
312
+
313
+ #footnotes {
314
+ margin: 20px 0 20px 0;
315
+ padding: 7px 0 0 0;
316
+ }
317
+
318
+ #footnotes div.footnote {
319
+ margin: 0 0 5px 0;
320
+ }
321
+
322
+ #footnotes hr {
323
+ border: none;
324
+ border-top: 1px solid silver;
325
+ height: 1px;
326
+ text-align: left;
327
+ margin-left: 0;
328
+ width: 20%;
329
+ min-width: 100px;
330
+ }
331
+
332
+ div.colist td {
333
+ padding-right: 0.5em;
334
+ padding-bottom: 0.3em;
335
+ vertical-align: top;
336
+ }
337
+ div.colist td img {
338
+ margin-top: 0.3em;
339
+ }
340
+
341
+ @media print {
342
+ #footer-badges { display: none; }
343
+ }
344
+
345
+ #toc {
346
+ margin-bottom: 2.5em;
347
+ }
348
+
349
+ #toctitle {
350
+ color: #527bbd;
351
+ font-size: 1.1em;
352
+ font-weight: bold;
353
+ margin-top: 1.0em;
354
+ margin-bottom: 0.1em;
355
+ }
356
+
357
+ div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
358
+ margin-top: 0;
359
+ margin-bottom: 0;
360
+ }
361
+ div.toclevel2 {
362
+ margin-left: 2em;
363
+ font-size: 0.9em;
364
+ }
365
+ div.toclevel3 {
366
+ margin-left: 4em;
367
+ font-size: 0.9em;
368
+ }
369
+ div.toclevel4 {
370
+ margin-left: 6em;
371
+ font-size: 0.9em;
372
+ }
373
+
374
+ span.aqua { color: aqua; }
375
+ span.black { color: black; }
376
+ span.blue { color: blue; }
377
+ span.fuchsia { color: fuchsia; }
378
+ span.gray { color: gray; }
379
+ span.green { color: green; }
380
+ span.lime { color: lime; }
381
+ span.maroon { color: maroon; }
382
+ span.navy { color: navy; }
383
+ span.olive { color: olive; }
384
+ span.purple { color: purple; }
385
+ span.red { color: red; }
386
+ span.silver { color: silver; }
387
+ span.teal { color: teal; }
388
+ span.white { color: white; }
389
+ span.yellow { color: yellow; }
390
+
391
+ span.aqua-background { background: aqua; }
392
+ span.black-background { background: black; }
393
+ span.blue-background { background: blue; }
394
+ span.fuchsia-background { background: fuchsia; }
395
+ span.gray-background { background: gray; }
396
+ span.green-background { background: green; }
397
+ span.lime-background { background: lime; }
398
+ span.maroon-background { background: maroon; }
399
+ span.navy-background { background: navy; }
400
+ span.olive-background { background: olive; }
401
+ span.purple-background { background: purple; }
402
+ span.red-background { background: red; }
403
+ span.silver-background { background: silver; }
404
+ span.teal-background { background: teal; }
405
+ span.white-background { background: white; }
406
+ span.yellow-background { background: yellow; }
407
+
408
+ span.big { font-size: 2em; }
409
+ span.small { font-size: 0.6em; }
410
+
411
+ span.underline { text-decoration: underline; }
412
+ span.overline { text-decoration: overline; }
413
+ span.line-through { text-decoration: line-through; }
414
+
415
+ div.unbreakable { page-break-inside: avoid; }
416
+
417
+
418
+ /*
419
+ * xhtml11 specific
420
+ *
421
+ * */
422
+
423
+ div.tableblock {
424
+ margin-top: 1.0em;
425
+ margin-bottom: 1.5em;
426
+ }
427
+ div.tableblock > table {
428
+ border: 3px solid #527bbd;
429
+ }
430
+ thead, p.table.header {
431
+ font-weight: bold;
432
+ color: #527bbd;
433
+ }
434
+ p.table {
435
+ margin-top: 0;
436
+ }
437
+ /* Because the table frame attribute is overridden by CSS in most browsers. */
438
+ div.tableblock > table[frame="void"] {
439
+ border-style: none;
440
+ }
441
+ div.tableblock > table[frame="hsides"] {
442
+ border-left-style: none;
443
+ border-right-style: none;
444
+ }
445
+ div.tableblock > table[frame="vsides"] {
446
+ border-top-style: none;
447
+ border-bottom-style: none;
448
+ }
449
+
450
+
451
+ /*
452
+ * html5 specific
453
+ *
454
+ * */
455
+
456
+ table.tableblock {
457
+ margin-top: 1.0em;
458
+ margin-bottom: 1.5em;
459
+ }
460
+ thead, p.tableblock.header {
461
+ font-weight: bold;
462
+ color: #527bbd;
463
+ }
464
+ p.tableblock {
465
+ margin-top: 0;
466
+ }
467
+ table.tableblock {
468
+ border-width: 3px;
469
+ border-spacing: 0px;
470
+ border-style: solid;
471
+ border-color: #527bbd;
472
+ border-collapse: collapse;
473
+ }
474
+ th.tableblock, td.tableblock {
475
+ border-width: 1px;
476
+ padding: 4px;
477
+ border-style: solid;
478
+ border-color: #527bbd;
479
+ }
480
+
481
+ table.tableblock.frame-topbot {
482
+ border-left-style: hidden;
483
+ border-right-style: hidden;
484
+ }
485
+ table.tableblock.frame-sides {
486
+ border-top-style: hidden;
487
+ border-bottom-style: hidden;
488
+ }
489
+ table.tableblock.frame-none {
490
+ border-style: hidden;
491
+ }
492
+
493
+ th.tableblock.halign-left, td.tableblock.halign-left {
494
+ text-align: left;
495
+ }
496
+ th.tableblock.halign-center, td.tableblock.halign-center {
497
+ text-align: center;
498
+ }
499
+ th.tableblock.halign-right, td.tableblock.halign-right {
500
+ text-align: right;
501
+ }
502
+
503
+ th.tableblock.valign-top, td.tableblock.valign-top {
504
+ vertical-align: top;
505
+ }
506
+ th.tableblock.valign-middle, td.tableblock.valign-middle {
507
+ vertical-align: middle;
508
+ }
509
+ th.tableblock.valign-bottom, td.tableblock.valign-bottom {
510
+ vertical-align: bottom;
511
+ }
512
+
513
+
514
+ /*
515
+ * manpage specific
516
+ *
517
+ * */
518
+
519
+ body.manpage h1 {
520
+ padding-top: 0.5em;
521
+ padding-bottom: 0.5em;
522
+ border-top: 2px solid silver;
523
+ border-bottom: 2px solid silver;
524
+ }
525
+ body.manpage h2 {
526
+ border-style: none;
527
+ }
528
+ body.manpage div.sectionbody {
529
+ margin-left: 3em;
530
+ }
531
+
532
+ @media print {
533
+ body.manpage div#toc { display: none; }
534
+ }
535
+
536
+
537
+ </style>
538
+ <script type="text/javascript">
539
+ /*<![CDATA[*/
540
+ var asciidoc = { // Namespace.
541
+
542
+ /////////////////////////////////////////////////////////////////////
543
+ // Table Of Contents generator
544
+ /////////////////////////////////////////////////////////////////////
545
+
546
+ /* Author: Mihai Bazon, September 2002
547
+ * http://students.infoiasi.ro/~mishoo
548
+ *
549
+ * Table Of Content generator
550
+ * Version: 0.4
551
+ *
552
+ * Feel free to use this script under the terms of the GNU General Public
553
+ * License, as long as you do not remove or alter this notice.
554
+ */
555
+
556
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
557
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
558
+
559
+ // toclevels = 1..4.
560
+ toc: function (toclevels) {
561
+
562
+ function getText(el) {
563
+ var text = "";
564
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
565
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
566
+ text += i.data;
567
+ else if (i.firstChild != null)
568
+ text += getText(i);
569
+ }
570
+ return text;
571
+ }
572
+
573
+ function TocEntry(el, text, toclevel) {
574
+ this.element = el;
575
+ this.text = text;
576
+ this.toclevel = toclevel;
577
+ }
578
+
579
+ function tocEntries(el, toclevels) {
580
+ var result = new Array;
581
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
582
+ // Function that scans the DOM tree for header elements (the DOM2
583
+ // nodeIterator API would be a better technique but not supported by all
584
+ // browsers).
585
+ var iterate = function (el) {
586
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
587
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
588
+ var mo = re.exec(i.tagName);
589
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
590
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
591
+ }
592
+ iterate(i);
593
+ }
594
+ }
595
+ }
596
+ iterate(el);
597
+ return result;
598
+ }
599
+
600
+ var toc = document.getElementById("toc");
601
+ if (!toc) {
602
+ return;
603
+ }
604
+
605
+ // Delete existing TOC entries in case we're reloading the TOC.
606
+ var tocEntriesToRemove = [];
607
+ var i;
608
+ for (i = 0; i < toc.childNodes.length; i++) {
609
+ var entry = toc.childNodes[i];
610
+ if (entry.nodeName.toLowerCase() == 'div'
611
+ && entry.getAttribute("class")
612
+ && entry.getAttribute("class").match(/^toclevel/))
613
+ tocEntriesToRemove.push(entry);
614
+ }
615
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
616
+ toc.removeChild(tocEntriesToRemove[i]);
617
+ }
618
+
619
+ // Rebuild TOC entries.
620
+ var entries = tocEntries(document.getElementById("content"), toclevels);
621
+ for (var i = 0; i < entries.length; ++i) {
622
+ var entry = entries[i];
623
+ if (entry.element.id == "")
624
+ entry.element.id = "_toc_" + i;
625
+ var a = document.createElement("a");
626
+ a.href = "#" + entry.element.id;
627
+ a.appendChild(document.createTextNode(entry.text));
628
+ var div = document.createElement("div");
629
+ div.appendChild(a);
630
+ div.className = "toclevel" + entry.toclevel;
631
+ toc.appendChild(div);
632
+ }
633
+ if (entries.length == 0)
634
+ toc.parentNode.removeChild(toc);
635
+ },
636
+
637
+
638
+ /////////////////////////////////////////////////////////////////////
639
+ // Footnotes generator
640
+ /////////////////////////////////////////////////////////////////////
641
+
642
+ /* Based on footnote generation code from:
643
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
644
+ */
645
+
646
+ footnotes: function () {
647
+ // Delete existing footnote entries in case we're reloading the footnotes.
648
+ var i;
649
+ var noteholder = document.getElementById("footnotes");
650
+ if (!noteholder) {
651
+ return;
652
+ }
653
+ var entriesToRemove = [];
654
+ for (i = 0; i < noteholder.childNodes.length; i++) {
655
+ var entry = noteholder.childNodes[i];
656
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
657
+ entriesToRemove.push(entry);
658
+ }
659
+ for (i = 0; i < entriesToRemove.length; i++) {
660
+ noteholder.removeChild(entriesToRemove[i]);
661
+ }
662
+
663
+ // Rebuild footnote entries.
664
+ var cont = document.getElementById("content");
665
+ var spans = cont.getElementsByTagName("span");
666
+ var refs = {};
667
+ var n = 0;
668
+ for (i=0; i<spans.length; i++) {
669
+ if (spans[i].className == "footnote") {
670
+ n++;
671
+ var note = spans[i].getAttribute("data-note");
672
+ if (!note) {
673
+ // Use [\s\S] in place of . so multi-line matches work.
674
+ // Because JavaScript has no s (dotall) regex flag.
675
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
676
+ spans[i].innerHTML =
677
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
678
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
679
+ spans[i].setAttribute("data-note", note);
680
+ }
681
+ noteholder.innerHTML +=
682
+ "<div class='footnote' id='_footnote_" + n + "'>" +
683
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
684
+ n + "</a>. " + note + "</div>";
685
+ var id =spans[i].getAttribute("id");
686
+ if (id != null) refs["#"+id] = n;
687
+ }
688
+ }
689
+ if (n == 0)
690
+ noteholder.parentNode.removeChild(noteholder);
691
+ else {
692
+ // Process footnoterefs.
693
+ for (i=0; i<spans.length; i++) {
694
+ if (spans[i].className == "footnoteref") {
695
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
696
+ href = href.match(/#.*/)[0]; // Because IE returns the full URL.
697
+ n = refs[href];
698
+ spans[i].innerHTML =
699
+ "[<a href='#_footnote_" + n +
700
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
701
+ }
702
+ }
703
+ }
704
+ },
705
+
706
+ install: function(toclevels) {
707
+ var timerId;
708
+
709
+ function reinstall() {
710
+ asciidoc.footnotes();
711
+ if (toclevels) {
712
+ asciidoc.toc(toclevels);
713
+ }
714
+ }
715
+
716
+ function reinstallAndRemoveTimer() {
717
+ clearInterval(timerId);
718
+ reinstall();
719
+ }
720
+
721
+ timerId = setInterval(reinstall, 500);
722
+ if (document.addEventListener)
723
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
724
+ else
725
+ window.onload = reinstallAndRemoveTimer;
726
+ }
727
+
728
+ }
729
+ asciidoc.install();
730
+ /*]]>*/
731
+ </script>
732
+ </head>
733
+ <body class="manpage">
734
+ <div id="header">
735
+ <h1>
736
+ AMBIGUOUS_WORDS(1) Manual Page
737
+ </h1>
738
+ <h2>NAME</h2>
739
+ <div class="sectionbody">
740
+ <p>ambiguous_words -
741
+ generate sets of words Tesseract is likely to find ambiguous
742
+ </p>
743
+ </div>
744
+ </div>
745
+ <div id="content">
746
+ <div class="sect1">
747
+ <h2 id="_synopsis">SYNOPSIS</h2>
748
+ <div class="sectionbody">
749
+ <div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div>
750
+ </div>
751
+ </div>
752
+ <div class="sect1">
753
+ <h2 id="_description">DESCRIPTION</h2>
754
+ <div class="sectionbody">
755
+ <div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word
756
+ in the word list, produces a set of words which Tesseract thinks might be
757
+ ambiguous with it. <em>TESSDATADIR</em> must be set to the absolute path of
758
+ a directory containing <em>tessdata/lang.traineddata</em>.</p></div>
759
+ </div>
760
+ </div>
761
+ <div class="sect1">
762
+ <h2 id="_see_also">SEE ALSO</h2>
763
+ <div class="sectionbody">
764
+ <div class="paragraph"><p>tesseract(1)</p></div>
765
+ </div>
766
+ </div>
767
+ <div class="sect1">
768
+ <h2 id="_copying">COPYING</h2>
769
+ <div class="sectionbody">
770
+ <div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
771
+ Licensed under the Apache License, Version 2.0</p></div>
772
+ </div>
773
+ </div>
774
+ <div class="sect1">
775
+ <h2 id="_author">AUTHOR</h2>
776
+ <div class="sectionbody">
777
+ <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
778
+ at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
779
+ </div>
780
+ </div>
781
+ </div>
782
+ <div id="footnotes"><hr></div>
783
+ <div id="footer">
784
+ <div id="footer-text">
785
+ Last updated
786
+ 2024-05-03 17:30:23 CEST
787
+ </div>
788
+ </div>
789
+ </body>
790
+ </html>
ambiguous_words.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85c23ac33fa8235cf5f3adaf87b97c506a56e71a96dd0f7bb83b373c029c2ae5
3
+ size 1066496
classifier_tester.1.html ADDED
@@ -0,0 +1,857 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta name="generator" content="AsciiDoc 10.2.0">
6
+ <title>CLASSIFIER_TESTER(1)</title>
7
+ <style type="text/css">
8
+ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
9
+
10
+ /* Default font. */
11
+ body {
12
+ font-family: Georgia,serif;
13
+ }
14
+
15
+ /* Title font. */
16
+ h1, h2, h3, h4, h5, h6,
17
+ div.title, caption.title,
18
+ thead, p.table.header,
19
+ #toctitle,
20
+ #author, #revnumber, #revdate, #revremark,
21
+ #footer {
22
+ font-family: Arial,Helvetica,sans-serif;
23
+ }
24
+
25
+ body {
26
+ margin: 1em 5% 1em 5%;
27
+ }
28
+
29
+ a {
30
+ color: blue;
31
+ text-decoration: underline;
32
+ }
33
+ a:visited {
34
+ color: fuchsia;
35
+ }
36
+
37
+ em {
38
+ font-style: italic;
39
+ color: navy;
40
+ }
41
+
42
+ strong {
43
+ font-weight: bold;
44
+ color: #083194;
45
+ }
46
+
47
+ h1, h2, h3, h4, h5, h6 {
48
+ color: #527bbd;
49
+ margin-top: 1.2em;
50
+ margin-bottom: 0.5em;
51
+ line-height: 1.3;
52
+ }
53
+
54
+ h1, h2, h3 {
55
+ border-bottom: 2px solid silver;
56
+ }
57
+ h2 {
58
+ padding-top: 0.5em;
59
+ }
60
+ h3 {
61
+ float: left;
62
+ }
63
+ h3 + * {
64
+ clear: left;
65
+ }
66
+ h5 {
67
+ font-size: 1.0em;
68
+ }
69
+
70
+ div.sectionbody {
71
+ margin-left: 0;
72
+ }
73
+
74
+ hr {
75
+ border: 1px solid silver;
76
+ }
77
+
78
+ p {
79
+ margin-top: 0.5em;
80
+ margin-bottom: 0.5em;
81
+ }
82
+
83
+ ul, ol, li > p {
84
+ margin-top: 0;
85
+ }
86
+ ul > li { color: #aaa; }
87
+ ul > li > * { color: black; }
88
+
89
+ .monospaced, code, pre {
90
+ font-family: "Courier New", Courier, monospace;
91
+ font-size: inherit;
92
+ color: navy;
93
+ padding: 0;
94
+ margin: 0;
95
+ }
96
+ pre {
97
+ white-space: pre-wrap;
98
+ }
99
+
100
+ #author {
101
+ color: #527bbd;
102
+ font-weight: bold;
103
+ font-size: 1.1em;
104
+ }
105
+ #email {
106
+ }
107
+ #revnumber, #revdate, #revremark {
108
+ }
109
+
110
+ #footer {
111
+ font-size: small;
112
+ border-top: 2px solid silver;
113
+ padding-top: 0.5em;
114
+ margin-top: 4.0em;
115
+ }
116
+ #footer-text {
117
+ float: left;
118
+ padding-bottom: 0.5em;
119
+ }
120
+ #footer-badges {
121
+ float: right;
122
+ padding-bottom: 0.5em;
123
+ }
124
+
125
+ #preamble {
126
+ margin-top: 1.5em;
127
+ margin-bottom: 1.5em;
128
+ }
129
+ div.imageblock, div.exampleblock, div.verseblock,
130
+ div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
131
+ div.admonitionblock {
132
+ margin-top: 1.0em;
133
+ margin-bottom: 1.5em;
134
+ }
135
+ div.admonitionblock {
136
+ margin-top: 2.0em;
137
+ margin-bottom: 2.0em;
138
+ margin-right: 10%;
139
+ color: #606060;
140
+ }
141
+
142
+ div.content { /* Block element content. */
143
+ padding: 0;
144
+ }
145
+
146
+ /* Block element titles. */
147
+ div.title, caption.title {
148
+ color: #527bbd;
149
+ font-weight: bold;
150
+ text-align: left;
151
+ margin-top: 1.0em;
152
+ margin-bottom: 0.5em;
153
+ }
154
+ div.title + * {
155
+ margin-top: 0;
156
+ }
157
+
158
+ td div.title:first-child {
159
+ margin-top: 0.0em;
160
+ }
161
+ div.content div.title:first-child {
162
+ margin-top: 0.0em;
163
+ }
164
+ div.content + div.title {
165
+ margin-top: 0.0em;
166
+ }
167
+
168
+ div.sidebarblock > div.content {
169
+ background: #ffffee;
170
+ border: 1px solid #dddddd;
171
+ border-left: 4px solid #f0f0f0;
172
+ padding: 0.5em;
173
+ }
174
+
175
+ div.listingblock > div.content {
176
+ border: 1px solid #dddddd;
177
+ border-left: 5px solid #f0f0f0;
178
+ background: #f8f8f8;
179
+ padding: 0.5em;
180
+ }
181
+
182
+ div.quoteblock, div.verseblock {
183
+ padding-left: 1.0em;
184
+ margin-left: 1.0em;
185
+ margin-right: 10%;
186
+ border-left: 5px solid #f0f0f0;
187
+ color: #888;
188
+ }
189
+
190
+ div.quoteblock > div.attribution {
191
+ padding-top: 0.5em;
192
+ text-align: right;
193
+ }
194
+
195
+ div.verseblock > pre.content {
196
+ font-family: inherit;
197
+ font-size: inherit;
198
+ }
199
+ div.verseblock > div.attribution {
200
+ padding-top: 0.75em;
201
+ text-align: left;
202
+ }
203
+ /* DEPRECATED: Pre version 8.2.7 verse style literal block. */
204
+ div.verseblock + div.attribution {
205
+ text-align: left;
206
+ }
207
+
208
+ div.admonitionblock .icon {
209
+ vertical-align: top;
210
+ font-size: 1.1em;
211
+ font-weight: bold;
212
+ text-decoration: underline;
213
+ color: #527bbd;
214
+ padding-right: 0.5em;
215
+ }
216
+ div.admonitionblock td.content {
217
+ padding-left: 0.5em;
218
+ border-left: 3px solid #dddddd;
219
+ }
220
+
221
+ div.exampleblock > div.content {
222
+ border-left: 3px solid #dddddd;
223
+ padding-left: 0.5em;
224
+ }
225
+
226
+ div.imageblock div.content { padding-left: 0; }
227
+ span.image img { border-style: none; vertical-align: text-bottom; }
228
+ a.image:visited { color: white; }
229
+
230
+ dl {
231
+ margin-top: 0.8em;
232
+ margin-bottom: 0.8em;
233
+ }
234
+ dt {
235
+ margin-top: 0.5em;
236
+ margin-bottom: 0;
237
+ font-style: normal;
238
+ color: navy;
239
+ }
240
+ dd > *:first-child {
241
+ margin-top: 0.1em;
242
+ }
243
+
244
+ ul, ol {
245
+ list-style-position: outside;
246
+ }
247
+ ol.arabic {
248
+ list-style-type: decimal;
249
+ }
250
+ ol.loweralpha {
251
+ list-style-type: lower-alpha;
252
+ }
253
+ ol.upperalpha {
254
+ list-style-type: upper-alpha;
255
+ }
256
+ ol.lowerroman {
257
+ list-style-type: lower-roman;
258
+ }
259
+ ol.upperroman {
260
+ list-style-type: upper-roman;
261
+ }
262
+
263
+ div.compact ul, div.compact ol,
264
+ div.compact p, div.compact p,
265
+ div.compact div, div.compact div {
266
+ margin-top: 0.1em;
267
+ margin-bottom: 0.1em;
268
+ }
269
+
270
+ tfoot {
271
+ font-weight: bold;
272
+ }
273
+ td > div.verse {
274
+ white-space: pre;
275
+ }
276
+
277
+ div.hdlist {
278
+ margin-top: 0.8em;
279
+ margin-bottom: 0.8em;
280
+ }
281
+ div.hdlist tr {
282
+ padding-bottom: 15px;
283
+ }
284
+ dt.hdlist1.strong, td.hdlist1.strong {
285
+ font-weight: bold;
286
+ }
287
+ td.hdlist1 {
288
+ vertical-align: top;
289
+ font-style: normal;
290
+ padding-right: 0.8em;
291
+ color: navy;
292
+ }
293
+ td.hdlist2 {
294
+ vertical-align: top;
295
+ }
296
+ div.hdlist.compact tr {
297
+ margin: 0;
298
+ padding-bottom: 0;
299
+ }
300
+
301
+ .comment {
302
+ background: yellow;
303
+ }
304
+
305
+ .footnote, .footnoteref {
306
+ font-size: 0.8em;
307
+ }
308
+
309
+ span.footnote, span.footnoteref {
310
+ vertical-align: super;
311
+ }
312
+
313
+ #footnotes {
314
+ margin: 20px 0 20px 0;
315
+ padding: 7px 0 0 0;
316
+ }
317
+
318
+ #footnotes div.footnote {
319
+ margin: 0 0 5px 0;
320
+ }
321
+
322
+ #footnotes hr {
323
+ border: none;
324
+ border-top: 1px solid silver;
325
+ height: 1px;
326
+ text-align: left;
327
+ margin-left: 0;
328
+ width: 20%;
329
+ min-width: 100px;
330
+ }
331
+
332
+ div.colist td {
333
+ padding-right: 0.5em;
334
+ padding-bottom: 0.3em;
335
+ vertical-align: top;
336
+ }
337
+ div.colist td img {
338
+ margin-top: 0.3em;
339
+ }
340
+
341
+ @media print {
342
+ #footer-badges { display: none; }
343
+ }
344
+
345
+ #toc {
346
+ margin-bottom: 2.5em;
347
+ }
348
+
349
+ #toctitle {
350
+ color: #527bbd;
351
+ font-size: 1.1em;
352
+ font-weight: bold;
353
+ margin-top: 1.0em;
354
+ margin-bottom: 0.1em;
355
+ }
356
+
357
+ div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
358
+ margin-top: 0;
359
+ margin-bottom: 0;
360
+ }
361
+ div.toclevel2 {
362
+ margin-left: 2em;
363
+ font-size: 0.9em;
364
+ }
365
+ div.toclevel3 {
366
+ margin-left: 4em;
367
+ font-size: 0.9em;
368
+ }
369
+ div.toclevel4 {
370
+ margin-left: 6em;
371
+ font-size: 0.9em;
372
+ }
373
+
374
+ span.aqua { color: aqua; }
375
+ span.black { color: black; }
376
+ span.blue { color: blue; }
377
+ span.fuchsia { color: fuchsia; }
378
+ span.gray { color: gray; }
379
+ span.green { color: green; }
380
+ span.lime { color: lime; }
381
+ span.maroon { color: maroon; }
382
+ span.navy { color: navy; }
383
+ span.olive { color: olive; }
384
+ span.purple { color: purple; }
385
+ span.red { color: red; }
386
+ span.silver { color: silver; }
387
+ span.teal { color: teal; }
388
+ span.white { color: white; }
389
+ span.yellow { color: yellow; }
390
+
391
+ span.aqua-background { background: aqua; }
392
+ span.black-background { background: black; }
393
+ span.blue-background { background: blue; }
394
+ span.fuchsia-background { background: fuchsia; }
395
+ span.gray-background { background: gray; }
396
+ span.green-background { background: green; }
397
+ span.lime-background { background: lime; }
398
+ span.maroon-background { background: maroon; }
399
+ span.navy-background { background: navy; }
400
+ span.olive-background { background: olive; }
401
+ span.purple-background { background: purple; }
402
+ span.red-background { background: red; }
403
+ span.silver-background { background: silver; }
404
+ span.teal-background { background: teal; }
405
+ span.white-background { background: white; }
406
+ span.yellow-background { background: yellow; }
407
+
408
+ span.big { font-size: 2em; }
409
+ span.small { font-size: 0.6em; }
410
+
411
+ span.underline { text-decoration: underline; }
412
+ span.overline { text-decoration: overline; }
413
+ span.line-through { text-decoration: line-through; }
414
+
415
+ div.unbreakable { page-break-inside: avoid; }
416
+
417
+
418
+ /*
419
+ * xhtml11 specific
420
+ *
421
+ * */
422
+
423
+ div.tableblock {
424
+ margin-top: 1.0em;
425
+ margin-bottom: 1.5em;
426
+ }
427
+ div.tableblock > table {
428
+ border: 3px solid #527bbd;
429
+ }
430
+ thead, p.table.header {
431
+ font-weight: bold;
432
+ color: #527bbd;
433
+ }
434
+ p.table {
435
+ margin-top: 0;
436
+ }
437
+ /* Because the table frame attribute is overridden by CSS in most browsers. */
438
+ div.tableblock > table[frame="void"] {
439
+ border-style: none;
440
+ }
441
+ div.tableblock > table[frame="hsides"] {
442
+ border-left-style: none;
443
+ border-right-style: none;
444
+ }
445
+ div.tableblock > table[frame="vsides"] {
446
+ border-top-style: none;
447
+ border-bottom-style: none;
448
+ }
449
+
450
+
451
+ /*
452
+ * html5 specific
453
+ *
454
+ * */
455
+
456
+ table.tableblock {
457
+ margin-top: 1.0em;
458
+ margin-bottom: 1.5em;
459
+ }
460
+ thead, p.tableblock.header {
461
+ font-weight: bold;
462
+ color: #527bbd;
463
+ }
464
+ p.tableblock {
465
+ margin-top: 0;
466
+ }
467
+ table.tableblock {
468
+ border-width: 3px;
469
+ border-spacing: 0px;
470
+ border-style: solid;
471
+ border-color: #527bbd;
472
+ border-collapse: collapse;
473
+ }
474
+ th.tableblock, td.tableblock {
475
+ border-width: 1px;
476
+ padding: 4px;
477
+ border-style: solid;
478
+ border-color: #527bbd;
479
+ }
480
+
481
+ table.tableblock.frame-topbot {
482
+ border-left-style: hidden;
483
+ border-right-style: hidden;
484
+ }
485
+ table.tableblock.frame-sides {
486
+ border-top-style: hidden;
487
+ border-bottom-style: hidden;
488
+ }
489
+ table.tableblock.frame-none {
490
+ border-style: hidden;
491
+ }
492
+
493
+ th.tableblock.halign-left, td.tableblock.halign-left {
494
+ text-align: left;
495
+ }
496
+ th.tableblock.halign-center, td.tableblock.halign-center {
497
+ text-align: center;
498
+ }
499
+ th.tableblock.halign-right, td.tableblock.halign-right {
500
+ text-align: right;
501
+ }
502
+
503
+ th.tableblock.valign-top, td.tableblock.valign-top {
504
+ vertical-align: top;
505
+ }
506
+ th.tableblock.valign-middle, td.tableblock.valign-middle {
507
+ vertical-align: middle;
508
+ }
509
+ th.tableblock.valign-bottom, td.tableblock.valign-bottom {
510
+ vertical-align: bottom;
511
+ }
512
+
513
+
514
+ /*
515
+ * manpage specific
516
+ *
517
+ * */
518
+
519
+ body.manpage h1 {
520
+ padding-top: 0.5em;
521
+ padding-bottom: 0.5em;
522
+ border-top: 2px solid silver;
523
+ border-bottom: 2px solid silver;
524
+ }
525
+ body.manpage h2 {
526
+ border-style: none;
527
+ }
528
+ body.manpage div.sectionbody {
529
+ margin-left: 3em;
530
+ }
531
+
532
+ @media print {
533
+ body.manpage div#toc { display: none; }
534
+ }
535
+
536
+
537
+ </style>
538
+ <script type="text/javascript">
539
+ /*<![CDATA[*/
540
+ var asciidoc = { // Namespace.
541
+
542
+ /////////////////////////////////////////////////////////////////////
543
+ // Table Of Contents generator
544
+ /////////////////////////////////////////////////////////////////////
545
+
546
+ /* Author: Mihai Bazon, September 2002
547
+ * http://students.infoiasi.ro/~mishoo
548
+ *
549
+ * Table Of Content generator
550
+ * Version: 0.4
551
+ *
552
+ * Feel free to use this script under the terms of the GNU General Public
553
+ * License, as long as you do not remove or alter this notice.
554
+ */
555
+
556
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
557
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
558
+
559
+ // toclevels = 1..4.
560
+ toc: function (toclevels) {
561
+
562
+ function getText(el) {
563
+ var text = "";
564
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
565
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
566
+ text += i.data;
567
+ else if (i.firstChild != null)
568
+ text += getText(i);
569
+ }
570
+ return text;
571
+ }
572
+
573
+ function TocEntry(el, text, toclevel) {
574
+ this.element = el;
575
+ this.text = text;
576
+ this.toclevel = toclevel;
577
+ }
578
+
579
+ function tocEntries(el, toclevels) {
580
+ var result = new Array;
581
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
582
+ // Function that scans the DOM tree for header elements (the DOM2
583
+ // nodeIterator API would be a better technique but not supported by all
584
+ // browsers).
585
+ var iterate = function (el) {
586
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
587
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
588
+ var mo = re.exec(i.tagName);
589
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
590
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
591
+ }
592
+ iterate(i);
593
+ }
594
+ }
595
+ }
596
+ iterate(el);
597
+ return result;
598
+ }
599
+
600
+ var toc = document.getElementById("toc");
601
+ if (!toc) {
602
+ return;
603
+ }
604
+
605
+ // Delete existing TOC entries in case we're reloading the TOC.
606
+ var tocEntriesToRemove = [];
607
+ var i;
608
+ for (i = 0; i < toc.childNodes.length; i++) {
609
+ var entry = toc.childNodes[i];
610
+ if (entry.nodeName.toLowerCase() == 'div'
611
+ && entry.getAttribute("class")
612
+ && entry.getAttribute("class").match(/^toclevel/))
613
+ tocEntriesToRemove.push(entry);
614
+ }
615
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
616
+ toc.removeChild(tocEntriesToRemove[i]);
617
+ }
618
+
619
+ // Rebuild TOC entries.
620
+ var entries = tocEntries(document.getElementById("content"), toclevels);
621
+ for (var i = 0; i < entries.length; ++i) {
622
+ var entry = entries[i];
623
+ if (entry.element.id == "")
624
+ entry.element.id = "_toc_" + i;
625
+ var a = document.createElement("a");
626
+ a.href = "#" + entry.element.id;
627
+ a.appendChild(document.createTextNode(entry.text));
628
+ var div = document.createElement("div");
629
+ div.appendChild(a);
630
+ div.className = "toclevel" + entry.toclevel;
631
+ toc.appendChild(div);
632
+ }
633
+ if (entries.length == 0)
634
+ toc.parentNode.removeChild(toc);
635
+ },
636
+
637
+
638
+ /////////////////////////////////////////////////////////////////////
639
+ // Footnotes generator
640
+ /////////////////////////////////////////////////////////////////////
641
+
642
+ /* Based on footnote generation code from:
643
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
644
+ */
645
+
646
+ footnotes: function () {
647
+ // Delete existing footnote entries in case we're reloading the footnotes.
648
+ var i;
649
+ var noteholder = document.getElementById("footnotes");
650
+ if (!noteholder) {
651
+ return;
652
+ }
653
+ var entriesToRemove = [];
654
+ for (i = 0; i < noteholder.childNodes.length; i++) {
655
+ var entry = noteholder.childNodes[i];
656
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
657
+ entriesToRemove.push(entry);
658
+ }
659
+ for (i = 0; i < entriesToRemove.length; i++) {
660
+ noteholder.removeChild(entriesToRemove[i]);
661
+ }
662
+
663
+ // Rebuild footnote entries.
664
+ var cont = document.getElementById("content");
665
+ var spans = cont.getElementsByTagName("span");
666
+ var refs = {};
667
+ var n = 0;
668
+ for (i=0; i<spans.length; i++) {
669
+ if (spans[i].className == "footnote") {
670
+ n++;
671
+ var note = spans[i].getAttribute("data-note");
672
+ if (!note) {
673
+ // Use [\s\S] in place of . so multi-line matches work.
674
+ // Because JavaScript has no s (dotall) regex flag.
675
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
676
+ spans[i].innerHTML =
677
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
678
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
679
+ spans[i].setAttribute("data-note", note);
680
+ }
681
+ noteholder.innerHTML +=
682
+ "<div class='footnote' id='_footnote_" + n + "'>" +
683
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
684
+ n + "</a>. " + note + "</div>";
685
+ var id =spans[i].getAttribute("id");
686
+ if (id != null) refs["#"+id] = n;
687
+ }
688
+ }
689
+ if (n == 0)
690
+ noteholder.parentNode.removeChild(noteholder);
691
+ else {
692
+ // Process footnoterefs.
693
+ for (i=0; i<spans.length; i++) {
694
+ if (spans[i].className == "footnoteref") {
695
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
696
+ href = href.match(/#.*/)[0]; // Because IE returns the full URL.
697
+ n = refs[href];
698
+ spans[i].innerHTML =
699
+ "[<a href='#_footnote_" + n +
700
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
701
+ }
702
+ }
703
+ }
704
+ },
705
+
706
+ install: function(toclevels) {
707
+ var timerId;
708
+
709
+ function reinstall() {
710
+ asciidoc.footnotes();
711
+ if (toclevels) {
712
+ asciidoc.toc(toclevels);
713
+ }
714
+ }
715
+
716
+ function reinstallAndRemoveTimer() {
717
+ clearInterval(timerId);
718
+ reinstall();
719
+ }
720
+
721
+ timerId = setInterval(reinstall, 500);
722
+ if (document.addEventListener)
723
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
724
+ else
725
+ window.onload = reinstallAndRemoveTimer;
726
+ }
727
+
728
+ }
729
+ asciidoc.install();
730
+ /*]]>*/
731
+ </script>
732
+ </head>
733
+ <body class="article">
734
+ <div id="header">
735
+ <h1>CLASSIFIER_TESTER(1)</h1>
736
+ </div>
737
+ <div id="content">
738
+ <div class="sect1">
739
+ <h2 id="_name">NAME</h2>
740
+ <div class="sectionbody">
741
+ <div class="paragraph"><p>classifier_tester - for <strong>legacy tesseract</strong> engine.</p></div>
742
+ </div>
743
+ </div>
744
+ <div class="sect1">
745
+ <h2 id="_synopsis">SYNOPSIS</h2>
746
+ <div class="sectionbody">
747
+ <div class="paragraph"><p><strong>classifier_tester</strong> -U <em>unicharset_file</em> -F <em>font_properties_file</em> -X <em>xheights_file</em> -classifier <em>x</em> -lang <em>lang</em> [-output_trainer trainer] *.tr</p></div>
748
+ </div>
749
+ </div>
750
+ <div class="sect1">
751
+ <h2 id="_description">DESCRIPTION</h2>
752
+ <div class="sectionbody">
753
+ <div class="paragraph"><p>classifier_tester(1) runs Tesseract in a special mode.
754
+ It takes a list of .tr files and tests a character classifier
755
+ on data as formatted for training,
756
+ but it doesn&#8217;t have to be the same as the training data.</p></div>
757
+ </div>
758
+ </div>
759
+ <div class="sect1">
760
+ <h2 id="_in_out_arguments">IN/OUT ARGUMENTS</h2>
761
+ <div class="sectionbody">
762
+ <div class="paragraph"><p>a list of .tr files</p></div>
763
+ </div>
764
+ </div>
765
+ <div class="sect1">
766
+ <h2 id="_options">OPTIONS</h2>
767
+ <div class="sectionbody">
768
+ <div class="dlist"><dl>
769
+ <dt class="hdlist1">
770
+ -lang <em>lang</em>
771
+ </dt>
772
+ <dd>
773
+ <p>
774
+ (Input) three-character language code; default value <em>eng</em>.
775
+ </p>
776
+ </dd>
777
+ <dt class="hdlist1">
778
+ -classifier <em>x</em>
779
+ </dt>
780
+ <dd>
781
+ <p>
782
+ (Input) One of "pruner", "full".
783
+ </p>
784
+ </dd>
785
+ <dt class="hdlist1">
786
+ -U <em>unicharset</em>
787
+ </dt>
788
+ <dd>
789
+ <p>
790
+ (Input) The unicharset for the language.
791
+ </p>
792
+ </dd>
793
+ <dt class="hdlist1">
794
+ -F <em>font_properties_file</em>
795
+ </dt>
796
+ <dd>
797
+ <p>
798
+ (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1:
799
+ </p>
800
+ <div class="literalblock">
801
+ <div class="content monospaced">
802
+ <pre>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</pre>
803
+ </div></div>
804
+ </dd>
805
+ <dt class="hdlist1">
806
+ -X <em>xheights_file</em>
807
+ </dt>
808
+ <dd>
809
+ <p>
810
+ (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ]
811
+ </p>
812
+ <div class="literalblock">
813
+ <div class="content monospaced">
814
+ <pre>*font_name* *xheight*</pre>
815
+ </div></div>
816
+ </dd>
817
+ <dt class="hdlist1">
818
+ -output_trainer <em>trainer</em>
819
+ </dt>
820
+ <dd>
821
+ <p>
822
+ (Output, Optional) Filename for output trainer.
823
+ </p>
824
+ </dd>
825
+ </dl></div>
826
+ </div>
827
+ </div>
828
+ <div class="sect1">
829
+ <h2 id="_see_also">SEE ALSO</h2>
830
+ <div class="sectionbody">
831
+ <div class="paragraph"><p>tesseract(1)</p></div>
832
+ </div>
833
+ </div>
834
+ <div class="sect1">
835
+ <h2 id="_copying">COPYING</h2>
836
+ <div class="sectionbody">
837
+ <div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
838
+ Licensed under the Apache License, Version 2.0</p></div>
839
+ </div>
840
+ </div>
841
+ <div class="sect1">
842
+ <h2 id="_author">AUTHOR</h2>
843
+ <div class="sectionbody">
844
+ <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
845
+ at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
846
+ </div>
847
+ </div>
848
+ </div>
849
+ <div id="footnotes"><hr></div>
850
+ <div id="footer">
851
+ <div id="footer-text">
852
+ Last updated
853
+ 2024-05-03 17:30:23 CEST
854
+ </div>
855
+ </div>
856
+ </body>
857
+ </html>
classifier_tester.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e059fdeafdc95a6a4220619226ad0d0e7a48e95fd6902c7cdd523af376d3bd74
3
+ size 4987040
cntraining.1.html ADDED
@@ -0,0 +1,803 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta name="generator" content="AsciiDoc 10.2.0">
6
+ <title>CNTRAINING(1)</title>
7
+ <style type="text/css">
8
+ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
9
+
10
+ /* Default font. */
11
+ body {
12
+ font-family: Georgia,serif;
13
+ }
14
+
15
+ /* Title font. */
16
+ h1, h2, h3, h4, h5, h6,
17
+ div.title, caption.title,
18
+ thead, p.table.header,
19
+ #toctitle,
20
+ #author, #revnumber, #revdate, #revremark,
21
+ #footer {
22
+ font-family: Arial,Helvetica,sans-serif;
23
+ }
24
+
25
+ body {
26
+ margin: 1em 5% 1em 5%;
27
+ }
28
+
29
+ a {
30
+ color: blue;
31
+ text-decoration: underline;
32
+ }
33
+ a:visited {
34
+ color: fuchsia;
35
+ }
36
+
37
+ em {
38
+ font-style: italic;
39
+ color: navy;
40
+ }
41
+
42
+ strong {
43
+ font-weight: bold;
44
+ color: #083194;
45
+ }
46
+
47
+ h1, h2, h3, h4, h5, h6 {
48
+ color: #527bbd;
49
+ margin-top: 1.2em;
50
+ margin-bottom: 0.5em;
51
+ line-height: 1.3;
52
+ }
53
+
54
+ h1, h2, h3 {
55
+ border-bottom: 2px solid silver;
56
+ }
57
+ h2 {
58
+ padding-top: 0.5em;
59
+ }
60
+ h3 {
61
+ float: left;
62
+ }
63
+ h3 + * {
64
+ clear: left;
65
+ }
66
+ h5 {
67
+ font-size: 1.0em;
68
+ }
69
+
70
+ div.sectionbody {
71
+ margin-left: 0;
72
+ }
73
+
74
+ hr {
75
+ border: 1px solid silver;
76
+ }
77
+
78
+ p {
79
+ margin-top: 0.5em;
80
+ margin-bottom: 0.5em;
81
+ }
82
+
83
+ ul, ol, li > p {
84
+ margin-top: 0;
85
+ }
86
+ ul > li { color: #aaa; }
87
+ ul > li > * { color: black; }
88
+
89
+ .monospaced, code, pre {
90
+ font-family: "Courier New", Courier, monospace;
91
+ font-size: inherit;
92
+ color: navy;
93
+ padding: 0;
94
+ margin: 0;
95
+ }
96
+ pre {
97
+ white-space: pre-wrap;
98
+ }
99
+
100
+ #author {
101
+ color: #527bbd;
102
+ font-weight: bold;
103
+ font-size: 1.1em;
104
+ }
105
+ #email {
106
+ }
107
+ #revnumber, #revdate, #revremark {
108
+ }
109
+
110
+ #footer {
111
+ font-size: small;
112
+ border-top: 2px solid silver;
113
+ padding-top: 0.5em;
114
+ margin-top: 4.0em;
115
+ }
116
+ #footer-text {
117
+ float: left;
118
+ padding-bottom: 0.5em;
119
+ }
120
+ #footer-badges {
121
+ float: right;
122
+ padding-bottom: 0.5em;
123
+ }
124
+
125
+ #preamble {
126
+ margin-top: 1.5em;
127
+ margin-bottom: 1.5em;
128
+ }
129
+ div.imageblock, div.exampleblock, div.verseblock,
130
+ div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
131
+ div.admonitionblock {
132
+ margin-top: 1.0em;
133
+ margin-bottom: 1.5em;
134
+ }
135
+ div.admonitionblock {
136
+ margin-top: 2.0em;
137
+ margin-bottom: 2.0em;
138
+ margin-right: 10%;
139
+ color: #606060;
140
+ }
141
+
142
+ div.content { /* Block element content. */
143
+ padding: 0;
144
+ }
145
+
146
+ /* Block element titles. */
147
+ div.title, caption.title {
148
+ color: #527bbd;
149
+ font-weight: bold;
150
+ text-align: left;
151
+ margin-top: 1.0em;
152
+ margin-bottom: 0.5em;
153
+ }
154
+ div.title + * {
155
+ margin-top: 0;
156
+ }
157
+
158
+ td div.title:first-child {
159
+ margin-top: 0.0em;
160
+ }
161
+ div.content div.title:first-child {
162
+ margin-top: 0.0em;
163
+ }
164
+ div.content + div.title {
165
+ margin-top: 0.0em;
166
+ }
167
+
168
+ div.sidebarblock > div.content {
169
+ background: #ffffee;
170
+ border: 1px solid #dddddd;
171
+ border-left: 4px solid #f0f0f0;
172
+ padding: 0.5em;
173
+ }
174
+
175
+ div.listingblock > div.content {
176
+ border: 1px solid #dddddd;
177
+ border-left: 5px solid #f0f0f0;
178
+ background: #f8f8f8;
179
+ padding: 0.5em;
180
+ }
181
+
182
+ div.quoteblock, div.verseblock {
183
+ padding-left: 1.0em;
184
+ margin-left: 1.0em;
185
+ margin-right: 10%;
186
+ border-left: 5px solid #f0f0f0;
187
+ color: #888;
188
+ }
189
+
190
+ div.quoteblock > div.attribution {
191
+ padding-top: 0.5em;
192
+ text-align: right;
193
+ }
194
+
195
+ div.verseblock > pre.content {
196
+ font-family: inherit;
197
+ font-size: inherit;
198
+ }
199
+ div.verseblock > div.attribution {
200
+ padding-top: 0.75em;
201
+ text-align: left;
202
+ }
203
+ /* DEPRECATED: Pre version 8.2.7 verse style literal block. */
204
+ div.verseblock + div.attribution {
205
+ text-align: left;
206
+ }
207
+
208
+ div.admonitionblock .icon {
209
+ vertical-align: top;
210
+ font-size: 1.1em;
211
+ font-weight: bold;
212
+ text-decoration: underline;
213
+ color: #527bbd;
214
+ padding-right: 0.5em;
215
+ }
216
+ div.admonitionblock td.content {
217
+ padding-left: 0.5em;
218
+ border-left: 3px solid #dddddd;
219
+ }
220
+
221
+ div.exampleblock > div.content {
222
+ border-left: 3px solid #dddddd;
223
+ padding-left: 0.5em;
224
+ }
225
+
226
+ div.imageblock div.content { padding-left: 0; }
227
+ span.image img { border-style: none; vertical-align: text-bottom; }
228
+ a.image:visited { color: white; }
229
+
230
+ dl {
231
+ margin-top: 0.8em;
232
+ margin-bottom: 0.8em;
233
+ }
234
+ dt {
235
+ margin-top: 0.5em;
236
+ margin-bottom: 0;
237
+ font-style: normal;
238
+ color: navy;
239
+ }
240
+ dd > *:first-child {
241
+ margin-top: 0.1em;
242
+ }
243
+
244
+ ul, ol {
245
+ list-style-position: outside;
246
+ }
247
+ ol.arabic {
248
+ list-style-type: decimal;
249
+ }
250
+ ol.loweralpha {
251
+ list-style-type: lower-alpha;
252
+ }
253
+ ol.upperalpha {
254
+ list-style-type: upper-alpha;
255
+ }
256
+ ol.lowerroman {
257
+ list-style-type: lower-roman;
258
+ }
259
+ ol.upperroman {
260
+ list-style-type: upper-roman;
261
+ }
262
+
263
+ div.compact ul, div.compact ol,
264
+ div.compact p, div.compact p,
265
+ div.compact div, div.compact div {
266
+ margin-top: 0.1em;
267
+ margin-bottom: 0.1em;
268
+ }
269
+
270
+ tfoot {
271
+ font-weight: bold;
272
+ }
273
+ td > div.verse {
274
+ white-space: pre;
275
+ }
276
+
277
+ div.hdlist {
278
+ margin-top: 0.8em;
279
+ margin-bottom: 0.8em;
280
+ }
281
+ div.hdlist tr {
282
+ padding-bottom: 15px;
283
+ }
284
+ dt.hdlist1.strong, td.hdlist1.strong {
285
+ font-weight: bold;
286
+ }
287
+ td.hdlist1 {
288
+ vertical-align: top;
289
+ font-style: normal;
290
+ padding-right: 0.8em;
291
+ color: navy;
292
+ }
293
+ td.hdlist2 {
294
+ vertical-align: top;
295
+ }
296
+ div.hdlist.compact tr {
297
+ margin: 0;
298
+ padding-bottom: 0;
299
+ }
300
+
301
+ .comment {
302
+ background: yellow;
303
+ }
304
+
305
+ .footnote, .footnoteref {
306
+ font-size: 0.8em;
307
+ }
308
+
309
+ span.footnote, span.footnoteref {
310
+ vertical-align: super;
311
+ }
312
+
313
+ #footnotes {
314
+ margin: 20px 0 20px 0;
315
+ padding: 7px 0 0 0;
316
+ }
317
+
318
+ #footnotes div.footnote {
319
+ margin: 0 0 5px 0;
320
+ }
321
+
322
+ #footnotes hr {
323
+ border: none;
324
+ border-top: 1px solid silver;
325
+ height: 1px;
326
+ text-align: left;
327
+ margin-left: 0;
328
+ width: 20%;
329
+ min-width: 100px;
330
+ }
331
+
332
+ div.colist td {
333
+ padding-right: 0.5em;
334
+ padding-bottom: 0.3em;
335
+ vertical-align: top;
336
+ }
337
+ div.colist td img {
338
+ margin-top: 0.3em;
339
+ }
340
+
341
+ @media print {
342
+ #footer-badges { display: none; }
343
+ }
344
+
345
+ #toc {
346
+ margin-bottom: 2.5em;
347
+ }
348
+
349
+ #toctitle {
350
+ color: #527bbd;
351
+ font-size: 1.1em;
352
+ font-weight: bold;
353
+ margin-top: 1.0em;
354
+ margin-bottom: 0.1em;
355
+ }
356
+
357
+ div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
358
+ margin-top: 0;
359
+ margin-bottom: 0;
360
+ }
361
+ div.toclevel2 {
362
+ margin-left: 2em;
363
+ font-size: 0.9em;
364
+ }
365
+ div.toclevel3 {
366
+ margin-left: 4em;
367
+ font-size: 0.9em;
368
+ }
369
+ div.toclevel4 {
370
+ margin-left: 6em;
371
+ font-size: 0.9em;
372
+ }
373
+
374
+ span.aqua { color: aqua; }
375
+ span.black { color: black; }
376
+ span.blue { color: blue; }
377
+ span.fuchsia { color: fuchsia; }
378
+ span.gray { color: gray; }
379
+ span.green { color: green; }
380
+ span.lime { color: lime; }
381
+ span.maroon { color: maroon; }
382
+ span.navy { color: navy; }
383
+ span.olive { color: olive; }
384
+ span.purple { color: purple; }
385
+ span.red { color: red; }
386
+ span.silver { color: silver; }
387
+ span.teal { color: teal; }
388
+ span.white { color: white; }
389
+ span.yellow { color: yellow; }
390
+
391
+ span.aqua-background { background: aqua; }
392
+ span.black-background { background: black; }
393
+ span.blue-background { background: blue; }
394
+ span.fuchsia-background { background: fuchsia; }
395
+ span.gray-background { background: gray; }
396
+ span.green-background { background: green; }
397
+ span.lime-background { background: lime; }
398
+ span.maroon-background { background: maroon; }
399
+ span.navy-background { background: navy; }
400
+ span.olive-background { background: olive; }
401
+ span.purple-background { background: purple; }
402
+ span.red-background { background: red; }
403
+ span.silver-background { background: silver; }
404
+ span.teal-background { background: teal; }
405
+ span.white-background { background: white; }
406
+ span.yellow-background { background: yellow; }
407
+
408
+ span.big { font-size: 2em; }
409
+ span.small { font-size: 0.6em; }
410
+
411
+ span.underline { text-decoration: underline; }
412
+ span.overline { text-decoration: overline; }
413
+ span.line-through { text-decoration: line-through; }
414
+
415
+ div.unbreakable { page-break-inside: avoid; }
416
+
417
+
418
+ /*
419
+ * xhtml11 specific
420
+ *
421
+ * */
422
+
423
+ div.tableblock {
424
+ margin-top: 1.0em;
425
+ margin-bottom: 1.5em;
426
+ }
427
+ div.tableblock > table {
428
+ border: 3px solid #527bbd;
429
+ }
430
+ thead, p.table.header {
431
+ font-weight: bold;
432
+ color: #527bbd;
433
+ }
434
+ p.table {
435
+ margin-top: 0;
436
+ }
437
+ /* Because the table frame attribute is overridden by CSS in most browsers. */
438
+ div.tableblock > table[frame="void"] {
439
+ border-style: none;
440
+ }
441
+ div.tableblock > table[frame="hsides"] {
442
+ border-left-style: none;
443
+ border-right-style: none;
444
+ }
445
+ div.tableblock > table[frame="vsides"] {
446
+ border-top-style: none;
447
+ border-bottom-style: none;
448
+ }
449
+
450
+
451
+ /*
452
+ * html5 specific
453
+ *
454
+ * */
455
+
456
+ table.tableblock {
457
+ margin-top: 1.0em;
458
+ margin-bottom: 1.5em;
459
+ }
460
+ thead, p.tableblock.header {
461
+ font-weight: bold;
462
+ color: #527bbd;
463
+ }
464
+ p.tableblock {
465
+ margin-top: 0;
466
+ }
467
+ table.tableblock {
468
+ border-width: 3px;
469
+ border-spacing: 0px;
470
+ border-style: solid;
471
+ border-color: #527bbd;
472
+ border-collapse: collapse;
473
+ }
474
+ th.tableblock, td.tableblock {
475
+ border-width: 1px;
476
+ padding: 4px;
477
+ border-style: solid;
478
+ border-color: #527bbd;
479
+ }
480
+
481
+ table.tableblock.frame-topbot {
482
+ border-left-style: hidden;
483
+ border-right-style: hidden;
484
+ }
485
+ table.tableblock.frame-sides {
486
+ border-top-style: hidden;
487
+ border-bottom-style: hidden;
488
+ }
489
+ table.tableblock.frame-none {
490
+ border-style: hidden;
491
+ }
492
+
493
+ th.tableblock.halign-left, td.tableblock.halign-left {
494
+ text-align: left;
495
+ }
496
+ th.tableblock.halign-center, td.tableblock.halign-center {
497
+ text-align: center;
498
+ }
499
+ th.tableblock.halign-right, td.tableblock.halign-right {
500
+ text-align: right;
501
+ }
502
+
503
+ th.tableblock.valign-top, td.tableblock.valign-top {
504
+ vertical-align: top;
505
+ }
506
+ th.tableblock.valign-middle, td.tableblock.valign-middle {
507
+ vertical-align: middle;
508
+ }
509
+ th.tableblock.valign-bottom, td.tableblock.valign-bottom {
510
+ vertical-align: bottom;
511
+ }
512
+
513
+
514
+ /*
515
+ * manpage specific
516
+ *
517
+ * */
518
+
519
+ body.manpage h1 {
520
+ padding-top: 0.5em;
521
+ padding-bottom: 0.5em;
522
+ border-top: 2px solid silver;
523
+ border-bottom: 2px solid silver;
524
+ }
525
+ body.manpage h2 {
526
+ border-style: none;
527
+ }
528
+ body.manpage div.sectionbody {
529
+ margin-left: 3em;
530
+ }
531
+
532
+ @media print {
533
+ body.manpage div#toc { display: none; }
534
+ }
535
+
536
+
537
+ </style>
538
+ <script type="text/javascript">
539
+ /*<![CDATA[*/
540
+ var asciidoc = { // Namespace.
541
+
542
+ /////////////////////////////////////////////////////////////////////
543
+ // Table Of Contents generator
544
+ /////////////////////////////////////////////////////////////////////
545
+
546
+ /* Author: Mihai Bazon, September 2002
547
+ * http://students.infoiasi.ro/~mishoo
548
+ *
549
+ * Table Of Content generator
550
+ * Version: 0.4
551
+ *
552
+ * Feel free to use this script under the terms of the GNU General Public
553
+ * License, as long as you do not remove or alter this notice.
554
+ */
555
+
556
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
557
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
558
+
559
+ // toclevels = 1..4.
560
+ toc: function (toclevels) {
561
+
562
+ function getText(el) {
563
+ var text = "";
564
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
565
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
566
+ text += i.data;
567
+ else if (i.firstChild != null)
568
+ text += getText(i);
569
+ }
570
+ return text;
571
+ }
572
+
573
+ function TocEntry(el, text, toclevel) {
574
+ this.element = el;
575
+ this.text = text;
576
+ this.toclevel = toclevel;
577
+ }
578
+
579
+ function tocEntries(el, toclevels) {
580
+ var result = new Array;
581
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
582
+ // Function that scans the DOM tree for header elements (the DOM2
583
+ // nodeIterator API would be a better technique but not supported by all
584
+ // browsers).
585
+ var iterate = function (el) {
586
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
587
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
588
+ var mo = re.exec(i.tagName);
589
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
590
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
591
+ }
592
+ iterate(i);
593
+ }
594
+ }
595
+ }
596
+ iterate(el);
597
+ return result;
598
+ }
599
+
600
+ var toc = document.getElementById("toc");
601
+ if (!toc) {
602
+ return;
603
+ }
604
+
605
+ // Delete existing TOC entries in case we're reloading the TOC.
606
+ var tocEntriesToRemove = [];
607
+ var i;
608
+ for (i = 0; i < toc.childNodes.length; i++) {
609
+ var entry = toc.childNodes[i];
610
+ if (entry.nodeName.toLowerCase() == 'div'
611
+ && entry.getAttribute("class")
612
+ && entry.getAttribute("class").match(/^toclevel/))
613
+ tocEntriesToRemove.push(entry);
614
+ }
615
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
616
+ toc.removeChild(tocEntriesToRemove[i]);
617
+ }
618
+
619
+ // Rebuild TOC entries.
620
+ var entries = tocEntries(document.getElementById("content"), toclevels);
621
+ for (var i = 0; i < entries.length; ++i) {
622
+ var entry = entries[i];
623
+ if (entry.element.id == "")
624
+ entry.element.id = "_toc_" + i;
625
+ var a = document.createElement("a");
626
+ a.href = "#" + entry.element.id;
627
+ a.appendChild(document.createTextNode(entry.text));
628
+ var div = document.createElement("div");
629
+ div.appendChild(a);
630
+ div.className = "toclevel" + entry.toclevel;
631
+ toc.appendChild(div);
632
+ }
633
+ if (entries.length == 0)
634
+ toc.parentNode.removeChild(toc);
635
+ },
636
+
637
+
638
+ /////////////////////////////////////////////////////////////////////
639
+ // Footnotes generator
640
+ /////////////////////////////////////////////////////////////////////
641
+
642
+ /* Based on footnote generation code from:
643
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
644
+ */
645
+
646
+ footnotes: function () {
647
+ // Delete existing footnote entries in case we're reloading the footnotes.
648
+ var i;
649
+ var noteholder = document.getElementById("footnotes");
650
+ if (!noteholder) {
651
+ return;
652
+ }
653
+ var entriesToRemove = [];
654
+ for (i = 0; i < noteholder.childNodes.length; i++) {
655
+ var entry = noteholder.childNodes[i];
656
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
657
+ entriesToRemove.push(entry);
658
+ }
659
+ for (i = 0; i < entriesToRemove.length; i++) {
660
+ noteholder.removeChild(entriesToRemove[i]);
661
+ }
662
+
663
+ // Rebuild footnote entries.
664
+ var cont = document.getElementById("content");
665
+ var spans = cont.getElementsByTagName("span");
666
+ var refs = {};
667
+ var n = 0;
668
+ for (i=0; i<spans.length; i++) {
669
+ if (spans[i].className == "footnote") {
670
+ n++;
671
+ var note = spans[i].getAttribute("data-note");
672
+ if (!note) {
673
+ // Use [\s\S] in place of . so multi-line matches work.
674
+ // Because JavaScript has no s (dotall) regex flag.
675
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
676
+ spans[i].innerHTML =
677
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
678
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
679
+ spans[i].setAttribute("data-note", note);
680
+ }
681
+ noteholder.innerHTML +=
682
+ "<div class='footnote' id='_footnote_" + n + "'>" +
683
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
684
+ n + "</a>. " + note + "</div>";
685
+ var id =spans[i].getAttribute("id");
686
+ if (id != null) refs["#"+id] = n;
687
+ }
688
+ }
689
+ if (n == 0)
690
+ noteholder.parentNode.removeChild(noteholder);
691
+ else {
692
+ // Process footnoterefs.
693
+ for (i=0; i<spans.length; i++) {
694
+ if (spans[i].className == "footnoteref") {
695
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
696
+ href = href.match(/#.*/)[0]; // Because IE return full URL.
697
+ n = refs[href];
698
+ spans[i].innerHTML =
699
+ "[<a href='#_footnote_" + n +
700
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
701
+ }
702
+ }
703
+ }
704
+ },
705
+
706
+ install: function(toclevels) {
707
+ var timerId;
708
+
709
+ function reinstall() {
710
+ asciidoc.footnotes();
711
+ if (toclevels) {
712
+ asciidoc.toc(toclevels);
713
+ }
714
+ }
715
+
716
+ function reinstallAndRemoveTimer() {
717
+ clearInterval(timerId);
718
+ reinstall();
719
+ }
720
+
721
+ timerId = setInterval(reinstall, 500);
722
+ if (document.addEventListener)
723
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
724
+ else
725
+ window.onload = reinstallAndRemoveTimer;
726
+ }
727
+
728
+ }
729
+ asciidoc.install();
730
+ /*]]>*/
731
+ </script>
732
+ </head>
733
+ <body class="article">
734
+ <div id="header">
735
+ <h1>CNTRAINING(1)</h1>
736
+ </div>
737
+ <div id="content">
738
+ <div class="sect1">
739
+ <h2 id="_name">NAME</h2>
740
+ <div class="sectionbody">
741
+ <div class="paragraph"><p>cntraining - character normalization training for Tesseract</p></div>
742
+ </div>
743
+ </div>
744
+ <div class="sect1">
745
+ <h2 id="_synopsis">SYNOPSIS</h2>
746
+ <div class="sectionbody">
747
+ <div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>&#8230;</p></div>
748
+ </div>
749
+ </div>
750
+ <div class="sect1">
751
+ <h2 id="_description">DESCRIPTION</h2>
752
+ <div class="sectionbody">
753
+ <div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the
754
+ <strong>normproto</strong> data file (the character normalization sensitivity
755
+ prototypes).</p></div>
756
+ </div>
757
+ </div>
758
+ <div class="sect1">
759
+ <h2 id="_options">OPTIONS</h2>
760
+ <div class="sectionbody">
761
+ <div class="dlist"><dl>
762
+ <dt class="hdlist1">
763
+ -D <em>dir</em>
764
+ </dt>
765
+ <dd>
766
+ <p>
767
+ Directory to write output files to.
768
+ </p>
769
+ </dd>
770
+ </dl></div>
771
+ </div>
772
+ </div>
773
+ <div class="sect1">
774
+ <h2 id="_see_also">SEE ALSO</h2>
775
+ <div class="sectionbody">
776
+ <div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div>
777
+ <div class="paragraph"><p><a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a></p></div>
778
+ </div>
779
+ </div>
780
+ <div class="sect1">
781
+ <h2 id="_copying">COPYING</h2>
782
+ <div class="sectionbody">
783
+ <div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988
784
+ Licensed under the Apache License, Version 2.0</p></div>
785
+ </div>
786
+ </div>
787
+ <div class="sect1">
788
+ <h2 id="_author">AUTHOR</h2>
789
+ <div class="sectionbody">
790
+ <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
791
+ at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
792
+ </div>
793
+ </div>
794
+ </div>
795
+ <div id="footnotes"><hr></div>
796
+ <div id="footer">
797
+ <div id="footer-text">
798
+ Last updated
799
+ 2024-05-03 17:30:23 CEST
800
+ </div>
801
+ </div>
802
+ </body>
803
+ </html>
cntraining.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea0cf4a9e750cb73bafddcc1bd14193e3dc15f2874a5e5ba4191b4662d0560e
3
+ size 4709776
combine_lang_model.1.html ADDED
@@ -0,0 +1,888 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta name="generator" content="AsciiDoc 10.2.0">
6
+ <title>COMBINE_LANG_MODEL(1)</title>
7
+ <style type="text/css">
8
+ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
9
+
10
+ /* Default font. */
11
+ body {
12
+ font-family: Georgia,serif;
13
+ }
14
+
15
+ /* Title font. */
16
+ h1, h2, h3, h4, h5, h6,
17
+ div.title, caption.title,
18
+ thead, p.table.header,
19
+ #toctitle,
20
+ #author, #revnumber, #revdate, #revremark,
21
+ #footer {
22
+ font-family: Arial,Helvetica,sans-serif;
23
+ }
24
+
25
+ body {
26
+ margin: 1em 5% 1em 5%;
27
+ }
28
+
29
+ a {
30
+ color: blue;
31
+ text-decoration: underline;
32
+ }
33
+ a:visited {
34
+ color: fuchsia;
35
+ }
36
+
37
+ em {
38
+ font-style: italic;
39
+ color: navy;
40
+ }
41
+
42
+ strong {
43
+ font-weight: bold;
44
+ color: #083194;
45
+ }
46
+
47
+ h1, h2, h3, h4, h5, h6 {
48
+ color: #527bbd;
49
+ margin-top: 1.2em;
50
+ margin-bottom: 0.5em;
51
+ line-height: 1.3;
52
+ }
53
+
54
+ h1, h2, h3 {
55
+ border-bottom: 2px solid silver;
56
+ }
57
+ h2 {
58
+ padding-top: 0.5em;
59
+ }
60
+ h3 {
61
+ float: left;
62
+ }
63
+ h3 + * {
64
+ clear: left;
65
+ }
66
+ h5 {
67
+ font-size: 1.0em;
68
+ }
69
+
70
+ div.sectionbody {
71
+ margin-left: 0;
72
+ }
73
+
74
+ hr {
75
+ border: 1px solid silver;
76
+ }
77
+
78
+ p {
79
+ margin-top: 0.5em;
80
+ margin-bottom: 0.5em;
81
+ }
82
+
83
+ ul, ol, li > p {
84
+ margin-top: 0;
85
+ }
86
+ ul > li { color: #aaa; }
87
+ ul > li > * { color: black; }
88
+
89
+ .monospaced, code, pre {
90
+ font-family: "Courier New", Courier, monospace;
91
+ font-size: inherit;
92
+ color: navy;
93
+ padding: 0;
94
+ margin: 0;
95
+ }
96
+ pre {
97
+ white-space: pre-wrap;
98
+ }
99
+
100
+ #author {
101
+ color: #527bbd;
102
+ font-weight: bold;
103
+ font-size: 1.1em;
104
+ }
105
+ #email {
106
+ }
107
+ #revnumber, #revdate, #revremark {
108
+ }
109
+
110
+ #footer {
111
+ font-size: small;
112
+ border-top: 2px solid silver;
113
+ padding-top: 0.5em;
114
+ margin-top: 4.0em;
115
+ }
116
+ #footer-text {
117
+ float: left;
118
+ padding-bottom: 0.5em;
119
+ }
120
+ #footer-badges {
121
+ float: right;
122
+ padding-bottom: 0.5em;
123
+ }
124
+
125
+ #preamble {
126
+ margin-top: 1.5em;
127
+ margin-bottom: 1.5em;
128
+ }
129
+ div.imageblock, div.exampleblock, div.verseblock,
130
+ div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
131
+ div.admonitionblock {
132
+ margin-top: 1.0em;
133
+ margin-bottom: 1.5em;
134
+ }
135
+ div.admonitionblock {
136
+ margin-top: 2.0em;
137
+ margin-bottom: 2.0em;
138
+ margin-right: 10%;
139
+ color: #606060;
140
+ }
141
+
142
+ div.content { /* Block element content. */
143
+ padding: 0;
144
+ }
145
+
146
+ /* Block element titles. */
147
+ div.title, caption.title {
148
+ color: #527bbd;
149
+ font-weight: bold;
150
+ text-align: left;
151
+ margin-top: 1.0em;
152
+ margin-bottom: 0.5em;
153
+ }
154
+ div.title + * {
155
+ margin-top: 0;
156
+ }
157
+
158
+ td div.title:first-child {
159
+ margin-top: 0.0em;
160
+ }
161
+ div.content div.title:first-child {
162
+ margin-top: 0.0em;
163
+ }
164
+ div.content + div.title {
165
+ margin-top: 0.0em;
166
+ }
167
+
168
+ div.sidebarblock > div.content {
169
+ background: #ffffee;
170
+ border: 1px solid #dddddd;
171
+ border-left: 4px solid #f0f0f0;
172
+ padding: 0.5em;
173
+ }
174
+
175
+ div.listingblock > div.content {
176
+ border: 1px solid #dddddd;
177
+ border-left: 5px solid #f0f0f0;
178
+ background: #f8f8f8;
179
+ padding: 0.5em;
180
+ }
181
+
182
+ div.quoteblock, div.verseblock {
183
+ padding-left: 1.0em;
184
+ margin-left: 1.0em;
185
+ margin-right: 10%;
186
+ border-left: 5px solid #f0f0f0;
187
+ color: #888;
188
+ }
189
+
190
+ div.quoteblock > div.attribution {
191
+ padding-top: 0.5em;
192
+ text-align: right;
193
+ }
194
+
195
+ div.verseblock > pre.content {
196
+ font-family: inherit;
197
+ font-size: inherit;
198
+ }
199
+ div.verseblock > div.attribution {
200
+ padding-top: 0.75em;
201
+ text-align: left;
202
+ }
203
+ /* DEPRECATED: Pre version 8.2.7 verse style literal block. */
204
+ div.verseblock + div.attribution {
205
+ text-align: left;
206
+ }
207
+
208
+ div.admonitionblock .icon {
209
+ vertical-align: top;
210
+ font-size: 1.1em;
211
+ font-weight: bold;
212
+ text-decoration: underline;
213
+ color: #527bbd;
214
+ padding-right: 0.5em;
215
+ }
216
+ div.admonitionblock td.content {
217
+ padding-left: 0.5em;
218
+ border-left: 3px solid #dddddd;
219
+ }
220
+
221
+ div.exampleblock > div.content {
222
+ border-left: 3px solid #dddddd;
223
+ padding-left: 0.5em;
224
+ }
225
+
226
+ div.imageblock div.content { padding-left: 0; }
227
+ span.image img { border-style: none; vertical-align: text-bottom; }
228
+ a.image:visited { color: white; }
229
+
230
+ dl {
231
+ margin-top: 0.8em;
232
+ margin-bottom: 0.8em;
233
+ }
234
+ dt {
235
+ margin-top: 0.5em;
236
+ margin-bottom: 0;
237
+ font-style: normal;
238
+ color: navy;
239
+ }
240
+ dd > *:first-child {
241
+ margin-top: 0.1em;
242
+ }
243
+
244
+ ul, ol {
245
+ list-style-position: outside;
246
+ }
247
+ ol.arabic {
248
+ list-style-type: decimal;
249
+ }
250
+ ol.loweralpha {
251
+ list-style-type: lower-alpha;
252
+ }
253
+ ol.upperalpha {
254
+ list-style-type: upper-alpha;
255
+ }
256
+ ol.lowerroman {
257
+ list-style-type: lower-roman;
258
+ }
259
+ ol.upperroman {
260
+ list-style-type: upper-roman;
261
+ }
262
+
263
+ div.compact ul, div.compact ol,
264
+ div.compact p, div.compact p,
265
+ div.compact div, div.compact div {
266
+ margin-top: 0.1em;
267
+ margin-bottom: 0.1em;
268
+ }
269
+
270
+ tfoot {
271
+ font-weight: bold;
272
+ }
273
+ td > div.verse {
274
+ white-space: pre;
275
+ }
276
+
277
+ div.hdlist {
278
+ margin-top: 0.8em;
279
+ margin-bottom: 0.8em;
280
+ }
281
+ div.hdlist tr {
282
+ padding-bottom: 15px;
283
+ }
284
+ dt.hdlist1.strong, td.hdlist1.strong {
285
+ font-weight: bold;
286
+ }
287
+ td.hdlist1 {
288
+ vertical-align: top;
289
+ font-style: normal;
290
+ padding-right: 0.8em;
291
+ color: navy;
292
+ }
293
+ td.hdlist2 {
294
+ vertical-align: top;
295
+ }
296
+ div.hdlist.compact tr {
297
+ margin: 0;
298
+ padding-bottom: 0;
299
+ }
300
+
301
+ .comment {
302
+ background: yellow;
303
+ }
304
+
305
+ .footnote, .footnoteref {
306
+ font-size: 0.8em;
307
+ }
308
+
309
+ span.footnote, span.footnoteref {
310
+ vertical-align: super;
311
+ }
312
+
313
+ #footnotes {
314
+ margin: 20px 0 20px 0;
315
+ padding: 7px 0 0 0;
316
+ }
317
+
318
+ #footnotes div.footnote {
319
+ margin: 0 0 5px 0;
320
+ }
321
+
322
+ #footnotes hr {
323
+ border: none;
324
+ border-top: 1px solid silver;
325
+ height: 1px;
326
+ text-align: left;
327
+ margin-left: 0;
328
+ width: 20%;
329
+ min-width: 100px;
330
+ }
331
+
332
+ div.colist td {
333
+ padding-right: 0.5em;
334
+ padding-bottom: 0.3em;
335
+ vertical-align: top;
336
+ }
337
+ div.colist td img {
338
+ margin-top: 0.3em;
339
+ }
340
+
341
+ @media print {
342
+ #footer-badges { display: none; }
343
+ }
344
+
345
+ #toc {
346
+ margin-bottom: 2.5em;
347
+ }
348
+
349
+ #toctitle {
350
+ color: #527bbd;
351
+ font-size: 1.1em;
352
+ font-weight: bold;
353
+ margin-top: 1.0em;
354
+ margin-bottom: 0.1em;
355
+ }
356
+
357
+ div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
358
+ margin-top: 0;
359
+ margin-bottom: 0;
360
+ }
361
+ div.toclevel2 {
362
+ margin-left: 2em;
363
+ font-size: 0.9em;
364
+ }
365
+ div.toclevel3 {
366
+ margin-left: 4em;
367
+ font-size: 0.9em;
368
+ }
369
+ div.toclevel4 {
370
+ margin-left: 6em;
371
+ font-size: 0.9em;
372
+ }
373
+
374
+ span.aqua { color: aqua; }
375
+ span.black { color: black; }
376
+ span.blue { color: blue; }
377
+ span.fuchsia { color: fuchsia; }
378
+ span.gray { color: gray; }
379
+ span.green { color: green; }
380
+ span.lime { color: lime; }
381
+ span.maroon { color: maroon; }
382
+ span.navy { color: navy; }
383
+ span.olive { color: olive; }
384
+ span.purple { color: purple; }
385
+ span.red { color: red; }
386
+ span.silver { color: silver; }
387
+ span.teal { color: teal; }
388
+ span.white { color: white; }
389
+ span.yellow { color: yellow; }
390
+
391
+ span.aqua-background { background: aqua; }
392
+ span.black-background { background: black; }
393
+ span.blue-background { background: blue; }
394
+ span.fuchsia-background { background: fuchsia; }
395
+ span.gray-background { background: gray; }
396
+ span.green-background { background: green; }
397
+ span.lime-background { background: lime; }
398
+ span.maroon-background { background: maroon; }
399
+ span.navy-background { background: navy; }
400
+ span.olive-background { background: olive; }
401
+ span.purple-background { background: purple; }
402
+ span.red-background { background: red; }
403
+ span.silver-background { background: silver; }
404
+ span.teal-background { background: teal; }
405
+ span.white-background { background: white; }
406
+ span.yellow-background { background: yellow; }
407
+
408
+ span.big { font-size: 2em; }
409
+ span.small { font-size: 0.6em; }
410
+
411
+ span.underline { text-decoration: underline; }
412
+ span.overline { text-decoration: overline; }
413
+ span.line-through { text-decoration: line-through; }
414
+
415
+ div.unbreakable { page-break-inside: avoid; }
416
+
417
+
418
+ /*
419
+ * xhtml11 specific
420
+ *
421
+ * */
422
+
423
+ div.tableblock {
424
+ margin-top: 1.0em;
425
+ margin-bottom: 1.5em;
426
+ }
427
+ div.tableblock > table {
428
+ border: 3px solid #527bbd;
429
+ }
430
+ thead, p.table.header {
431
+ font-weight: bold;
432
+ color: #527bbd;
433
+ }
434
+ p.table {
435
+ margin-top: 0;
436
+ }
437
+ /* Because the table frame attribute is overridden by CSS in most browsers. */
438
+ div.tableblock > table[frame="void"] {
439
+ border-style: none;
440
+ }
441
+ div.tableblock > table[frame="hsides"] {
442
+ border-left-style: none;
443
+ border-right-style: none;
444
+ }
445
+ div.tableblock > table[frame="vsides"] {
446
+ border-top-style: none;
447
+ border-bottom-style: none;
448
+ }
449
+
450
+
451
+ /*
452
+ * html5 specific
453
+ *
454
+ * */
455
+
456
+ table.tableblock {
457
+ margin-top: 1.0em;
458
+ margin-bottom: 1.5em;
459
+ }
460
+ thead, p.tableblock.header {
461
+ font-weight: bold;
462
+ color: #527bbd;
463
+ }
464
+ p.tableblock {
465
+ margin-top: 0;
466
+ }
467
+ table.tableblock {
468
+ border-width: 3px;
469
+ border-spacing: 0px;
470
+ border-style: solid;
471
+ border-color: #527bbd;
472
+ border-collapse: collapse;
473
+ }
474
+ th.tableblock, td.tableblock {
475
+ border-width: 1px;
476
+ padding: 4px;
477
+ border-style: solid;
478
+ border-color: #527bbd;
479
+ }
480
+
481
+ table.tableblock.frame-topbot {
482
+ border-left-style: hidden;
483
+ border-right-style: hidden;
484
+ }
485
+ table.tableblock.frame-sides {
486
+ border-top-style: hidden;
487
+ border-bottom-style: hidden;
488
+ }
489
+ table.tableblock.frame-none {
490
+ border-style: hidden;
491
+ }
492
+
493
+ th.tableblock.halign-left, td.tableblock.halign-left {
494
+ text-align: left;
495
+ }
496
+ th.tableblock.halign-center, td.tableblock.halign-center {
497
+ text-align: center;
498
+ }
499
+ th.tableblock.halign-right, td.tableblock.halign-right {
500
+ text-align: right;
501
+ }
502
+
503
+ th.tableblock.valign-top, td.tableblock.valign-top {
504
+ vertical-align: top;
505
+ }
506
+ th.tableblock.valign-middle, td.tableblock.valign-middle {
507
+ vertical-align: middle;
508
+ }
509
+ th.tableblock.valign-bottom, td.tableblock.valign-bottom {
510
+ vertical-align: bottom;
511
+ }
512
+
513
+
514
+ /*
515
+ * manpage specific
516
+ *
517
+ * */
518
+
519
+ body.manpage h1 {
520
+ padding-top: 0.5em;
521
+ padding-bottom: 0.5em;
522
+ border-top: 2px solid silver;
523
+ border-bottom: 2px solid silver;
524
+ }
525
+ body.manpage h2 {
526
+ border-style: none;
527
+ }
528
+ body.manpage div.sectionbody {
529
+ margin-left: 3em;
530
+ }
531
+
532
+ @media print {
533
+ body.manpage div#toc { display: none; }
534
+ }
535
+
536
+
537
+ </style>
538
+ <script type="text/javascript">
539
+ /*<![CDATA[*/
540
+ var asciidoc = { // Namespace.
541
+
542
+ /////////////////////////////////////////////////////////////////////
543
+ // Table Of Contents generator
544
+ /////////////////////////////////////////////////////////////////////
545
+
546
+ /* Author: Mihai Bazon, September 2002
547
+ * http://students.infoiasi.ro/~mishoo
548
+ *
549
+ * Table Of Content generator
550
+ * Version: 0.4
551
+ *
552
+ * Feel free to use this script under the terms of the GNU General Public
553
+ * License, as long as you do not remove or alter this notice.
554
+ */
555
+
556
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
557
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
558
+
559
+ // toclevels = 1..4.
560
+ toc: function (toclevels) {
561
+
562
+ function getText(el) {
563
+ var text = "";
564
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
565
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
566
+ text += i.data;
567
+ else if (i.firstChild != null)
568
+ text += getText(i);
569
+ }
570
+ return text;
571
+ }
572
+
573
+ function TocEntry(el, text, toclevel) {
574
+ this.element = el;
575
+ this.text = text;
576
+ this.toclevel = toclevel;
577
+ }
578
+
579
+ function tocEntries(el, toclevels) {
580
+ var result = new Array;
581
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
582
+ // Function that scans the DOM tree for header elements (the DOM2
583
+ // nodeIterator API would be a better technique but not supported by all
584
+ // browsers).
585
+ var iterate = function (el) {
586
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
587
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
588
+ var mo = re.exec(i.tagName);
589
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
590
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
591
+ }
592
+ iterate(i);
593
+ }
594
+ }
595
+ }
596
+ iterate(el);
597
+ return result;
598
+ }
599
+
600
+ var toc = document.getElementById("toc");
601
+ if (!toc) {
602
+ return;
603
+ }
604
+
605
+ // Delete existing TOC entries in case we're reloading the TOC.
606
+ var tocEntriesToRemove = [];
607
+ var i;
608
+ for (i = 0; i < toc.childNodes.length; i++) {
609
+ var entry = toc.childNodes[i];
610
+ if (entry.nodeName.toLowerCase() == 'div'
611
+ && entry.getAttribute("class")
612
+ && entry.getAttribute("class").match(/^toclevel/))
613
+ tocEntriesToRemove.push(entry);
614
+ }
615
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
616
+ toc.removeChild(tocEntriesToRemove[i]);
617
+ }
618
+
619
+ // Rebuild TOC entries.
620
+ var entries = tocEntries(document.getElementById("content"), toclevels);
621
+ for (var i = 0; i < entries.length; ++i) {
622
+ var entry = entries[i];
623
+ if (entry.element.id == "")
624
+ entry.element.id = "_toc_" + i;
625
+ var a = document.createElement("a");
626
+ a.href = "#" + entry.element.id;
627
+ a.appendChild(document.createTextNode(entry.text));
628
+ var div = document.createElement("div");
629
+ div.appendChild(a);
630
+ div.className = "toclevel" + entry.toclevel;
631
+ toc.appendChild(div);
632
+ }
633
+ if (entries.length == 0)
634
+ toc.parentNode.removeChild(toc);
635
+ },
636
+
637
+
638
+ /////////////////////////////////////////////////////////////////////
639
+ // Footnotes generator
640
+ /////////////////////////////////////////////////////////////////////
641
+
642
+ /* Based on footnote generation code from:
643
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
644
+ */
645
+
646
+ footnotes: function () {
647
+ // Delete existing footnote entries in case we're reloading the footnotes.
648
+ var i;
649
+ var noteholder = document.getElementById("footnotes");
650
+ if (!noteholder) {
651
+ return;
652
+ }
653
+ var entriesToRemove = [];
654
+ for (i = 0; i < noteholder.childNodes.length; i++) {
655
+ var entry = noteholder.childNodes[i];
656
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
657
+ entriesToRemove.push(entry);
658
+ }
659
+ for (i = 0; i < entriesToRemove.length; i++) {
660
+ noteholder.removeChild(entriesToRemove[i]);
661
+ }
662
+
663
+ // Rebuild footnote entries.
664
+ var cont = document.getElementById("content");
665
+ var spans = cont.getElementsByTagName("span");
666
+ var refs = {};
667
+ var n = 0;
668
+ for (i=0; i<spans.length; i++) {
669
+ if (spans[i].className == "footnote") {
670
+ n++;
671
+ var note = spans[i].getAttribute("data-note");
672
+ if (!note) {
673
+ // Use [\s\S] in place of . so multi-line matches work.
674
+ // Because JavaScript has no s (dotall) regex flag.
675
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
676
+ spans[i].innerHTML =
677
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
678
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
679
+ spans[i].setAttribute("data-note", note);
680
+ }
681
+ noteholder.innerHTML +=
682
+ "<div class='footnote' id='_footnote_" + n + "'>" +
683
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
684
+ n + "</a>. " + note + "</div>";
685
+ var id =spans[i].getAttribute("id");
686
+ if (id != null) refs["#"+id] = n;
687
+ }
688
+ }
689
+ if (n == 0)
690
+ noteholder.parentNode.removeChild(noteholder);
691
+ else {
692
+ // Process footnoterefs.
693
+ for (i=0; i<spans.length; i++) {
694
+ if (spans[i].className == "footnoteref") {
695
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
696
+ href = href.match(/#.*/)[0]; // Because IE return full URL.
697
+ n = refs[href];
698
+ spans[i].innerHTML =
699
+ "[<a href='#_footnote_" + n +
700
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
701
+ }
702
+ }
703
+ }
704
+ },
705
+
706
+ install: function(toclevels) {
707
+ var timerId;
708
+
709
+ function reinstall() {
710
+ asciidoc.footnotes();
711
+ if (toclevels) {
712
+ asciidoc.toc(toclevels);
713
+ }
714
+ }
715
+
716
+ function reinstallAndRemoveTimer() {
717
+ clearInterval(timerId);
718
+ reinstall();
719
+ }
720
+
721
+ timerId = setInterval(reinstall, 500);
722
+ if (document.addEventListener)
723
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
724
+ else
725
+ window.onload = reinstallAndRemoveTimer;
726
+ }
727
+
728
+ }
729
+ asciidoc.install();
730
+ /*]]>*/
731
+ </script>
732
+ </head>
733
+ <body class="manpage">
734
+ <div id="header">
735
+ <h1>
736
+ COMBINE_LANG_MODEL(1) Manual Page
737
+ </h1>
738
+ <h2>NAME</h2>
739
+ <div class="sectionbody">
740
+ <p>combine_lang_model -
741
+ generate starter traineddata
742
+ </p>
743
+ </div>
744
+ </div>
745
+ <div id="content">
746
+ <div class="sect1">
747
+ <h2 id="_synopsis">SYNOPSIS</h2>
748
+ <div class="sectionbody">
749
+ <div class="paragraph"><p><strong>combine_lang_model</strong> --input_unicharset <em>filename</em> --script_dir <em>dirname</em> --output_dir <em>rootdir</em> --lang <em>lang</em> [--lang_is_rtl] [--pass_through_recoder] [--words file --puncs file --numbers file]</p></div>
750
+ </div>
751
+ </div>
752
+ <div class="sect1">
753
+ <h2 id="_description">DESCRIPTION</h2>
754
+ <div class="sectionbody">
755
+ <div class="paragraph"><p>combine_lang_model(1) generates a starter traineddata file that can be used to train an LSTM-based neural network model. It takes as input a unicharset and an optional set of wordlists. It eliminates the need to run set_unicharset_properties(1), wordlist2dawg(1), some non-existent binary to generate the recoder (unicode compressor), and finally combine_tessdata(1).</p></div>
756
+ </div>
757
+ </div>
758
+ <div class="sect1">
759
+ <h2 id="_options">OPTIONS</h2>
760
+ <div class="sectionbody">
761
+ <div class="dlist"><dl>
762
+ <dt class="hdlist1">
763
+ <em>--lang lang</em>
764
+ </dt>
765
+ <dd>
766
+ <p>
767
+ The language to use.
768
+ Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
769
+ </p>
770
+ </dd>
771
+ <dt class="hdlist1">
772
+ <em>--script_dir PATH</em>
773
+ </dt>
774
+ <dd>
775
+ <p>
776
+ Directory name for input script unicharsets. It should point to the location of langdata (github repo) directory. (type:string default:)
777
+ </p>
778
+ </dd>
779
+ <dt class="hdlist1">
780
+ <em>--input_unicharset FILE</em>
781
+ </dt>
782
+ <dd>
783
+ <p>
784
+ Unicharset to complete and use in encoding. It can be a hand-created file with incomplete fields. Its basic and script properties will be set before it is used. (type:string default:)
785
+ </p>
786
+ </dd>
787
+ <dt class="hdlist1">
788
+ <em>--lang_is_rtl BOOL</em>
789
+ </dt>
790
+ <dd>
791
+ <p>
792
+ True if language being processed is written right-to-left (e.g. Arabic/Hebrew). (type:bool default:false)
793
+ </p>
794
+ </dd>
795
+ <dt class="hdlist1">
796
+ <em>--pass_through_recoder BOOL</em>
797
+ </dt>
798
+ <dd>
799
+ <p>
800
+ If true, the recoder is a simple pass-through of the unicharset. Otherwise, the recoder is potentially a compression of it, obtained by encoding Hangul in Jamos, decomposing multi-unicode symbols into sequences of unicodes, and encoding Han using the data in the radical_table_data, which must be the content of the file langdata/radical-stroke.txt. (type:bool default:false)
801
+ </p>
802
+ </dd>
803
+ <dt class="hdlist1">
804
+ <em>--version_str STRING</em>
805
+ </dt>
806
+ <dd>
807
+ <p>
808
+ An arbitrary version label to add to traineddata file (type:string default:)
809
+ </p>
810
+ </dd>
811
+ <dt class="hdlist1">
812
+ <em>--words FILE</em>
813
+ </dt>
814
+ <dd>
815
+ <p>
816
+ (Optional) File listing words to use for the system dictionary (type:string default:)
817
+ </p>
818
+ </dd>
819
+ <dt class="hdlist1">
820
+ <em>--numbers FILE</em>
821
+ </dt>
822
+ <dd>
823
+ <p>
824
+ (Optional) File listing number patterns (type:string default:)
825
+ </p>
826
+ </dd>
827
+ <dt class="hdlist1">
828
+ <em>--puncs FILE</em>
829
+ </dt>
830
+ <dd>
831
+ <p>
832
+ (Optional) File listing punctuation patterns. The words/puncs/numbers lists may all be empty. If any of them is non-empty, then puncs must be non-empty. (type:string default:)
833
+ </p>
834
+ </dd>
835
+ <dt class="hdlist1">
836
+ <em>--output_dir PATH</em>
837
+ </dt>
838
+ <dd>
839
+ <p>
840
+ Root directory for output files. Output files will be written to &lt;output_dir&gt;/&lt;lang&gt;/&lt;lang&gt;.* (type:string default:)
841
+ </p>
842
+ </dd>
843
+ </dl></div>
844
+ </div>
845
+ </div>
846
+ <div class="sect1">
847
+ <h2 id="_history">HISTORY</h2>
848
+ <div class="sectionbody">
849
+ <div class="paragraph"><p>combine_lang_model(1) was first made available for tesseract 4.00.00alpha.</p></div>
850
+ </div>
851
+ </div>
852
+ <div class="sect1">
853
+ <h2 id="_resources">RESOURCES</h2>
854
+ <div class="sectionbody">
855
+ <div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br>
856
+ Information on training tesseract LSTM: <a href="https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html">https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html</a></p></div>
857
+ </div>
858
+ </div>
859
+ <div class="sect1">
860
+ <h2 id="_see_also">SEE ALSO</h2>
861
+ <div class="sectionbody">
862
+ <div class="paragraph"><p>tesseract(1)</p></div>
863
+ </div>
864
+ </div>
865
+ <div class="sect1">
866
+ <h2 id="_copying">COPYING</h2>
867
+ <div class="sectionbody">
868
+ <div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
869
+ Licensed under the Apache License, Version 2.0</p></div>
870
+ </div>
871
+ </div>
872
+ <div class="sect1">
873
+ <h2 id="_author">AUTHOR</h2>
874
+ <div class="sectionbody">
875
+ <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
876
+ at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
877
+ </div>
878
+ </div>
879
+ </div>
880
+ <div id="footnotes"><hr></div>
881
+ <div id="footer">
882
+ <div id="footer-text">
883
+ Last updated
884
+ 2024-05-03 17:30:23 CEST
885
+ </div>
886
+ </div>
887
+ </body>
888
+ </html>
combine_lang_model.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd26b40ab4c2633dade1a2678769e9c51075e552be9161d3ce652f52546697e
3
+ size 3503232
combine_tessdata.1.html ADDED
@@ -0,0 +1,1070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta name="generator" content="AsciiDoc 10.2.0">
6
+ <title>COMBINE_TESSDATA(1)</title>
7
+ <style type="text/css">
8
+ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
9
+
10
+ /* Default font. */
11
+ body {
12
+ font-family: Georgia,serif;
13
+ }
14
+
15
+ /* Title font. */
16
+ h1, h2, h3, h4, h5, h6,
17
+ div.title, caption.title,
18
+ thead, p.table.header,
19
+ #toctitle,
20
+ #author, #revnumber, #revdate, #revremark,
21
+ #footer {
22
+ font-family: Arial,Helvetica,sans-serif;
23
+ }
24
+
25
+ body {
26
+ margin: 1em 5% 1em 5%;
27
+ }
28
+
29
+ a {
30
+ color: blue;
31
+ text-decoration: underline;
32
+ }
33
+ a:visited {
34
+ color: fuchsia;
35
+ }
36
+
37
+ em {
38
+ font-style: italic;
39
+ color: navy;
40
+ }
41
+
42
+ strong {
43
+ font-weight: bold;
44
+ color: #083194;
45
+ }
46
+
47
+ h1, h2, h3, h4, h5, h6 {
48
+ color: #527bbd;
49
+ margin-top: 1.2em;
50
+ margin-bottom: 0.5em;
51
+ line-height: 1.3;
52
+ }
53
+
54
+ h1, h2, h3 {
55
+ border-bottom: 2px solid silver;
56
+ }
57
+ h2 {
58
+ padding-top: 0.5em;
59
+ }
60
+ h3 {
61
+ float: left;
62
+ }
63
+ h3 + * {
64
+ clear: left;
65
+ }
66
+ h5 {
67
+ font-size: 1.0em;
68
+ }
69
+
70
+ div.sectionbody {
71
+ margin-left: 0;
72
+ }
73
+
74
+ hr {
75
+ border: 1px solid silver;
76
+ }
77
+
78
+ p {
79
+ margin-top: 0.5em;
80
+ margin-bottom: 0.5em;
81
+ }
82
+
83
+ ul, ol, li > p {
84
+ margin-top: 0;
85
+ }
86
+ ul > li { color: #aaa; }
87
+ ul > li > * { color: black; }
88
+
89
+ .monospaced, code, pre {
90
+ font-family: "Courier New", Courier, monospace;
91
+ font-size: inherit;
92
+ color: navy;
93
+ padding: 0;
94
+ margin: 0;
95
+ }
96
+ pre {
97
+ white-space: pre-wrap;
98
+ }
99
+
100
+ #author {
101
+ color: #527bbd;
102
+ font-weight: bold;
103
+ font-size: 1.1em;
104
+ }
105
+ #email {
106
+ }
107
+ #revnumber, #revdate, #revremark {
108
+ }
109
+
110
+ #footer {
111
+ font-size: small;
112
+ border-top: 2px solid silver;
113
+ padding-top: 0.5em;
114
+ margin-top: 4.0em;
115
+ }
116
+ #footer-text {
117
+ float: left;
118
+ padding-bottom: 0.5em;
119
+ }
120
+ #footer-badges {
121
+ float: right;
122
+ padding-bottom: 0.5em;
123
+ }
124
+
125
+ #preamble {
126
+ margin-top: 1.5em;
127
+ margin-bottom: 1.5em;
128
+ }
129
+ div.imageblock, div.exampleblock, div.verseblock,
130
+ div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
131
+ div.admonitionblock {
132
+ margin-top: 1.0em;
133
+ margin-bottom: 1.5em;
134
+ }
135
+ div.admonitionblock {
136
+ margin-top: 2.0em;
137
+ margin-bottom: 2.0em;
138
+ margin-right: 10%;
139
+ color: #606060;
140
+ }
141
+
142
+ div.content { /* Block element content. */
143
+ padding: 0;
144
+ }
145
+
146
+ /* Block element titles. */
147
+ div.title, caption.title {
148
+ color: #527bbd;
149
+ font-weight: bold;
150
+ text-align: left;
151
+ margin-top: 1.0em;
152
+ margin-bottom: 0.5em;
153
+ }
154
+ div.title + * {
155
+ margin-top: 0;
156
+ }
157
+
158
+ td div.title:first-child {
159
+ margin-top: 0.0em;
160
+ }
161
+ div.content div.title:first-child {
162
+ margin-top: 0.0em;
163
+ }
164
+ div.content + div.title {
165
+ margin-top: 0.0em;
166
+ }
167
+
168
+ div.sidebarblock > div.content {
169
+ background: #ffffee;
170
+ border: 1px solid #dddddd;
171
+ border-left: 4px solid #f0f0f0;
172
+ padding: 0.5em;
173
+ }
174
+
175
+ div.listingblock > div.content {
176
+ border: 1px solid #dddddd;
177
+ border-left: 5px solid #f0f0f0;
178
+ background: #f8f8f8;
179
+ padding: 0.5em;
180
+ }
181
+
182
+ div.quoteblock, div.verseblock {
183
+ padding-left: 1.0em;
184
+ margin-left: 1.0em;
185
+ margin-right: 10%;
186
+ border-left: 5px solid #f0f0f0;
187
+ color: #888;
188
+ }
189
+
190
+ div.quoteblock > div.attribution {
191
+ padding-top: 0.5em;
192
+ text-align: right;
193
+ }
194
+
195
+ div.verseblock > pre.content {
196
+ font-family: inherit;
197
+ font-size: inherit;
198
+ }
199
+ div.verseblock > div.attribution {
200
+ padding-top: 0.75em;
201
+ text-align: left;
202
+ }
203
+ /* DEPRECATED: Pre version 8.2.7 verse style literal block. */
204
+ div.verseblock + div.attribution {
205
+ text-align: left;
206
+ }
207
+
208
+ div.admonitionblock .icon {
209
+ vertical-align: top;
210
+ font-size: 1.1em;
211
+ font-weight: bold;
212
+ text-decoration: underline;
213
+ color: #527bbd;
214
+ padding-right: 0.5em;
215
+ }
216
+ div.admonitionblock td.content {
217
+ padding-left: 0.5em;
218
+ border-left: 3px solid #dddddd;
219
+ }
220
+
221
+ div.exampleblock > div.content {
222
+ border-left: 3px solid #dddddd;
223
+ padding-left: 0.5em;
224
+ }
225
+
226
+ div.imageblock div.content { padding-left: 0; }
227
+ span.image img { border-style: none; vertical-align: text-bottom; }
228
+ a.image:visited { color: white; }
229
+
230
+ dl {
231
+ margin-top: 0.8em;
232
+ margin-bottom: 0.8em;
233
+ }
234
+ dt {
235
+ margin-top: 0.5em;
236
+ margin-bottom: 0;
237
+ font-style: normal;
238
+ color: navy;
239
+ }
240
+ dd > *:first-child {
241
+ margin-top: 0.1em;
242
+ }
243
+
244
+ ul, ol {
245
+ list-style-position: outside;
246
+ }
247
+ ol.arabic {
248
+ list-style-type: decimal;
249
+ }
250
+ ol.loweralpha {
251
+ list-style-type: lower-alpha;
252
+ }
253
+ ol.upperalpha {
254
+ list-style-type: upper-alpha;
255
+ }
256
+ ol.lowerroman {
257
+ list-style-type: lower-roman;
258
+ }
259
+ ol.upperroman {
260
+ list-style-type: upper-roman;
261
+ }
262
+
263
+ div.compact ul, div.compact ol,
264
+ div.compact p, div.compact p,
265
+ div.compact div, div.compact div {
266
+ margin-top: 0.1em;
267
+ margin-bottom: 0.1em;
268
+ }
269
+
270
+ tfoot {
271
+ font-weight: bold;
272
+ }
273
+ td > div.verse {
274
+ white-space: pre;
275
+ }
276
+
277
+ div.hdlist {
278
+ margin-top: 0.8em;
279
+ margin-bottom: 0.8em;
280
+ }
281
+ div.hdlist tr {
282
+ padding-bottom: 15px;
283
+ }
284
+ dt.hdlist1.strong, td.hdlist1.strong {
285
+ font-weight: bold;
286
+ }
287
+ td.hdlist1 {
288
+ vertical-align: top;
289
+ font-style: normal;
290
+ padding-right: 0.8em;
291
+ color: navy;
292
+ }
293
+ td.hdlist2 {
294
+ vertical-align: top;
295
+ }
296
+ div.hdlist.compact tr {
297
+ margin: 0;
298
+ padding-bottom: 0;
299
+ }
300
+
301
+ .comment {
302
+ background: yellow;
303
+ }
304
+
305
+ .footnote, .footnoteref {
306
+ font-size: 0.8em;
307
+ }
308
+
309
+ span.footnote, span.footnoteref {
310
+ vertical-align: super;
311
+ }
312
+
313
+ #footnotes {
314
+ margin: 20px 0 20px 0;
315
+ padding: 7px 0 0 0;
316
+ }
317
+
318
+ #footnotes div.footnote {
319
+ margin: 0 0 5px 0;
320
+ }
321
+
322
+ #footnotes hr {
323
+ border: none;
324
+ border-top: 1px solid silver;
325
+ height: 1px;
326
+ text-align: left;
327
+ margin-left: 0;
328
+ width: 20%;
329
+ min-width: 100px;
330
+ }
331
+
332
+ div.colist td {
333
+ padding-right: 0.5em;
334
+ padding-bottom: 0.3em;
335
+ vertical-align: top;
336
+ }
337
+ div.colist td img {
338
+ margin-top: 0.3em;
339
+ }
340
+
341
+ @media print {
342
+ #footer-badges { display: none; }
343
+ }
344
+
345
+ #toc {
346
+ margin-bottom: 2.5em;
347
+ }
348
+
349
+ #toctitle {
350
+ color: #527bbd;
351
+ font-size: 1.1em;
352
+ font-weight: bold;
353
+ margin-top: 1.0em;
354
+ margin-bottom: 0.1em;
355
+ }
356
+
357
+ div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
358
+ margin-top: 0;
359
+ margin-bottom: 0;
360
+ }
361
+ div.toclevel2 {
362
+ margin-left: 2em;
363
+ font-size: 0.9em;
364
+ }
365
+ div.toclevel3 {
366
+ margin-left: 4em;
367
+ font-size: 0.9em;
368
+ }
369
+ div.toclevel4 {
370
+ margin-left: 6em;
371
+ font-size: 0.9em;
372
+ }
373
+
374
+ span.aqua { color: aqua; }
375
+ span.black { color: black; }
376
+ span.blue { color: blue; }
377
+ span.fuchsia { color: fuchsia; }
378
+ span.gray { color: gray; }
379
+ span.green { color: green; }
380
+ span.lime { color: lime; }
381
+ span.maroon { color: maroon; }
382
+ span.navy { color: navy; }
383
+ span.olive { color: olive; }
384
+ span.purple { color: purple; }
385
+ span.red { color: red; }
386
+ span.silver { color: silver; }
387
+ span.teal { color: teal; }
388
+ span.white { color: white; }
389
+ span.yellow { color: yellow; }
390
+
391
+ span.aqua-background { background: aqua; }
392
+ span.black-background { background: black; }
393
+ span.blue-background { background: blue; }
394
+ span.fuchsia-background { background: fuchsia; }
395
+ span.gray-background { background: gray; }
396
+ span.green-background { background: green; }
397
+ span.lime-background { background: lime; }
398
+ span.maroon-background { background: maroon; }
399
+ span.navy-background { background: navy; }
400
+ span.olive-background { background: olive; }
401
+ span.purple-background { background: purple; }
402
+ span.red-background { background: red; }
403
+ span.silver-background { background: silver; }
404
+ span.teal-background { background: teal; }
405
+ span.white-background { background: white; }
406
+ span.yellow-background { background: yellow; }
407
+
408
+ span.big { font-size: 2em; }
409
+ span.small { font-size: 0.6em; }
410
+
411
+ span.underline { text-decoration: underline; }
412
+ span.overline { text-decoration: overline; }
413
+ span.line-through { text-decoration: line-through; }
414
+
415
+ div.unbreakable { page-break-inside: avoid; }
416
+
417
+
418
+ /*
419
+ * xhtml11 specific
420
+ *
421
+ * */
422
+
423
+ div.tableblock {
424
+ margin-top: 1.0em;
425
+ margin-bottom: 1.5em;
426
+ }
427
+ div.tableblock > table {
428
+ border: 3px solid #527bbd;
429
+ }
430
+ thead, p.table.header {
431
+ font-weight: bold;
432
+ color: #527bbd;
433
+ }
434
+ p.table {
435
+ margin-top: 0;
436
+ }
437
+ /* Because the table frame attribute is overridden by CSS in most browsers. */
438
+ div.tableblock > table[frame="void"] {
439
+ border-style: none;
440
+ }
441
+ div.tableblock > table[frame="hsides"] {
442
+ border-left-style: none;
443
+ border-right-style: none;
444
+ }
445
+ div.tableblock > table[frame="vsides"] {
446
+ border-top-style: none;
447
+ border-bottom-style: none;
448
+ }
449
+
450
+
451
+ /*
452
+ * html5 specific
453
+ *
454
+ * */
455
+
456
+ table.tableblock {
457
+ margin-top: 1.0em;
458
+ margin-bottom: 1.5em;
459
+ }
460
+ thead, p.tableblock.header {
461
+ font-weight: bold;
462
+ color: #527bbd;
463
+ }
464
+ p.tableblock {
465
+ margin-top: 0;
466
+ }
467
+ table.tableblock {
468
+ border-width: 3px;
469
+ border-spacing: 0px;
470
+ border-style: solid;
471
+ border-color: #527bbd;
472
+ border-collapse: collapse;
473
+ }
474
+ th.tableblock, td.tableblock {
475
+ border-width: 1px;
476
+ padding: 4px;
477
+ border-style: solid;
478
+ border-color: #527bbd;
479
+ }
480
+
481
+ table.tableblock.frame-topbot {
482
+ border-left-style: hidden;
483
+ border-right-style: hidden;
484
+ }
485
+ table.tableblock.frame-sides {
486
+ border-top-style: hidden;
487
+ border-bottom-style: hidden;
488
+ }
489
+ table.tableblock.frame-none {
490
+ border-style: hidden;
491
+ }
492
+
493
+ th.tableblock.halign-left, td.tableblock.halign-left {
494
+ text-align: left;
495
+ }
496
+ th.tableblock.halign-center, td.tableblock.halign-center {
497
+ text-align: center;
498
+ }
499
+ th.tableblock.halign-right, td.tableblock.halign-right {
500
+ text-align: right;
501
+ }
502
+
503
+ th.tableblock.valign-top, td.tableblock.valign-top {
504
+ vertical-align: top;
505
+ }
506
+ th.tableblock.valign-middle, td.tableblock.valign-middle {
507
+ vertical-align: middle;
508
+ }
509
+ th.tableblock.valign-bottom, td.tableblock.valign-bottom {
510
+ vertical-align: bottom;
511
+ }
512
+
513
+
514
+ /*
515
+ * manpage specific
516
+ *
517
+ * */
518
+
519
+ body.manpage h1 {
520
+ padding-top: 0.5em;
521
+ padding-bottom: 0.5em;
522
+ border-top: 2px solid silver;
523
+ border-bottom: 2px solid silver;
524
+ }
525
+ body.manpage h2 {
526
+ border-style: none;
527
+ }
528
+ body.manpage div.sectionbody {
529
+ margin-left: 3em;
530
+ }
531
+
532
+ @media print {
533
+ body.manpage div#toc { display: none; }
534
+ }
535
+
536
+
537
+ </style>
538
+ <script type="text/javascript">
539
+ /*<![CDATA[*/
540
+ var asciidoc = { // Namespace.
541
+
542
+ /////////////////////////////////////////////////////////////////////
543
+ // Table Of Contents generator
544
+ /////////////////////////////////////////////////////////////////////
545
+
546
+ /* Author: Mihai Bazon, September 2002
547
+ * http://students.infoiasi.ro/~mishoo
548
+ *
549
+ * Table Of Content generator
550
+ * Version: 0.4
551
+ *
552
+ * Feel free to use this script under the terms of the GNU General Public
553
+ * License, as long as you do not remove or alter this notice.
554
+ */
555
+
556
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
557
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
558
+
559
+ // toclevels = 1..4.
560
+ toc: function (toclevels) {
561
+
562
+ function getText(el) {
563
+ var text = "";
564
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
565
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
566
+ text += i.data;
567
+ else if (i.firstChild != null)
568
+ text += getText(i);
569
+ }
570
+ return text;
571
+ }
572
+
573
+ function TocEntry(el, text, toclevel) {
574
+ this.element = el;
575
+ this.text = text;
576
+ this.toclevel = toclevel;
577
+ }
578
+
579
+ function tocEntries(el, toclevels) {
580
+ var result = new Array;
581
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
582
+ // Function that scans the DOM tree for header elements (the DOM2
583
+ // nodeIterator API would be a better technique but not supported by all
584
+ // browsers).
585
+ var iterate = function (el) {
586
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
587
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
588
+ var mo = re.exec(i.tagName);
589
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
590
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
591
+ }
592
+ iterate(i);
593
+ }
594
+ }
595
+ }
596
+ iterate(el);
597
+ return result;
598
+ }
599
+
600
+ var toc = document.getElementById("toc");
601
+ if (!toc) {
602
+ return;
603
+ }
604
+
605
+ // Delete existing TOC entries in case we're reloading the TOC.
606
+ var tocEntriesToRemove = [];
607
+ var i;
608
+ for (i = 0; i < toc.childNodes.length; i++) {
609
+ var entry = toc.childNodes[i];
610
+ if (entry.nodeName.toLowerCase() == 'div'
611
+ && entry.getAttribute("class")
612
+ && entry.getAttribute("class").match(/^toclevel/))
613
+ tocEntriesToRemove.push(entry);
614
+ }
615
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
616
+ toc.removeChild(tocEntriesToRemove[i]);
617
+ }
618
+
619
+ // Rebuild TOC entries.
620
+ var entries = tocEntries(document.getElementById("content"), toclevels);
621
+ for (var i = 0; i < entries.length; ++i) {
622
+ var entry = entries[i];
623
+ if (entry.element.id == "")
624
+ entry.element.id = "_toc_" + i;
625
+ var a = document.createElement("a");
626
+ a.href = "#" + entry.element.id;
627
+ a.appendChild(document.createTextNode(entry.text));
628
+ var div = document.createElement("div");
629
+ div.appendChild(a);
630
+ div.className = "toclevel" + entry.toclevel;
631
+ toc.appendChild(div);
632
+ }
633
+ if (entries.length == 0)
634
+ toc.parentNode.removeChild(toc);
635
+ },
636
+
637
+
638
+ /////////////////////////////////////////////////////////////////////
639
+ // Footnotes generator
640
+ /////////////////////////////////////////////////////////////////////
641
+
642
+ /* Based on footnote generation code from:
643
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
644
+ */
645
+
646
+ footnotes: function () {
647
+ // Delete existing footnote entries in case we're reloading the footnodes.
648
+ var i;
649
+ var noteholder = document.getElementById("footnotes");
650
+ if (!noteholder) {
651
+ return;
652
+ }
653
+ var entriesToRemove = [];
654
+ for (i = 0; i < noteholder.childNodes.length; i++) {
655
+ var entry = noteholder.childNodes[i];
656
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
657
+ entriesToRemove.push(entry);
658
+ }
659
+ for (i = 0; i < entriesToRemove.length; i++) {
660
+ noteholder.removeChild(entriesToRemove[i]);
661
+ }
662
+
663
+ // Rebuild footnote entries.
664
+ var cont = document.getElementById("content");
665
+ var spans = cont.getElementsByTagName("span");
666
+ var refs = {};
667
+ var n = 0;
668
+ for (i=0; i<spans.length; i++) {
669
+ if (spans[i].className == "footnote") {
670
+ n++;
671
+ var note = spans[i].getAttribute("data-note");
672
+ if (!note) {
673
+ // Use [\s\S] in place of . so multi-line matches work.
674
+ // Because JavaScript has no s (dotall) regex flag.
675
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
676
+ spans[i].innerHTML =
677
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
678
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
679
+ spans[i].setAttribute("data-note", note);
680
+ }
681
+ noteholder.innerHTML +=
682
+ "<div class='footnote' id='_footnote_" + n + "'>" +
683
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
684
+ n + "</a>. " + note + "</div>";
685
+ var id =spans[i].getAttribute("id");
686
+ if (id != null) refs["#"+id] = n;
687
+ }
688
+ }
689
+ if (n == 0)
690
+ noteholder.parentNode.removeChild(noteholder);
691
+ else {
692
+ // Process footnoterefs.
693
+ for (i=0; i<spans.length; i++) {
694
+ if (spans[i].className == "footnoteref") {
695
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
696
+ href = href.match(/#.*/)[0]; // Because IE return full URL.
697
+ n = refs[href];
698
+ spans[i].innerHTML =
699
+ "[<a href='#_footnote_" + n +
700
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
701
+ }
702
+ }
703
+ }
704
+ },
705
+
706
+ install: function(toclevels) {
707
+ var timerId;
708
+
709
+ function reinstall() {
710
+ asciidoc.footnotes();
711
+ if (toclevels) {
712
+ asciidoc.toc(toclevels);
713
+ }
714
+ }
715
+
716
+ function reinstallAndRemoveTimer() {
717
+ clearInterval(timerId);
718
+ reinstall();
719
+ }
720
+
721
+ timerId = setInterval(reinstall, 500);
722
+ if (document.addEventListener)
723
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
724
+ else
725
+ window.onload = reinstallAndRemoveTimer;
726
+ }
727
+
728
+ }
729
+ asciidoc.install();
730
+ /*]]>*/
731
+ </script>
732
+ </head>
733
+ <body class="article">
734
+ <div id="header">
735
+ <h1>COMBINE_TESSDATA(1)</h1>
736
+ </div>
737
+ <div id="content">
738
+ <div class="sect1">
739
+ <h2 id="_name">NAME</h2>
740
+ <div class="sectionbody">
741
+ <div class="paragraph"><p>combine_tessdata - combine/extract/overwrite/list/compact Tesseract data</p></div>
742
+ </div>
743
+ </div>
744
+ <div class="sect1">
745
+ <h2 id="_synopsis">SYNOPSIS</h2>
746
+ <div class="sectionbody">
747
+ <div class="paragraph"><p><strong>combine_tessdata</strong> [<em>OPTION</em>] <em>FILE</em>&#8230;</p></div>
748
+ </div>
749
+ </div>
750
+ <div class="sect1">
751
+ <h2 id="_description">DESCRIPTION</h2>
752
+ <div class="sectionbody">
753
+ <div class="paragraph"><p>combine_tessdata(1) is the main program to combine/extract/overwrite/list/compact
754
+ tessdata components in [lang].traineddata files.</p></div>
755
+ <div class="paragraph"><p>To combine all the individual tessdata components (unicharset, DAWGs,
756
+ classifier templates, ambiguities, language configs) located at, say,
757
+ /home/$USER/temp/eng.* run:</p></div>
758
+ <div class="literalblock">
759
+ <div class="content monospaced">
760
+ <pre>combine_tessdata /home/$USER/temp/eng.</pre>
761
+ </div></div>
762
+ <div class="paragraph"><p>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</p></div>
763
+ <div class="paragraph"><p>Specify option -e if you would like to extract individual components
764
+ from a combined traineddata file. For example, to extract language config
765
+ file and the unicharset from tessdata/eng.traineddata run:</p></div>
766
+ <div class="literalblock">
767
+ <div class="content monospaced">
768
+ <pre>combine_tessdata -e tessdata/eng.traineddata \
769
+ /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</pre>
770
+ </div></div>
771
+ <div class="paragraph"><p>The desired config file and unicharset will be written to
772
+ /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</p></div>
773
+ <div class="paragraph"><p>Specify option -o to overwrite individual components of the given
774
+ [lang].traineddata file. For example, to overwrite language config
775
+ and unichar ambiguities files in tessdata/eng.traineddata use:</p></div>
776
+ <div class="literalblock">
777
+ <div class="content monospaced">
778
+ <pre>combine_tessdata -o tessdata/eng.traineddata \
779
+ /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</pre>
780
+ </div></div>
781
+ <div class="paragraph"><p>As a result, tessdata/eng.traineddata will contain the new language config
782
+ and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</p></div>
783
+ <div class="paragraph"><p>Note: the file names of the files to extract to and to overwrite from should
784
+ have the appropriate file suffixes (extensions) indicating their tessdata
785
+ component type (.unicharset for the unicharset, .unicharambigs for unichar
786
+ ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</p></div>
787
+ <div class="paragraph"><p>Specify option -u to unpack all the components to the specified path:</p></div>
788
+ <div class="literalblock">
789
+ <div class="content monospaced">
790
+ <pre>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</pre>
791
+ </div></div>
792
+ <div class="paragraph"><p>This will create /home/$USER/temp/eng.* files with individual tessdata
793
+ components from tessdata/eng.traineddata.</p></div>
794
+ </div>
795
+ </div>
796
+ <div class="sect1">
797
+ <h2 id="_options">OPTIONS</h2>
798
+ <div class="sectionbody">
799
+ <div class="paragraph"><p><strong>-c</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
800
+ Compacts the LSTM component in the .traineddata file to int.</p></div>
801
+ <div class="paragraph"><p><strong>-d</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
802
+ Lists directory of components from the .traineddata file.</p></div>
803
+ <div class="paragraph"><p><strong>-e</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
804
+ Extracts the specified components from the .traineddata file</p></div>
805
+ <div class="paragraph"><p><strong>-l</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
806
+ List the network information.</p></div>
807
+ <div class="paragraph"><p><strong>-o</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
808
+ Overwrites the specified components of the .traineddata file
809
+ with those provided on the command line.</p></div>
810
+ <div class="paragraph"><p><strong>-u</strong> <em>.traineddata</em> <em>PATHPREFIX</em>
811
+ Unpacks the .traineddata using the provided prefix.</p></div>
812
+ </div>
813
+ </div>
814
+ <div class="sect1">
815
+ <h2 id="_caveats">CAVEATS</h2>
816
+ <div class="sectionbody">
817
+ <div class="paragraph"><p><em>Prefix</em> refers to the full file prefix, including period (.)</p></div>
818
+ </div>
819
+ </div>
820
+ <div class="sect1">
821
+ <h2 id="_components">COMPONENTS</h2>
822
+ <div class="sectionbody">
823
+ <div class="paragraph"><p>The components in a Tesseract lang.traineddata file as of
824
+ Tesseract 4.0 are briefly described below; For more information on
825
+ many of these files, see
826
+ <a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a>
827
+ and
828
+ <a href="https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html">https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html</a></p></div>
829
+ <div class="dlist"><dl>
830
+ <dt class="hdlist1">
831
+ lang.config
832
+ </dt>
833
+ <dd>
834
+ <p>
835
+ (Optional) Language-specific overrides to default config variables.
836
+ For 4.0 traineddata files, lang.config provides control parameters which
837
+ can affect layout analysis, and sub-languages.
838
+ </p>
839
+ </dd>
840
+ <dt class="hdlist1">
841
+ lang.unicharset
842
+ </dt>
843
+ <dd>
844
+ <p>
845
+ (Required - 3.0x legacy tesseract) The list of symbols that Tesseract recognizes, with properties.
846
+ See unicharset(5).
847
+ </p>
848
+ </dd>
849
+ <dt class="hdlist1">
850
+ lang.unicharambigs
851
+ </dt>
852
+ <dd>
853
+ <p>
854
+ (Optional - 3.0x legacy tesseract) This file contains information on pairs of recognized symbols
855
+ which are often confused. For example, <em>rn</em> and <em>m</em>.
856
+ </p>
857
+ </dd>
858
+ <dt class="hdlist1">
859
+ lang.inttemp
860
+ </dt>
861
+ <dd>
862
+ <p>
863
+ (Required - 3.0x legacy tesseract) Character shape templates for each unichar. Produced by
864
+ mftraining(1).
865
+ </p>
866
+ </dd>
867
+ <dt class="hdlist1">
868
+ lang.pffmtable
869
+ </dt>
870
+ <dd>
871
+ <p>
872
+ (Required - 3.0x legacy tesseract) The number of features expected for each unichar.
873
+ Produced by mftraining(1) from <strong>.tr</strong> files.
874
+ </p>
875
+ </dd>
876
+ <dt class="hdlist1">
877
+ lang.normproto
878
+ </dt>
879
+ <dd>
880
+ <p>
881
+ (Required - 3.0x legacy tesseract) Character normalization prototypes generated by cntraining(1)
882
+ from <strong>.tr</strong> files.
883
+ </p>
884
+ </dd>
885
+ <dt class="hdlist1">
886
+ lang.punc-dawg
887
+ </dt>
888
+ <dd>
889
+ <p>
890
+ (Optional - 3.0x legacy tesseract) A dawg made from punctuation patterns found around words.
891
+ The "word" part is replaced by a single space.
892
+ </p>
893
+ </dd>
894
+ <dt class="hdlist1">
895
+ lang.word-dawg
896
+ </dt>
897
+ <dd>
898
+ <p>
899
+ (Optional - 3.0x legacy tesseract) A dawg made from dictionary words from the language.
900
+ </p>
901
+ </dd>
902
+ <dt class="hdlist1">
903
+ lang.number-dawg
904
+ </dt>
905
+ <dd>
906
+ <p>
907
+ (Optional - 3.0x legacy tesseract) A dawg made from tokens which originally contained digits.
908
+ Each digit is replaced by a space character.
909
+ </p>
910
+ </dd>
911
+ <dt class="hdlist1">
912
+ lang.freq-dawg
913
+ </dt>
914
+ <dd>
915
+ <p>
916
+ (Optional - 3.0x legacy tesseract) A dawg made from the most frequent words which would have
917
+ gone into word-dawg.
918
+ </p>
919
+ </dd>
920
+ <dt class="hdlist1">
921
+ lang.fixed-length-dawgs
922
+ </dt>
923
+ <dd>
924
+ <p>
925
+ (Optional - 3.0x legacy tesseract) Several dawgs of different fixed lengths&#8201;&#8212;&#8201;useful for
926
+ languages like Chinese.
927
+ </p>
928
+ </dd>
929
+ <dt class="hdlist1">
930
+ lang.shapetable
931
+ </dt>
932
+ <dd>
933
+ <p>
934
+ (Optional - 3.0x legacy tesseract) When present, a shapetable is an extra layer between the character
935
+ classifier and the word recognizer that allows the character classifier to
936
+ return a collection of unichar ids and fonts instead of a single unichar-id
937
+ and font.
938
+ </p>
939
+ </dd>
940
+ <dt class="hdlist1">
941
+ lang.bigram-dawg
942
+ </dt>
943
+ <dd>
944
+ <p>
945
+ (Optional - 3.0x legacy tesseract) A dawg of word bigrams where the words are separated by a space
946
+ and each digit is replaced by a <em>?</em>.
947
+ </p>
948
+ </dd>
949
+ <dt class="hdlist1">
950
+ lang.unambig-dawg
951
+ </dt>
952
+ <dd>
953
+ <p>
954
+ (Optional - 3.0x legacy tesseract) .
955
+ </p>
956
+ </dd>
957
+ <dt class="hdlist1">
958
+ lang.params-model
959
+ </dt>
960
+ <dd>
961
+ <p>
962
+ (Optional - 3.0x legacy tesseract) .
963
+ </p>
964
+ </dd>
965
+ <dt class="hdlist1">
966
+ lang.lstm
967
+ </dt>
968
+ <dd>
969
+ <p>
970
+ (Required - 4.0 LSTM) Neural net trained recognition model generated by lstmtraining.
971
+ </p>
972
+ </dd>
973
+ <dt class="hdlist1">
974
+ lang.lstm-punc-dawg
975
+ </dt>
976
+ <dd>
977
+ <p>
978
+ (Optional - 4.0 LSTM) A dawg made from punctuation patterns found around words.
979
+ The "word" part is replaced by a single space. Uses lang.lstm-unicharset.
980
+ </p>
981
+ </dd>
982
+ <dt class="hdlist1">
983
+ lang.lstm-word-dawg
984
+ </dt>
985
+ <dd>
986
+ <p>
987
+ (Optional - 4.0 LSTM) A dawg made from dictionary words from the language.
988
+ Uses lang.lstm-unicharset.
989
+ </p>
990
+ </dd>
991
+ <dt class="hdlist1">
992
+ lang.lstm-number-dawg
993
+ </dt>
994
+ <dd>
995
+ <p>
996
+ (Optional - 4.0 LSTM) A dawg made from tokens which originally contained digits.
997
+ Each digit is replaced by a space character. Uses lang.lstm-unicharset.
998
+ </p>
999
+ </dd>
1000
+ <dt class="hdlist1">
1001
+ lang.lstm-unicharset
1002
+ </dt>
1003
+ <dd>
1004
+ <p>
1005
+ (Required - 4.0 LSTM) The unicode character set that Tesseract recognizes, with properties.
1006
+ Same unicharset must be used to train the LSTM and build the lstm-*-dawgs files.
1007
+ </p>
1008
+ </dd>
1009
+ <dt class="hdlist1">
1010
+ lang.lstm-recoder
1011
+ </dt>
1012
+ <dd>
1013
+ <p>
1014
+ (Required - 4.0 LSTM) Unicharcompress, aka the recoder, which maps the unicharset
1015
+ further to the codes actually used by the neural network recognizer. This is created as
1016
+ part of the starter traineddata by combine_lang_model.
1017
+ </p>
1018
+ </dd>
1019
+ <dt class="hdlist1">
1020
+ lang.version
1021
+ </dt>
1022
+ <dd>
1023
+ <p>
1024
+ (Optional) Version string for the traineddata file.
1025
+ First appeared in version 4.0 of Tesseract.
1026
+ Old version of traineddata files will report Version:Pre-4.0.0.
1027
+ 4.0 version of traineddata files may include the network spec
1028
+ used for LSTM training as part of version string.
1029
+ </p>
1030
+ </dd>
1031
+ </dl></div>
1032
+ </div>
1033
+ </div>
1034
+ <div class="sect1">
1035
+ <h2 id="_history">HISTORY</h2>
1036
+ <div class="sectionbody">
1037
+ <div class="paragraph"><p>combine_tessdata(1) first appeared in version 3.00 of Tesseract</p></div>
1038
+ </div>
1039
+ </div>
1040
+ <div class="sect1">
1041
+ <h2 id="_see_also">SEE ALSO</h2>
1042
+ <div class="sectionbody">
1043
+ <div class="paragraph"><p>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
1044
+ unicharambigs(5)</p></div>
1045
+ </div>
1046
+ </div>
1047
+ <div class="sect1">
1048
+ <h2 id="_copying">COPYING</h2>
1049
+ <div class="sectionbody">
1050
+ <div class="paragraph"><p>Copyright (C) 2009, Google Inc.
1051
+ Licensed under the Apache License, Version 2.0</p></div>
1052
+ </div>
1053
+ </div>
1054
+ <div class="sect1">
1055
+ <h2 id="_author">AUTHOR</h2>
1056
+ <div class="sectionbody">
1057
+ <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
1058
+ at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
1059
+ </div>
1060
+ </div>
1061
+ </div>
1062
+ <div id="footnotes"><hr></div>
1063
+ <div id="footer">
1064
+ <div id="footer-text">
1065
+ Last updated
1066
+ 2024-05-03 17:30:23 CEST
1067
+ </div>
1068
+ </div>
1069
+ </body>
1070
+ </html>
combine_tessdata.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f905d89cd6bd3737fa78369e6a91ce06a3f10641f56a2e8671e0d6c6168f485a
3
+ size 1281096
dawg2wordlist.1.html ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta name="generator" content="AsciiDoc 10.2.0">
6
+ <title>DAWG2WORDLIST(1)</title>
7
+ <style type="text/css">
8
+ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
9
+
10
+ /* Default font. */
11
+ body {
12
+ font-family: Georgia,serif;
13
+ }
14
+
15
+ /* Title font. */
16
+ h1, h2, h3, h4, h5, h6,
17
+ div.title, caption.title,
18
+ thead, p.table.header,
19
+ #toctitle,
20
+ #author, #revnumber, #revdate, #revremark,
21
+ #footer {
22
+ font-family: Arial,Helvetica,sans-serif;
23
+ }
24
+
25
+ body {
26
+ margin: 1em 5% 1em 5%;
27
+ }
28
+
29
+ a {
30
+ color: blue;
31
+ text-decoration: underline;
32
+ }
33
+ a:visited {
34
+ color: fuchsia;
35
+ }
36
+
37
+ em {
38
+ font-style: italic;
39
+ color: navy;
40
+ }
41
+
42
+ strong {
43
+ font-weight: bold;
44
+ color: #083194;
45
+ }
46
+
47
+ h1, h2, h3, h4, h5, h6 {
48
+ color: #527bbd;
49
+ margin-top: 1.2em;
50
+ margin-bottom: 0.5em;
51
+ line-height: 1.3;
52
+ }
53
+
54
+ h1, h2, h3 {
55
+ border-bottom: 2px solid silver;
56
+ }
57
+ h2 {
58
+ padding-top: 0.5em;
59
+ }
60
+ h3 {
61
+ float: left;
62
+ }
63
+ h3 + * {
64
+ clear: left;
65
+ }
66
+ h5 {
67
+ font-size: 1.0em;
68
+ }
69
+
70
+ div.sectionbody {
71
+ margin-left: 0;
72
+ }
73
+
74
+ hr {
75
+ border: 1px solid silver;
76
+ }
77
+
78
+ p {
79
+ margin-top: 0.5em;
80
+ margin-bottom: 0.5em;
81
+ }
82
+
83
+ ul, ol, li > p {
84
+ margin-top: 0;
85
+ }
86
+ ul > li { color: #aaa; }
87
+ ul > li > * { color: black; }
88
+
89
+ .monospaced, code, pre {
90
+ font-family: "Courier New", Courier, monospace;
91
+ font-size: inherit;
92
+ color: navy;
93
+ padding: 0;
94
+ margin: 0;
95
+ }
96
+ pre {
97
+ white-space: pre-wrap;
98
+ }
99
+
100
+ #author {
101
+ color: #527bbd;
102
+ font-weight: bold;
103
+ font-size: 1.1em;
104
+ }
105
+ #email {
106
+ }
107
+ #revnumber, #revdate, #revremark {
108
+ }
109
+
110
+ #footer {
111
+ font-size: small;
112
+ border-top: 2px solid silver;
113
+ padding-top: 0.5em;
114
+ margin-top: 4.0em;
115
+ }
116
+ #footer-text {
117
+ float: left;
118
+ padding-bottom: 0.5em;
119
+ }
120
+ #footer-badges {
121
+ float: right;
122
+ padding-bottom: 0.5em;
123
+ }
124
+
125
+ #preamble {
126
+ margin-top: 1.5em;
127
+ margin-bottom: 1.5em;
128
+ }
129
+ div.imageblock, div.exampleblock, div.verseblock,
130
+ div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
131
+ div.admonitionblock {
132
+ margin-top: 1.0em;
133
+ margin-bottom: 1.5em;
134
+ }
135
+ div.admonitionblock {
136
+ margin-top: 2.0em;
137
+ margin-bottom: 2.0em;
138
+ margin-right: 10%;
139
+ color: #606060;
140
+ }
141
+
142
+ div.content { /* Block element content. */
143
+ padding: 0;
144
+ }
145
+
146
+ /* Block element titles. */
147
+ div.title, caption.title {
148
+ color: #527bbd;
149
+ font-weight: bold;
150
+ text-align: left;
151
+ margin-top: 1.0em;
152
+ margin-bottom: 0.5em;
153
+ }
154
+ div.title + * {
155
+ margin-top: 0;
156
+ }
157
+
158
+ td div.title:first-child {
159
+ margin-top: 0.0em;
160
+ }
161
+ div.content div.title:first-child {
162
+ margin-top: 0.0em;
163
+ }
164
+ div.content + div.title {
165
+ margin-top: 0.0em;
166
+ }
167
+
168
+ div.sidebarblock > div.content {
169
+ background: #ffffee;
170
+ border: 1px solid #dddddd;
171
+ border-left: 4px solid #f0f0f0;
172
+ padding: 0.5em;
173
+ }
174
+
175
+ div.listingblock > div.content {
176
+ border: 1px solid #dddddd;
177
+ border-left: 5px solid #f0f0f0;
178
+ background: #f8f8f8;
179
+ padding: 0.5em;
180
+ }
181
+
182
+ div.quoteblock, div.verseblock {
183
+ padding-left: 1.0em;
184
+ margin-left: 1.0em;
185
+ margin-right: 10%;
186
+ border-left: 5px solid #f0f0f0;
187
+ color: #888;
188
+ }
189
+
190
+ div.quoteblock > div.attribution {
191
+ padding-top: 0.5em;
192
+ text-align: right;
193
+ }
194
+
195
+ div.verseblock > pre.content {
196
+ font-family: inherit;
197
+ font-size: inherit;
198
+ }
199
+ div.verseblock > div.attribution {
200
+ padding-top: 0.75em;
201
+ text-align: left;
202
+ }
203
+ /* DEPRECATED: Pre version 8.2.7 verse style literal block. */
204
+ div.verseblock + div.attribution {
205
+ text-align: left;
206
+ }
207
+
208
+ div.admonitionblock .icon {
209
+ vertical-align: top;
210
+ font-size: 1.1em;
211
+ font-weight: bold;
212
+ text-decoration: underline;
213
+ color: #527bbd;
214
+ padding-right: 0.5em;
215
+ }
216
+ div.admonitionblock td.content {
217
+ padding-left: 0.5em;
218
+ border-left: 3px solid #dddddd;
219
+ }
220
+
221
+ div.exampleblock > div.content {
222
+ border-left: 3px solid #dddddd;
223
+ padding-left: 0.5em;
224
+ }
225
+
226
+ div.imageblock div.content { padding-left: 0; }
227
+ span.image img { border-style: none; vertical-align: text-bottom; }
228
+ a.image:visited { color: white; }
229
+
230
+ dl {
231
+ margin-top: 0.8em;
232
+ margin-bottom: 0.8em;
233
+ }
234
+ dt {
235
+ margin-top: 0.5em;
236
+ margin-bottom: 0;
237
+ font-style: normal;
238
+ color: navy;
239
+ }
240
+ dd > *:first-child {
241
+ margin-top: 0.1em;
242
+ }
243
+
244
+ ul, ol {
245
+ list-style-position: outside;
246
+ }
247
+ ol.arabic {
248
+ list-style-type: decimal;
249
+ }
250
+ ol.loweralpha {
251
+ list-style-type: lower-alpha;
252
+ }
253
+ ol.upperalpha {
254
+ list-style-type: upper-alpha;
255
+ }
256
+ ol.lowerroman {
257
+ list-style-type: lower-roman;
258
+ }
259
+ ol.upperroman {
260
+ list-style-type: upper-roman;
261
+ }
262
+
263
+ div.compact ul, div.compact ol,
264
+ div.compact p, div.compact p,
265
+ div.compact div, div.compact div {
266
+ margin-top: 0.1em;
267
+ margin-bottom: 0.1em;
268
+ }
269
+
270
+ tfoot {
271
+ font-weight: bold;
272
+ }
273
+ td > div.verse {
274
+ white-space: pre;
275
+ }
276
+
277
+ div.hdlist {
278
+ margin-top: 0.8em;
279
+ margin-bottom: 0.8em;
280
+ }
281
+ div.hdlist tr {
282
+ padding-bottom: 15px;
283
+ }
284
+ dt.hdlist1.strong, td.hdlist1.strong {
285
+ font-weight: bold;
286
+ }
287
+ td.hdlist1 {
288
+ vertical-align: top;
289
+ font-style: normal;
290
+ padding-right: 0.8em;
291
+ color: navy;
292
+ }
293
+ td.hdlist2 {
294
+ vertical-align: top;
295
+ }
296
+ div.hdlist.compact tr {
297
+ margin: 0;
298
+ padding-bottom: 0;
299
+ }
300
+
301
+ .comment {
302
+ background: yellow;
303
+ }
304
+
305
+ .footnote, .footnoteref {
306
+ font-size: 0.8em;
307
+ }
308
+
309
+ span.footnote, span.footnoteref {
310
+ vertical-align: super;
311
+ }
312
+
313
+ #footnotes {
314
+ margin: 20px 0 20px 0;
315
+ padding: 7px 0 0 0;
316
+ }
317
+
318
+ #footnotes div.footnote {
319
+ margin: 0 0 5px 0;
320
+ }
321
+
322
+ #footnotes hr {
323
+ border: none;
324
+ border-top: 1px solid silver;
325
+ height: 1px;
326
+ text-align: left;
327
+ margin-left: 0;
328
+ width: 20%;
329
+ min-width: 100px;
330
+ }
331
+
332
+ div.colist td {
333
+ padding-right: 0.5em;
334
+ padding-bottom: 0.3em;
335
+ vertical-align: top;
336
+ }
337
+ div.colist td img {
338
+ margin-top: 0.3em;
339
+ }
340
+
341
+ @media print {
342
+ #footer-badges { display: none; }
343
+ }
344
+
345
+ #toc {
346
+ margin-bottom: 2.5em;
347
+ }
348
+
349
+ #toctitle {
350
+ color: #527bbd;
351
+ font-size: 1.1em;
352
+ font-weight: bold;
353
+ margin-top: 1.0em;
354
+ margin-bottom: 0.1em;
355
+ }
356
+
357
+ div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
358
+ margin-top: 0;
359
+ margin-bottom: 0;
360
+ }
361
+ div.toclevel2 {
362
+ margin-left: 2em;
363
+ font-size: 0.9em;
364
+ }
365
+ div.toclevel3 {
366
+ margin-left: 4em;
367
+ font-size: 0.9em;
368
+ }
369
+ div.toclevel4 {
370
+ margin-left: 6em;
371
+ font-size: 0.9em;
372
+ }
373
+
374
+ span.aqua { color: aqua; }
375
+ span.black { color: black; }
376
+ span.blue { color: blue; }
377
+ span.fuchsia { color: fuchsia; }
378
+ span.gray { color: gray; }
379
+ span.green { color: green; }
380
+ span.lime { color: lime; }
381
+ span.maroon { color: maroon; }
382
+ span.navy { color: navy; }
383
+ span.olive { color: olive; }
384
+ span.purple { color: purple; }
385
+ span.red { color: red; }
386
+ span.silver { color: silver; }
387
+ span.teal { color: teal; }
388
+ span.white { color: white; }
389
+ span.yellow { color: yellow; }
390
+
391
+ span.aqua-background { background: aqua; }
392
+ span.black-background { background: black; }
393
+ span.blue-background { background: blue; }
394
+ span.fuchsia-background { background: fuchsia; }
395
+ span.gray-background { background: gray; }
396
+ span.green-background { background: green; }
397
+ span.lime-background { background: lime; }
398
+ span.maroon-background { background: maroon; }
399
+ span.navy-background { background: navy; }
400
+ span.olive-background { background: olive; }
401
+ span.purple-background { background: purple; }
402
+ span.red-background { background: red; }
403
+ span.silver-background { background: silver; }
404
+ span.teal-background { background: teal; }
405
+ span.white-background { background: white; }
406
+ span.yellow-background { background: yellow; }
407
+
408
+ span.big { font-size: 2em; }
409
+ span.small { font-size: 0.6em; }
410
+
411
+ span.underline { text-decoration: underline; }
412
+ span.overline { text-decoration: overline; }
413
+ span.line-through { text-decoration: line-through; }
414
+
415
+ div.unbreakable { page-break-inside: avoid; }
416
+
417
+
418
+ /*
419
+ * xhtml11 specific
420
+ *
421
+ * */
422
+
423
+ div.tableblock {
424
+ margin-top: 1.0em;
425
+ margin-bottom: 1.5em;
426
+ }
427
+ div.tableblock > table {
428
+ border: 3px solid #527bbd;
429
+ }
430
+ thead, p.table.header {
431
+ font-weight: bold;
432
+ color: #527bbd;
433
+ }
434
+ p.table {
435
+ margin-top: 0;
436
+ }
437
+ /* Because the table frame attribute is overridden by CSS in most browsers. */
438
+ div.tableblock > table[frame="void"] {
439
+ border-style: none;
440
+ }
441
+ div.tableblock > table[frame="hsides"] {
442
+ border-left-style: none;
443
+ border-right-style: none;
444
+ }
445
+ div.tableblock > table[frame="vsides"] {
446
+ border-top-style: none;
447
+ border-bottom-style: none;
448
+ }
449
+
450
+
451
+ /*
452
+ * html5 specific
453
+ *
454
+ * */
455
+
456
+ table.tableblock {
457
+ margin-top: 1.0em;
458
+ margin-bottom: 1.5em;
459
+ }
460
+ thead, p.tableblock.header {
461
+ font-weight: bold;
462
+ color: #527bbd;
463
+ }
464
+ p.tableblock {
465
+ margin-top: 0;
466
+ }
467
+ table.tableblock {
468
+ border-width: 3px;
469
+ border-spacing: 0px;
470
+ border-style: solid;
471
+ border-color: #527bbd;
472
+ border-collapse: collapse;
473
+ }
474
+ th.tableblock, td.tableblock {
475
+ border-width: 1px;
476
+ padding: 4px;
477
+ border-style: solid;
478
+ border-color: #527bbd;
479
+ }
480
+
481
+ table.tableblock.frame-topbot {
482
+ border-left-style: hidden;
483
+ border-right-style: hidden;
484
+ }
485
+ table.tableblock.frame-sides {
486
+ border-top-style: hidden;
487
+ border-bottom-style: hidden;
488
+ }
489
+ table.tableblock.frame-none {
490
+ border-style: hidden;
491
+ }
492
+
493
+ th.tableblock.halign-left, td.tableblock.halign-left {
494
+ text-align: left;
495
+ }
496
+ th.tableblock.halign-center, td.tableblock.halign-center {
497
+ text-align: center;
498
+ }
499
+ th.tableblock.halign-right, td.tableblock.halign-right {
500
+ text-align: right;
501
+ }
502
+
503
+ th.tableblock.valign-top, td.tableblock.valign-top {
504
+ vertical-align: top;
505
+ }
506
+ th.tableblock.valign-middle, td.tableblock.valign-middle {
507
+ vertical-align: middle;
508
+ }
509
+ th.tableblock.valign-bottom, td.tableblock.valign-bottom {
510
+ vertical-align: bottom;
511
+ }
512
+
513
+
514
+ /*
515
+ * manpage specific
516
+ *
517
+ * */
518
+
519
+ body.manpage h1 {
520
+ padding-top: 0.5em;
521
+ padding-bottom: 0.5em;
522
+ border-top: 2px solid silver;
523
+ border-bottom: 2px solid silver;
524
+ }
525
+ body.manpage h2 {
526
+ border-style: none;
527
+ }
528
+ body.manpage div.sectionbody {
529
+ margin-left: 3em;
530
+ }
531
+
532
+ @media print {
533
+ body.manpage div#toc { display: none; }
534
+ }
535
+
536
+
537
+ </style>
538
+ <script type="text/javascript">
539
+ /*<![CDATA[*/
540
+ var asciidoc = { // Namespace.
541
+
542
+ /////////////////////////////////////////////////////////////////////
543
+ // Table Of Contents generator
544
+ /////////////////////////////////////////////////////////////////////
545
+
546
+ /* Author: Mihai Bazon, September 2002
547
+ * http://students.infoiasi.ro/~mishoo
548
+ *
549
+ * Table Of Content generator
550
+ * Version: 0.4
551
+ *
552
+ * Feel free to use this script under the terms of the GNU General Public
553
+ * License, as long as you do not remove or alter this notice.
554
+ */
555
+
556
+ /* modified by Troy D. Hanson, September 2006. License: GPL */
557
+ /* modified by Stuart Rackham, 2006, 2009. License: GPL */
558
+
559
+ // toclevels = 1..4.
560
+ toc: function (toclevels) {
561
+
562
+ function getText(el) {
563
+ var text = "";
564
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
565
+ if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
566
+ text += i.data;
567
+ else if (i.firstChild != null)
568
+ text += getText(i);
569
+ }
570
+ return text;
571
+ }
572
+
573
+ function TocEntry(el, text, toclevel) {
574
+ this.element = el;
575
+ this.text = text;
576
+ this.toclevel = toclevel;
577
+ }
578
+
579
+ function tocEntries(el, toclevels) {
580
+ var result = new Array;
581
+ var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
582
+ // Function that scans the DOM tree for header elements (the DOM2
583
+ // nodeIterator API would be a better technique but not supported by all
584
+ // browsers).
585
+ var iterate = function (el) {
586
+ for (var i = el.firstChild; i != null; i = i.nextSibling) {
587
+ if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
588
+ var mo = re.exec(i.tagName);
589
+ if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
590
+ result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
591
+ }
592
+ iterate(i);
593
+ }
594
+ }
595
+ }
596
+ iterate(el);
597
+ return result;
598
+ }
599
+
600
+ var toc = document.getElementById("toc");
601
+ if (!toc) {
602
+ return;
603
+ }
604
+
605
+ // Delete existing TOC entries in case we're reloading the TOC.
606
+ var tocEntriesToRemove = [];
607
+ var i;
608
+ for (i = 0; i < toc.childNodes.length; i++) {
609
+ var entry = toc.childNodes[i];
610
+ if (entry.nodeName.toLowerCase() == 'div'
611
+ && entry.getAttribute("class")
612
+ && entry.getAttribute("class").match(/^toclevel/))
613
+ tocEntriesToRemove.push(entry);
614
+ }
615
+ for (i = 0; i < tocEntriesToRemove.length; i++) {
616
+ toc.removeChild(tocEntriesToRemove[i]);
617
+ }
618
+
619
+ // Rebuild TOC entries.
620
+ var entries = tocEntries(document.getElementById("content"), toclevels);
621
+ for (var i = 0; i < entries.length; ++i) {
622
+ var entry = entries[i];
623
+ if (entry.element.id == "")
624
+ entry.element.id = "_toc_" + i;
625
+ var a = document.createElement("a");
626
+ a.href = "#" + entry.element.id;
627
+ a.appendChild(document.createTextNode(entry.text));
628
+ var div = document.createElement("div");
629
+ div.appendChild(a);
630
+ div.className = "toclevel" + entry.toclevel;
631
+ toc.appendChild(div);
632
+ }
633
+ if (entries.length == 0)
634
+ toc.parentNode.removeChild(toc);
635
+ },
636
+
637
+
638
+ /////////////////////////////////////////////////////////////////////
639
+ // Footnotes generator
640
+ /////////////////////////////////////////////////////////////////////
641
+
642
+ /* Based on footnote generation code from:
643
+ * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
644
+ */
645
+
646
+ footnotes: function () {
647
+ // Delete existing footnote entries in case we're reloading the footnotes.
648
+ var i;
649
+ var noteholder = document.getElementById("footnotes");
650
+ if (!noteholder) {
651
+ return;
652
+ }
653
+ var entriesToRemove = [];
654
+ for (i = 0; i < noteholder.childNodes.length; i++) {
655
+ var entry = noteholder.childNodes[i];
656
+ if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
657
+ entriesToRemove.push(entry);
658
+ }
659
+ for (i = 0; i < entriesToRemove.length; i++) {
660
+ noteholder.removeChild(entriesToRemove[i]);
661
+ }
662
+
663
+ // Rebuild footnote entries.
664
+ var cont = document.getElementById("content");
665
+ var spans = cont.getElementsByTagName("span");
666
+ var refs = {};
667
+ var n = 0;
668
+ for (i=0; i<spans.length; i++) {
669
+ if (spans[i].className == "footnote") {
670
+ n++;
671
+ var note = spans[i].getAttribute("data-note");
672
+ if (!note) {
673
+ // Use [\s\S] in place of . so multi-line matches work.
674
+ // Because JavaScript has no s (dotall) regex flag.
675
+ note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
676
+ spans[i].innerHTML =
677
+ "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
678
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
679
+ spans[i].setAttribute("data-note", note);
680
+ }
681
+ noteholder.innerHTML +=
682
+ "<div class='footnote' id='_footnote_" + n + "'>" +
683
+ "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
684
+ n + "</a>. " + note + "</div>";
685
+ var id =spans[i].getAttribute("id");
686
+ if (id != null) refs["#"+id] = n;
687
+ }
688
+ }
689
+ if (n == 0)
690
+ noteholder.parentNode.removeChild(noteholder);
691
+ else {
692
+ // Process footnoterefs.
693
+ for (i=0; i<spans.length; i++) {
694
+ if (spans[i].className == "footnoteref") {
695
+ var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
696
+ href = href.match(/#.*/)[0]; // Because IE return full URL.
697
+ n = refs[href];
698
+ spans[i].innerHTML =
699
+ "[<a href='#_footnote_" + n +
700
+ "' title='View footnote' class='footnote'>" + n + "</a>]";
701
+ }
702
+ }
703
+ }
704
+ },
705
+
706
+ install: function(toclevels) {
707
+ var timerId;
708
+
709
+ function reinstall() {
710
+ asciidoc.footnotes();
711
+ if (toclevels) {
712
+ asciidoc.toc(toclevels);
713
+ }
714
+ }
715
+
716
+ function reinstallAndRemoveTimer() {
717
+ clearInterval(timerId);
718
+ reinstall();
719
+ }
720
+
721
+ timerId = setInterval(reinstall, 500);
722
+ if (document.addEventListener)
723
+ document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
724
+ else
725
+ window.onload = reinstallAndRemoveTimer;
726
+ }
727
+
728
+ }
729
+ asciidoc.install();
730
+ /*]]>*/
731
+ </script>
732
+ </head>
733
+ <body class="manpage">
734
+ <div id="header">
735
+ <h1>
736
+ DAWG2WORDLIST(1) Manual Page
737
+ </h1>
738
+ <h2>NAME</h2>
739
+ <div class="sectionbody">
740
+ <p>dawg2wordlist -
741
+ convert a Tesseract DAWG to a wordlist
742
+ </p>
743
+ </div>
744
+ </div>
745
+ <div id="content">
746
+ <div class="sect1">
747
+ <h2 id="_synopsis">SYNOPSIS</h2>
748
+ <div class="sectionbody">
749
+ <div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div>
750
+ </div>
751
+ </div>
752
+ <div class="sect1">
753
+ <h2 id="_description">DESCRIPTION</h2>
754
+ <div class="sectionbody">
755
+ <div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
756
+ Graph (DAWG) to a list of words using a unicharset as key.</p></div>
757
+ </div>
758
+ </div>
759
+ <div class="sect1">
760
+ <h2 id="_options">OPTIONS</h2>
761
+ <div class="sectionbody">
762
+ <div class="paragraph"><p><em>UNICHARSET</em>
763
+ The unicharset of the language. This is the unicharset
764
+ generated by mftraining(1).</p></div>
765
+ <div class="paragraph"><p><em>DAWG</em>
766
+ The input DAWG, created by wordlist2dawg(1).</p></div>
767
+ <div class="paragraph"><p><em>WORDLIST</em>
768
+ Plain text (output) file in UTF-8, one word per line</p></div>
769
+ </div>
770
+ </div>
771
+ <div class="sect1">
772
+ <h2 id="_see_also">SEE ALSO</h2>
773
+ <div class="sectionbody">
774
+ <div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
775
+ combine_tessdata(1)</p></div>
776
+ <div class="paragraph"><p><a href="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</a></p></div>
777
+ </div>
778
+ </div>
779
+ <div class="sect1">
780
+ <h2 id="_copying">COPYING</h2>
781
+ <div class="sectionbody">
782
+ <div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
783
+ Licensed under the Apache License, Version 2.0</p></div>
784
+ </div>
785
+ </div>
786
+ <div class="sect1">
787
+ <h2 id="_author">AUTHOR</h2>
788
+ <div class="sectionbody">
789
+ <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
790
+ at Hewlett Packard (1985-1995) and Google (2006-2018).</p></div>
791
+ </div>
792
+ </div>
793
+ </div>
794
+ <div id="footnotes"><hr></div>
795
+ <div id="footer">
796
+ <div id="footer-text">
797
+ Last updated
798
+ 2024-05-03 17:30:23 CEST
799
+ </div>
800
+ </div>
801
+ </body>
802
+ </html>
dawg2wordlist.exe ADDED
Binary file (575 kB). View file
 
doc/AUTHORS ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Ray Smith (lead developer) <[email protected]>
2
+ Ahmad Abdulkader
3
+ Rika Antonova
4
+ Nicholas Beato
5
+ Jeff Breidenbach
6
+ Samuel Charron
7
+ Phil Cheatle
8
+ Simon Crouch
9
+ David Eger
10
+ Sheelagh Huddleston
11
+ Dan Johnson
12
+ Rajesh Katikam
13
+ Thomas Kielbus
14
+ Dar-Shyang Lee
15
+ Zongyi (Joe) Liu
16
+ Robert Moss
17
+ Chris Newton
18
+ Michael Reimer
19
+ Marius Renn
20
+ Raquel Romano
21
+ Christy Russon
22
+ Shobhit Saxena
23
+ Mark Seaman
24
+ Faisal Shafait
25
+ Hiroshi Takenaka
26
+ Ranjith Unnikrishnan
27
+ Joern Wanke
28
+ Ping Ping Xiu
29
+ Andrew Ziem
30
+ Oscar Zuniga
31
+
32
+ Community Contributors:
33
+ Zdenko Podobný (Maintainer)
34
+ Jim Regan (Maintainer)
35
+ James R Barlow
36
+ Stefan Brechtken
37
+ Thomas Breuel
38
+ Amit Dovev
39
+ Martin Ettl
40
+ Shree Devi Kumar
41
+ Noah Metzger
42
+ Tom Morris
43
+ Tobias Müller
44
+ Egor Pugin
45
+ Robert Sachunsky
46
+ Raf Schietekat
47
+ Sundar M. Vaidya
48
+ Robin Watts
49
+ Stefan Weil
50
+ Nick White
51
+ Alexander Zaitsev
doc/LICENSE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
doc/README.md ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tesseract OCR
2
+
3
+ [![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/)
4
+ [![Build status](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)\
5
+ [![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr)
6
+ [![CodeQL](https://github.com/tesseract-ocr/tesseract/workflows/CodeQL/badge.svg)](https://github.com/tesseract-ocr/tesseract/security/code-scanning)
7
+ [![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=2&q=proj:tesseract-ocr)
8
+ \
9
+ [![GitHub license](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://raw.githubusercontent.com/tesseract-ocr/tesseract/main/LICENSE)
10
+ [![Downloads](https://img.shields.io/badge/download-all%20releases-brightgreen.svg)](https://github.com/tesseract-ocr/tesseract/releases/)
11
+
12
+ ## Table of Contents
13
+
14
+ * [Tesseract OCR](#tesseract-ocr)
15
+ * [About](#about)
16
+ * [Brief history](#brief-history)
17
+ * [Installing Tesseract](#installing-tesseract)
18
+ * [Running Tesseract](#running-tesseract)
19
+ * [For developers](#for-developers)
20
+ * [Support](#support)
21
+ * [License](#license)
22
+ * [Dependencies](#dependencies)
23
+ * [Latest Version of README](#latest-version-of-readme)
24
+
25
+ ## About
26
+
27
+ This package contains an **OCR engine** - `libtesseract` and a **command line program** - `tesseract`.
28
+
29
+ Tesseract 4 adds a new neural net (LSTM) based [OCR engine](https://en.wikipedia.org/wiki/Optical_character_recognition) which is focused on line recognition, but also still supports the legacy Tesseract OCR engine of Tesseract 3 which works by recognizing character patterns. Compatibility with Tesseract 3 is enabled by using the Legacy OCR Engine mode (`--oem 0`).
30
+ It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example those from the [tessdata](https://github.com/tesseract-ocr/tessdata) repository.
31
+
32
+ Stefan Weil is the current lead developer. Ray Smith was the lead developer until 2018. The maintainer is Zdenko Podobny. For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS)
33
+ and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors).
34
+
35
+ Tesseract has **Unicode (UTF-8) support**, and can **recognize [more than 100 languages](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html)** "out of the box".
36
+
37
+ Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF.
38
+
39
+ Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV, ALTO and PAGE.
40
+
41
+ You should note that in many cases, in order to get better OCR results, you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract.
42
+
43
+ This project **does not include a GUI application**. If you need one, please see the [3rdParty](https://tesseract-ocr.github.io/tessdoc/User-Projects-%E2%80%93-3rdParty.html) documentation.
44
+
45
+ Tesseract **can be trained to recognize other languages**.
46
+ See [Tesseract Training](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html) for more information.
47
+
48
+ ## Brief history
49
+
50
+ Tesseract was originally developed at Hewlett-Packard Laboratories Bristol UK and at Hewlett-Packard Co, Greeley Colorado USA between 1985 and 1994, with some more changes made in 1996 to port to Windows, and some C++izing in 1998. In 2005 Tesseract was open sourced by HP. From 2006 until November 2018 it was developed by Google.
51
+
52
+ Major version 5 is the current stable version and started with release
53
+ [5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0) on November 30, 2021. Newer minor versions and bugfix versions are available from
54
+ [GitHub](https://github.com/tesseract-ocr/tesseract/releases/).
55
+
56
+ Latest source code is available from [main branch on GitHub](https://github.com/tesseract-ocr/tesseract/tree/main).
57
+ Open issues can be found in the [issue tracker](https://github.com/tesseract-ocr/tesseract/issues),
58
+ and [planning documentation](https://tesseract-ocr.github.io/tessdoc/Planning.html).
59
+
60
+ See **[Release Notes](https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html)**
61
+ and **[Change Log](https://github.com/tesseract-ocr/tesseract/blob/main/ChangeLog)** for more details of the releases.
62
+
63
+ ## Installing Tesseract
64
+
65
+ You can either [install Tesseract via pre-built binary package](https://tesseract-ocr.github.io/tessdoc/Installation.html)
66
+ or [build it from source](https://tesseract-ocr.github.io/tessdoc/Compiling.html).
67
+
68
+ Before building Tesseract from source, please check that your system has a compiler which is one of the [supported compilers](https://tesseract-ocr.github.io/tessdoc/supported-compilers.html).
69
+
70
+ ## Running Tesseract
71
+
72
+ Basic **[command line usage](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html)**:
73
+
74
+ tesseract imagename outputbase [-l lang] [--oem ocrenginemode] [--psm pagesegmode] [configfiles...]
75
+
76
+ For more information about the various command line options use `tesseract --help` or `man tesseract`.
77
+
78
+ Examples can be found in the [documentation](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#simplest-invocation-to-ocr-an-image).
79
+
80
+ ## For developers
81
+
82
+ Developers can use `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/capi.h) or
83
+ [C++](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/baseapi.h) API to build their own application. If you need bindings to `libtesseract` for other programming languages, please see the
84
+ [wrapper](https://tesseract-ocr.github.io/tessdoc/AddOns.html#tesseract-wrappers) section in the AddOns documentation.
85
+
86
+ Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](https://tesseract-ocr.github.io/).
87
+
88
+ ## Support
89
+
90
+ Before you submit an issue, please review **[the guidelines for this repository](https://github.com/tesseract-ocr/tesseract/blob/main/CONTRIBUTING.md)**.
91
+
92
+ For support, first read the [documentation](https://tesseract-ocr.github.io/tessdoc/),
93
+ particularly the [FAQ](https://tesseract-ocr.github.io/tessdoc/FAQ.html) to see if your problem is addressed there.
94
+ If not, search the [Tesseract user forum](https://groups.google.com/g/tesseract-ocr), the [Tesseract developer forum](https://groups.google.com/g/tesseract-dev) and [past issues](https://github.com/tesseract-ocr/tesseract/issues), and if you still can't find what you need, ask for support in the mailing-lists.
95
+
96
+ Mailing-lists:
97
+
98
+ * [tesseract-ocr](https://groups.google.com/g/tesseract-ocr) - For tesseract users.
99
+ * [tesseract-dev](https://groups.google.com/g/tesseract-dev) - For tesseract developers.
100
+
101
+ Please report an issue only for a **bug**, not for asking questions.
102
+
103
+ ## License
104
+
105
+ The code in this repository is licensed under the Apache License, Version 2.0 (the "License");
106
+ you may not use this file except in compliance with the License.
107
+ You may obtain a copy of the License at
108
+
109
+ http://www.apache.org/licenses/LICENSE-2.0
110
+
111
+ Unless required by applicable law or agreed to in writing, software
112
+ distributed under the License is distributed on an "AS IS" BASIS,
113
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
114
+ See the License for the specific language governing permissions and
115
+ limitations under the License.
116
+
117
+ **NOTE**: This software depends on other packages that may be licensed under different open source licenses.
118
+
119
+ Tesseract uses [Leptonica library](http://leptonica.com/) which essentially
120
+ uses a [BSD 2-clause license](http://leptonica.com/about-the-license.html).
121
+
122
+ ## Dependencies
123
+
124
+ Tesseract uses [Leptonica library](https://github.com/DanBloomberg/leptonica)
125
+ for opening input images (i.e. not documents like PDF).
126
+ It is suggested to use leptonica with built-in support for [zlib](https://zlib.net),
127
+ [png](https://sourceforge.net/projects/libpng) and
128
+ [tiff](http://www.simplesystems.org/libtiff) (for multipage tiff).
129
+
130
+ ## Latest Version of README
131
+
132
+ For the latest online version of the README.md see:
133
+
134
+ <https://github.com/tesseract-ocr/tesseract/blob/main/README.md>
libLerc.dll ADDED
Binary file (761 kB). View file
 
libarchive-13.dll ADDED
Binary file (769 kB). View file
 
libb2-1.dll ADDED
Binary file (34.4 kB). View file
 
libbrotlicommon.dll ADDED
Binary file (143 kB). View file
 
libbrotlidec.dll ADDED
Binary file (60.4 kB). View file
 
libbz2-1.dll ADDED
Binary file (101 kB). View file
 
libcairo-2.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a30ed066ebbb5ed6e97da2c119d61a24e4135518c18f8cd8c61299a5182e720
3
+ size 1207571
libcrypto-3-x64.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbc1531aa728e1e5d1b11e2927882b8ee2a12bacb2495f04f0d248d5be82cd1
3
+ size 5094016
libcurl-4.dll ADDED
Binary file (752 kB). View file
 
libdatrie-1.dll ADDED
Binary file (34.4 kB). View file
 
libdeflate.dll ADDED
Binary file (91 kB). View file
 
libexpat-1.dll ADDED
Binary file (189 kB). View file
 
libffi-8.dll ADDED
Binary file (33.7 kB). View file
 
libfontconfig-1.dll ADDED
Binary file (350 kB). View file
 
libfreetype-6.dll ADDED
Binary file (777 kB). View file
 
libfribidi-0.dll ADDED
Binary file (150 kB). View file
 
libgcc_s_seh-1.dll ADDED
Binary file (117 kB). View file
 
libgif-7.dll ADDED
Binary file (41.4 kB). View file
 
libgio-2.0-0.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ec4f4ddbc2384dd8a0d27c6d559bc7034b390c6bfba1ef89b755c2291cd8ef9
3
+ size 1800402
libglib-2.0-0.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:876cf35b494d95ab012e7220208501882d5f40eb7c3f421b7a92b05ff9e8c3f9
3
+ size 1448224
libgmodule-2.0-0.dll ADDED
Binary file (26.8 kB). View file
 
libgobject-2.0-0.dll ADDED
Binary file (358 kB). View file
 
libgraphite2.dll ADDED
Binary file (153 kB). View file
 
libharfbuzz-0.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ccf58a209c175b4d8bc583e4866e1fb55414effcddebd5f8b2ea5d2feacb0af
3
+ size 1291986
libiconv-2.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:967189adfbc889fde89aafc867f7a1f02731f8592cf6fd5a4ace1929213e2e13
3
+ size 1118202
libicudt74.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90a20195f28e27f65709211c8fa02af5197038f5746692c49b34f7f1bdd98931
3
+ size 30796806
libicuin74.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69328f8c98dc27ee26bba561cd9b6077a63dcfca88d601ade3bb50143a171d2e
3
+ size 2958241
libicuuc74.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faea09e5599364842e832ab3f196ad2f166bada760124d10b2acc67c1c06d55c
3
+ size 1839550
libidn2-0.dll ADDED
Binary file (243 kB). View file
 
libintl-8.dll ADDED
Binary file (189 kB). View file
 
libjbig-0.dll ADDED
Binary file (62.1 kB). View file
 
libjpeg-8.dll ADDED
Binary file (929 kB). View file
 
libleptonica-6.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26861baff4d25446a03ed1f6064d2eae7435eddf51bb36af4e634c0eb4eb3af0
3
+ size 2721006
liblz4.dll ADDED
Binary file (147 kB). View file