File size: 9,481 Bytes
b028d48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
###########################
# Baseline ATB Newswire Datasets
#
# This file creates the four data sets used in the current
# line of Arabic parsing research:
#
#   (1) Raw (no Bies mapping) / Unvocalized  ("Raw")
#   (2) Bies + DT / Unvocalized              ("Unvoc")
#   (3) Bies + DT / Vocalized                ("Voc")
#   (4) Bies + DT / Unvocalized              ("NoDashTags")
#       -No traces or phrasal tag decorations. For training the Berkeley parser.
#
# Note that "Bies + DT" refers to the enhancement to the Bies mappings
# proposed by Kulick et al. (2006).
#
# The training/dev/test set is the "Mona Diab split" from the 2005 JHU
# workshop on parsing Arabic dialects (Chiang et al., 2006).
#
#
# IMPORTANT: All paths should reference the base Arabic data directory
#
#       /u/nlp/data/Arabic
#
###########################

NAME=1 Raw Train
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
FLAT=true

;;

NAME=1 Raw Dev
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
FLAT=true

;;

NAME=1 Raw Test
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
FLAT=true

;;

NAME=2 Unvoc All
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true

;;

NAME=2 Unvoc Train
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true

;;

NAME=2 Unvoc Dev
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true

;;

NAME=2 Unvoc Test
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true

;;

NAME=3 Voc Train
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/with-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/with-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/with-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
LEXMAPPER=edu.stanford.nlp.international.arabic.pipeline.UnvocLexicalMapper
FLAT=true

;;

NAME=3 Voc Dev
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/with-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/with-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/with-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
LEXMAPPER=edu.stanford.nlp.international.arabic.pipeline.UnvocLexicalMapper
FLAT=true

;;

NAME=3 Voc Test
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/with-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/with-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/with-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
LEXMAPPER=edu.stanford.nlp.international.arabic.pipeline.UnvocLexicalMapper
FLAT=true

;;


NAME=4 Unvoc Train NoDashTags
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
NODASHTAGS=true
ADDROOT=true
USEDET=true

;;

NAME=4 Unvoc Dev NoDashTags
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
NODASHTAGS=true
ADDROOT=true
USEDET=true

;;

NAME=4 Unvoc Test NoDashTags
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
NODASHTAGS=true
ADDROOT=true
USEDET=true

;;