mohdelgaar's picture
upload lng
b028d48
raw
history blame
No virus
9.48 kB
###########################
# Baseline ATB Newswire Datasets
#
# This file creates the four data sets used in the current
# line of Arabic parsing research:
#
# (1) Raw (no Bies mapping) / Unvocalized ("Raw")
# (2) Bies + DT / Unvocalized ("Unvoc")
# (3) Bies + DT / Vocalized ("Voc")
# (4) Bies + DT / Unvocalized ("NoDashTags")
# -No traces or phrasal tag decorations. For training the Berkeley parser.
#
# Note that "Bies + DT" refers to the enhancement to the Bies mappings
# proposed by Kulick et al. (2006).
#
# The training/dev/test set is the "Mona Diab split" from the 2005 JHU
# workshop on parsing Arabic dialects (Chiang et al., 2006).
#
#
# IMPORTANT: All paths should reference the base Arabic data directory
#
# /u/nlp/data/Arabic
#
###########################
# --- Data set 1: "Raw" (unvocalized, no Bies mapping) ---
# Three sections (Train / Dev / Test). Each reads the without-vowel trees of
# ATB parts p1-p3; the sections differ only in NAME and in the SPLIT file.
# No MAPPINGp* or USEDET keys are set for this family.
# NOTE(review): FLAT=true is set here; its exact effect is defined by the
# ATBArabicDataset class, not by this file -- confirm there before changing.
NAME=1 Raw Train
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
FLAT=true
;;
NAME=1 Raw Dev
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
FLAT=true
;;
NAME=1 Raw Test
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
FLAT=true
;;
# --- Data set 2: "Unvoc" (Bies + DT mapping, unvocalized) ---
# Four sections (All / Train / Dev / Test). Each reads the without-vowel trees
# of ATB parts p1-p3 and supplies one MAPPINGp* tag-conversion file per part
# (the .lisp files under each part's docs/ directory), with USEDET=true.
# "2 Unvoc All" has no SPLIT key; the other three differ only in NAME and in
# the SPLIT file.
# NOTE(review): presumably omitting SPLIT means "use every tree found under
# the PATHs" -- verify against ATBArabicDataset.
NAME=2 Unvoc All
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
;;
NAME=2 Unvoc Train
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
;;
NAME=2 Unvoc Dev
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
;;
NAME=2 Unvoc Test
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
;;
# --- Data set 3: "Voc" (Bies + DT mapping, vocalized) ---
# Three sections (Train / Dev / Test). Each reads the with-vowel trees of ATB
# parts p1-p3 and supplies one MAPPINGp* tag-conversion file per part, with
# USEDET=true and FLAT=true. The sections differ only in NAME and in the
# SPLIT file.
# NOTE(review): LEXMAPPER is set to UnvocLexicalMapper even though the input
# is the with-vowel data and the family is labelled "Vocalized" -- confirm
# this pairing is intentional before relying on the output being vocalized.
NAME=3 Voc Train
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/with-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/with-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/with-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
LEXMAPPER=edu.stanford.nlp.international.arabic.pipeline.UnvocLexicalMapper
FLAT=true
;;
NAME=3 Voc Dev
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/with-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/with-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/with-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
LEXMAPPER=edu.stanford.nlp.international.arabic.pipeline.UnvocLexicalMapper
FLAT=true
;;
NAME=3 Voc Test
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/with-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/with-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/with-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
USEDET=true
LEXMAPPER=edu.stanford.nlp.international.arabic.pipeline.UnvocLexicalMapper
FLAT=true
;;
# --- Data set 4: "NoDashTags" (Bies + DT mapping, unvocalized) ---
# Three sections (Train / Dev / Test) with the same PATHp*, SPLIT, MAPPINGp*
# and USEDET settings as the "2 Unvoc" Train/Dev/Test sections, plus
# NODASHTAGS=true and ADDROOT=true. Per the file header, this family has no
# traces or phrasal tag decorations and is meant for training the Berkeley
# parser.
# NOTE(review): ADDROOT=true presumably adds a root node to each tree --
# verify against ATBArabicDataset.
NAME=4 Unvoc Train NoDashTags
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/train
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
NODASHTAGS=true
ADDROOT=true
USEDET=true
;;
NAME=4 Unvoc Dev NoDashTags
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/dev
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
NODASHTAGS=true
ADDROOT=true
USEDET=true
;;
NAME=4 Unvoc Test NoDashTags
TYPE=edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset
PATHp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/data/penntree/without-vowel
PATHp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/data/penntree/without-vowel
PATHp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/data/penntree/without-vowel
SPLIT=/u/nlp/data/Arabic/splits/mona-diab-split/test
OUTPUT_ENCODING=UTF8
MAPPINGp1=/u/nlp/data/Arabic/ldc/atb-latest/p1/docs/atb1-v4.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp2=/u/nlp/data/Arabic/ldc/atb-latest/p2/docs/atb2-v3.0-taglist-conversion-to-PennPOS-forrelease.lisp
MAPPINGp3=/u/nlp/data/Arabic/ldc/atb-latest/p3/docs/atb3-v3.1-taglist-conversion-to-PennPOS-forrelease.lisp
NODASHTAGS=true
ADDROOT=true
USEDET=true
;;