indiejoseph committed on
Commit
cf05cbe
1 Parent(s): 1890488

Update text/cantonese.py

Browse files
Files changed (1) hide show
  1. text/cantonese.py +8 -5
text/cantonese.py CHANGED
@@ -177,10 +177,16 @@ def get_jyutping(text):
177
 
178
  words = word_segmentation(text)
179
  jyutping_array = []
 
 
180
 
181
  for word in words:
182
- if word in punctuation:
183
- jyutping_array.append(word)
 
 
 
 
184
  else:
185
  jyutpings = ""
186
 
@@ -189,9 +195,6 @@ def get_jyutping(text):
189
  else:
190
  jyutpings = word2jyutping(word)
191
 
192
- if 'la1' in jyutpings:
193
- print(text, words, jyutpings)
194
-
195
  # match multiple jyutping eg: liu4 ge3, or single jyutping eg: liu4
196
  if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", jyutpings):
197
  raise ValueError(
 
177
 
178
  words = word_segmentation(text)
179
  jyutping_array = []
180
+ punct_pattern = re.compile(
181
+ r"^[{}]+$".format(re.escape("".join(punctuation))))
182
 
183
  for word in words:
184
+ if punct_pattern.match(word):
185
+ puncts = re.split(r"([{}])".format(
186
+ re.escape("".join(punctuation))), word)
187
+ for punct in puncts:
188
+ if len(punct) > 0:
189
+ jyutping_array.append(punct)
190
  else:
191
  jyutpings = ""
192
 
 
195
  else:
196
  jyutpings = word2jyutping(word)
197
 
 
 
 
198
  # match multiple jyutping eg: liu4 ge3, or single jyutping eg: liu4
199
  if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", jyutpings):
200
  raise ValueError(