Spaces:
Running
Running
indiejoseph
committed on
Commit
•
cf05cbe
1
Parent(s):
1890488
Update text/cantonese.py
Browse files- text/cantonese.py +8 -5
text/cantonese.py
CHANGED
@@ -177,10 +177,16 @@ def get_jyutping(text):
|
|
177 |
|
178 |
words = word_segmentation(text)
|
179 |
jyutping_array = []
|
|
|
|
|
180 |
|
181 |
for word in words:
|
182 |
-
if word
|
183 |
-
|
|
|
|
|
|
|
|
|
184 |
else:
|
185 |
jyutpings = ""
|
186 |
|
@@ -189,9 +195,6 @@ def get_jyutping(text):
|
|
189 |
else:
|
190 |
jyutpings = word2jyutping(word)
|
191 |
|
192 |
-
if 'la1' in jyutpings:
|
193 |
-
print(text, words, jyutpings)
|
194 |
-
|
195 |
# match multiple jyutping, e.g. liu4 ge3, or a single jyutping, e.g. liu4
|
196 |
if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", jyutpings):
|
197 |
raise ValueError(
|
|
|
177 |
|
178 |
words = word_segmentation(text)
|
179 |
jyutping_array = []
|
180 |
+
punct_pattern = re.compile(
|
181 |
+
r"^[{}]+$".format(re.escape("".join(punctuation))))
|
182 |
|
183 |
for word in words:
|
184 |
+
if punct_pattern.match(word):
|
185 |
+
puncts = re.split(r"([{}])".format(
|
186 |
+
re.escape("".join(punctuation))), word)
|
187 |
+
for punct in puncts:
|
188 |
+
if len(punct) > 0:
|
189 |
+
jyutping_array.append(punct)
|
190 |
else:
|
191 |
jyutpings = ""
|
192 |
|
|
|
195 |
else:
|
196 |
jyutpings = word2jyutping(word)
|
197 |
|
|
|
|
|
|
|
198 |
# match multiple jyutping, e.g. liu4 ge3, or a single jyutping, e.g. liu4
|
199 |
if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", jyutpings):
|
200 |
raise ValueError(
|