Mahiruoshi
committed on
Commit
•
f0ca36a
1
Parent(s):
96bd1d3
Upload 18 files
Browse files
text/__pycache__/korean.cpython-38.pyc
ADDED
Binary file (5.72 kB). View file
|
|
text/__pycache__/sanskrit.cpython-38.pyc
ADDED
Binary file (1.69 kB). View file
|
|
text/__pycache__/shanghainese.cpython-38.pyc
ADDED
Binary file (2.13 kB). View file
|
|
text/__pycache__/thai.cpython-38.pyc
ADDED
Binary file (1.46 kB). View file
|
|
text/sanskrit.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from indic_transliteration import sanscript
|
3 |
+
|
4 |
+
|
5 |
+
# Ordered list of (compiled IAST pattern, IPA replacement) rules.
#
# The rules are meant to be applied one after another to the whole string, so
# ORDER MATTERS: every rule sees the output of all rules before it.
#   * An aspirate rule (e.g. 'k⁼h' -> 'kʰ') deliberately matches the output of
#     the unaspirated rule directly above it followed by a literal 'h'.
#   * The plain dental rules ('t', 'd') must run BEFORE the retroflex rules
#     ('ṭ', 'ḍ'): the retroflex replacements themselves contain the ASCII
#     letters 't' / 'd', and would otherwise be rewritten a second time
#     (giving 't⁼`⁼' instead of 't`⁼').
#   * 'h' -> 'ɦ' runs after every aspirate rule so that only a free-standing
#     'h' is affected.
# NOTE(review): moving the dental rules changes the output for retroflex stops
# compared with the previous ordering - confirm nothing downstream depends on
# the old spelling.
_iast_to_ipa = [(re.compile(iast), ipa) for iast, ipa in [
    # Vowels
    ('a', 'ə'),
    ('ā', 'aː'),
    ('ī', 'iː'),
    ('ū', 'uː'),
    ('ṛ', 'ɹ`'),
    ('ṝ', 'ɹ`ː'),
    ('ḷ', 'l`'),
    ('ḹ', 'l`ː'),
    ('e', 'eː'),
    ('o', 'oː'),
    # Velars
    ('k', 'k⁼'),
    ('k⁼h', 'kʰ'),
    ('g', 'g⁼'),
    ('g⁼h', 'gʰ'),
    ('ṅ', 'ŋ'),
    # Palatals
    ('c', 'ʧ⁼'),
    ('ʧ⁼h', 'ʧʰ'),
    ('j', 'ʥ⁼'),
    ('ʥ⁼h', 'ʥʰ'),
    ('ñ', 'n^'),
    # Dentals (kept ahead of the retroflexes, see the note above)
    ('t', 't⁼'),
    ('t⁼h', 'tʰ'),
    ('d', 'd⁼'),
    ('d⁼h', 'dʰ'),
    # Retroflexes
    ('ṭ', 't`⁼'),
    ('t`⁼h', 't`ʰ'),
    ('ḍ', 'd`⁼'),
    ('d`⁼h', 'd`ʰ'),
    ('ṇ', 'n`'),
    # Labials
    ('p', 'p⁼'),
    ('p⁼h', 'pʰ'),
    ('b', 'b⁼'),
    ('b⁼h', 'bʰ'),
    # Semivowels, sibilants and the remaining signs
    ('y', 'j'),
    ('ś', 'ʃ'),
    ('ṣ', 's`'),
    ('r', 'ɾ'),
    ('l̤', 'l`'),
    ('h', 'ɦ'),
    ("'", ''),
    ('~', '^'),
    ('ṃ', '^'),
]]
|
50 |
+
|
51 |
+
|
52 |
+
def devanagari_to_ipa(text):
    """Convert Devanagari text to the IPA-like notation of ``_iast_to_ipa``.

    Steps, in order:
      1. Replace the sign 'ॐ' with the spelled-out 'ओम्'.
      2. Rewrite punctuation: a danda ('।') at the very end of the text
         becomes '.', any other danda becomes ', ', and a double danda
         ('॥') becomes '.'; whitespace around the marks is absorbed as the
         patterns specify.
      3. Transliterate Devanagari to IAST with ``sanscript.transliterate``.
      4. Apply every (pattern, replacement) rule of ``_iast_to_ipa`` in list
         order, each rule operating on the result of the previous one.
      5. Rewrite each visarga ('ḥ'), see ``_expand_visarga`` below.

    Args:
        text: Input string.

    Returns:
        The converted string.
    """

    def _expand_visarga(match):
        # The match is: one character, optional '`' / 'ː' marks, then 'ḥ'.
        # Keep everything except the trailing 'ḥ', then append 'h', a copy of
        # the leading character, and a '*' marker.
        kept = match.group(0)[:-1]
        return kept + 'h' + match.group(1) + '*'

    converted = text.replace('ॐ', 'ओम्')

    # The end-of-text danda rule has to run before the generic danda rule,
    # otherwise a final danda would be turned into ', ' instead of '.'.
    punctuation_rules = (
        (r'\s*।\s*$', '.'),
        (r'\s*।\s*', ', '),
        (r'\s*॥', '.'),
    )
    for pattern, substitute in punctuation_rules:
        converted = re.sub(pattern, substitute, converted)

    converted = sanscript.transliterate(
        converted, sanscript.DEVANAGARI, sanscript.IAST)

    for rule, ipa in _iast_to_ipa:
        converted = rule.sub(ipa, converted)

    return re.sub('(.)[`ː]*ḥ', _expand_visarga, converted)
|