File size: 1,470 Bytes
f0ca36a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import re
from indic_transliteration import sanscript


# Ordered IAST -> IPA rewrite rules, stored as (compiled regex, replacement)
# pairs.  The rules are applied one after another to the same string, so
# their order is significant:
#   * each aspirate rule (e.g. 'k⁼h' -> 'kʰ') matches the output of the plain
#     stop rule placed immediately before it ('k' -> 'k⁼');
#   * the dental 't'/'d' rules come BEFORE the retroflex 'ṭ'/'ḍ' rules,
#     because the retroflex replacements themselves contain a plain 't'/'d'
#     and would otherwise be rewritten a second time
#     ('ṭ' -> 't`⁼' -> 't⁼`⁼');
#   * 'h' -> 'ɦ' comes after every aspirate rule, so it only touches an 'h'
#     that was not consumed as part of an aspirated stop.
_iast_to_ipa = [(re.compile(iast), ipa) for iast, ipa in [
    # Vowels
    ('a', 'ə'),
    ('ā', 'aː'),
    ('ī', 'iː'),
    ('ū', 'uː'),
    ('ṛ', 'ɹ`'),
    ('ṝ', 'ɹ`ː'),
    ('ḷ', 'l`'),
    ('ḹ', 'l`ː'),
    ('e', 'eː'),
    ('o', 'oː'),
    # Velars
    ('k', 'k⁼'),
    ('k⁼h', 'kʰ'),
    ('g', 'g⁼'),
    ('g⁼h', 'gʰ'),
    ('ṅ', 'ŋ'),
    # Palatals
    ('c', 'ʧ⁼'),
    ('ʧ⁼h', 'ʧʰ'),
    ('j', 'ʥ⁼'),
    ('ʥ⁼h', 'ʥʰ'),
    ('ñ', 'n^'),
    # Dentals (must precede the retroflexes, see the note above)
    ('t', 't⁼'),
    ('t⁼h', 'tʰ'),
    ('d', 'd⁼'),
    ('d⁼h', 'dʰ'),
    # Retroflexes
    ('ṭ', 't`⁼'),
    ('t`⁼h', 't`ʰ'),
    ('ḍ', 'd`⁼'),
    ('d`⁼h', 'd`ʰ'),
    ('ṇ', 'n`'),
    # Labials
    ('p', 'p⁼'),
    ('p⁼h', 'pʰ'),
    ('b', 'b⁼'),
    ('b⁼h', 'bʰ'),
    # Semivowels, sibilants and the remaining signs
    ('y', 'j'),
    ('ś', 'ʃ'),
    ('ṣ', 's`'),
    ('r', 'ɾ'),
    ('l̤', 'l`'),
    ('h', 'ɦ'),
    ("'", ''),
    ('~', '^'),
    ('ṃ', '^'),
]]


def devanagari_to_ipa(text):
    """Convert Devanagari text into the IPA-like notation of this module.

    Steps, in order:
      1. the ॐ sign is spelled out as ओम्;
      2. danda punctuation becomes '.' / ', ';
      3. the text is transliterated from Devanagari to IAST;
      4. every rule in ``_iast_to_ipa`` is applied in sequence;
      5. each visarga (ḥ) becomes 'h' followed by a copy of the character
         that precedes it (ignoring any '`' / 'ː' marks in between) and '*'.

    Args:
        text: Input string in Devanagari script.

    Returns:
        The converted string.
    """
    text = text.replace('ॐ', 'ओम्')

    # Order matters: a danda that closes the text becomes a full stop before
    # the remaining dandas are turned into commas.
    punctuation_rules = (
        (r'\s*।\s*$', '.'),
        (r'\s*।\s*', ', '),
        (r'\s*॥', '.'),
    )
    for pattern, mark in punctuation_rules:
        text = re.sub(pattern, mark, text)

    text = sanscript.transliterate(text, sanscript.DEVANAGARI, sanscript.IAST)

    for regex, replacement in _iast_to_ipa:
        text = regex.sub(replacement, text)

    def _expand_visarga(match):
        # Drop the trailing 'ḥ', then append 'h', the captured preceding
        # character and a '*' marker.
        matched = match.group(0)
        return matched[:-1] + 'h' + match.group(1) + '*'

    return re.sub('(.)[`ː]*ḥ', _expand_visarga, text)