Модул:hi-IPA

Аз Wiktionary

Documentation for this module may be created at Модул:hi-IPA/doc

-- IPA generation for Hindi (and possibly other languages written in Devanagari, except Sanskrit). Work in progress: the output is very rough, so treat it only as a guide and always check the result.

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Local aliases for the mw.ustring versions of gsub/match, which the rest of
-- the file uses for all pattern work on Devanagari text.
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Lookup table from a Devanagari character (optionally followed by a nukta)
-- to its IPA output. export.tr applies it as the last substitution step via
-- gsub(text, '.़?', conv); characters with no entry here are left unchanged.
local conv = {
	-- consonants
	['क'] = 'k', ['ख'] = 'kʰ', ['ग'] = 'ɡ', ['घ'] = 'ɡʱ', ['ङ'] = 'ŋ',
	['च'] = 't͡ʃ', ['छ'] = 't͡ʃʰ', ['ज'] = 'd͡ʒ', ['झ'] = 'd͡ʒʱ', ['ञ'] = 'ɲ', 
	['ट'] = 'ʈ', ['ठ'] = 'ʈʰ', ['ड'] = 'ɖ', ['ढ'] = 'ɖʱ', ['ण'] = 'ɳ',
	['त'] = 't̪', ['थ'] = 't̪ʰ', ['द'] = 'd̪', ['ध'] = 'd̪ʱ', ['न'] = 'n',
	['प'] = 'p', ['फ'] = 'pʰ', ['ब'] = 'b', ['भ'] = 'bʱ', ['म'] = 'm', 
	['य'] = 'j', ['र'] = 'ɾ', ['ल'] = 'l', ['व'] = 'ʋ', ['ळ'] = 'ɭ̆',
	['श'] = 'ʃ', ['ष'] = 'ʂ', ['स'] = 's', ['ह'] = 'ɦ',
	-- consonants written with a nukta (dot below)
	['क़'] = 'q', ['ख़'] = 'x', ['ग़'] = 'ɣ', ['ऴ'] = 'ḻ',
	['ज़'] = 'z', ['झ़'] = 'ʒ', ['ड़'] = 'ɽ', ['ढ़'] = 'ɽʱ',
	['फ़'] = 'f', ['थ़'] = 'θ', ['ऩ'] = 'n', ['ऱ'] = 'r', 

	-- dependent vowel signs (matras attached to a consonant)
	['ि'] = 'ɪ', ['ु'] = 'ʊ', ['े'] = 'e', ['ो'] = 'o', 
	['ा'] = 'ɑː', ['ी'] = 'iː', ['ू'] = 'uː', 
	['ृ'] = 'ɻʲ',
	['ै'] = 'ɛː', ['ौ'] = 'ɔː',
	['ॉ'] = 'ɒ',
	['ॅ'] = 'ɛ',

	-- independent vowel letters
	['अ'] = 'ə', ['इ'] = 'ɪ', ['उ'] = 'ʊ', ['ए'] = 'e', ['ओ'] = 'o',
	['आ'] = 'ɑː', ['ई'] = 'iː', ['ऊ'] = 'uː', 
	['ऋ'] = 'ɻʲ', 
	['ऐ'] = 'ɛː', ['औ'] = 'ɔː', 
	['ऑ'] = 'ɒ',
	['ऍ'] = 'ɛ',
	
	-- chandrabindu
	-- NOTE(review): this emits the letter 'ñ', whereas the anusvara below emits
	-- a bare combining tilde; confirm that the difference is intended.
	['ँ'] = 'ñ',
	
	-- anusvara (only reached for anusvaras that export.tr has not already rewritten)
	['ं'] = '̃',
	
	-- visarga
	['ः'] = 'h',
	
	-- virama: contributes no output of its own
	['्'] = '',
	
	-- punctuation
	['।'] = '.', -- danda
	['+'] = '', -- compound separator
}

-- Maps a stop consonant (and म itself) to the nasal letter that an anusvara
-- standing before it is rewritten to. Consonants without an entry are handled
-- by the caller's fallback.
local nasal_assim = {}
do
	-- Each row: the nasal letter, followed by the consonants that select it.
	local rows = {
		{ 'ङ', 'क', 'ख', 'ग', 'घ' },
		{ 'ञ', 'च', 'छ', 'ज', 'झ' },
		{ 'ण', 'ट', 'ठ', 'ड', 'ढ' },
		{ 'म', 'प', 'फ', 'ब', 'भ', 'म' },
	}
	for _, row in ipairs(rows) do
		local nasal = row[1]
		for i = 2, #row do
			nasal_assim[row[i]] = nasal
		end
	end
end

-- Character classes used to build the patterns in export.tr.
--   all_cons     : consonant letters that receive an inherent schwa.
--   special_cons : consonants after which a word-final schwa is kept when the
--                  consonant is itself preceded by a virama (see export.tr).
--   vowel        : the inherent schwa 'ə' plus the dependent vowel signs.
--   vowel_sign   : the independent vowel letters.
-- The inherent vowel is written 'ə' (not Latin 'a') because that is the
-- character export.tr inserts after bare consonants; using 'a' here meant the
-- syncope pattern could never match text produced by export.tr.
local all_cons, special_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम', 'यरलवहनम'
local vowel, vowel_sign = 'əिुृेोाीूैौॉॅ', 'अइउएओआईऊऋऐऔऑऍ'
-- Matched against the REVERSED word: vowel, consonant, schwa, consonant
-- (any of all_cons except य), then an optional anusvara plus a vowel.
-- export.tr replaces each match with its four captures, dropping the schwa.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])([' .. all_cons .. '])ə([' .. gsub(all_cons, "य", "") .. '])(ं?[' .. vowel .. vowel_sign .. '])'

--- Reverses a string character by character (not byte by byte).
-- Combining marks are treated as separate characters, so a base letter and
-- its mark swap order in the result; export.tr relies on that when it matches
-- patterns against the reversed word.
-- @param text the string to reverse
-- @return a new string with the characters of `text` in reverse order
local function rev_string(text)
	-- Collect the characters in one pass instead of calling mw.ustring.sub for
	-- every index and growing the result with repeated concatenation.
	local chars = {}
	for ch in mw.ustring.gmatch(text, '.') do
		chars[#chars + 1] = ch
	end
	-- Swap in place from both ends towards the middle.
	local n = #chars
	for i = 1, math.floor(n / 2) do
		local j = n - i + 1
		chars[i], chars[j] = chars[j], chars[i]
	end
	return table.concat(chars)
end

--- Converts Devanagari text into a rough IPA string.
-- Outline:
--   1. write an explicit schwa after every consonant that is not followed by
--      a vowel sign or virama;
--   2. for each space-separated word, working on the reversed word: decide
--      whether the word-final schwa survives, apply schwa syncope until the
--      pattern no longer matches, and rewrite anusvaras;
--   3. map every character through `conv` and return the NFC-normalised text.
-- @param text the text to convert
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the converted string, NFC-normalised
function export.tr(text, lang, sc)
	local schwa_pattern = '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)'
	local final_pattern = '^ə(़?)([' .. all_cons .. '])(.)'
	local special_class = '[' .. special_cons .. ']'
	local vowel_class = '[' .. vowel .. ']'

	-- A consonant with nothing after it gets the inherent schwa.
	local function add_schwa(cons, follower)
		if follower == "" then
			return cons .. 'ə'
		end
		return cons .. follower
	end

	-- Reversed word starts with a schwa: keep it only when the last consonant
	-- is a "special" one preceded by a virama, or for the य + ी/े/ै sequences.
	local function final_schwa(nukta, last, before)
		local tail = nukta .. last .. before
		local keep = (match(last, special_class) and match(before, '्'))
			or match(last .. before, 'य[ीेै]')
		if keep then
			return 'ə' .. tail
		end
		return tail
	end

	-- In the reversed word `following` is the character that comes after the
	-- anusvara in the original order, `preceding` the one that comes before.
	local function resolve_anusvara(following, preceding)
		local nasal
		if following .. preceding == "ə" then
			-- word-final anusvara right after a schwa
			nasal = "्म"
		elseif following == "" and match(preceding, vowel_class) then
			-- word-final anusvara after a vowel sign
			nasal = "̃"
		else
			nasal = nasal_assim[following] or "n"
		end
		return following .. nasal .. preceding
	end

	text = gsub(text, schwa_pattern, add_schwa)

	local words = {}
	for word in mw.text.gsplit(text, " ", true) do
		local rev = rev_string(word)
		rev = gsub(rev, final_pattern, final_schwa)
		while match(rev, syncope_pattern) do
			rev = gsub(rev, syncope_pattern, '%1%2%3%4')
		end
		rev = gsub(rev, '(.?)ं(.)', resolve_anusvara)
		words[#words + 1] = rev_string(rev)
	end

	local joined = table.concat(words, " ")
	local ipa = gsub(joined, '.़?', conv)
	return mw.ustring.toNFC(ipa)
end

-- Hand the table of public functions (export.tr) back to whoever loads this module.
return export