Ҷаҳиш ба мӯҳтаво

Модул:en-utilities

Аз Wiktionary

Documentation for this module may be created at Модул:en-utilities/doc

local export = {}

local require_when_needed = require("Module:require when needed")

local find = string.find
local get_i_stem -- Defined below.
local match = string.match
local reverse = string.reverse
local sub = string.sub
local toNFD = mw.ustring.toNFD
local ugsub = require_when_needed("Module:string utilities", "gsub")
local ulower = require_when_needed("Module:string utilities", "lower")
local umatch = require_when_needed("Module:string utilities", "match")

-- `vowels` is a run of characters that the functions below splice into
-- pattern character classes (e.g. "[" .. vowels .. "]") in order to test
-- normalized text for vowel letters. Note that it includes "y" and "@".
-- `diacritics` starts out nil; normalize() fills it in the first time it is
-- called.
local vowels, diacritics = "aæᴀᴁɐɑɒ@eᴇǝⱻəɛɘɜɞɤiıɪɨᵻoøœᴏɶɔᴐɵuᴜʉᵾɯꟺʊʋʌyʏ"

-- Returns `str` decomposed to NFD, with everything matched by the
-- `diacritics_all` pattern from the headword data removed, and lowercased,
-- so that callers can inspect the base letters while ignoring diacritics.
local function normalize(str)
	-- The pattern is fetched on first use and cached in the `diacritics`
	-- upvalue for subsequent calls.
	if diacritics == nil then
		local comb_chars = mw.loadData("Module:headword/data").page.comb_chars
		diacritics = comb_chars.diacritics_all .. "+"
	end
	local decomposed = toNFD(str)
	return ulower(ugsub(decomposed, diacritics, ""))
end

-- Returns the stem to attach suffixes to when the suffix may turn a final
-- "y" into "i", such as "-es" ("-ies"): "penny" → "penni" ("pennies").
-- Terms that do not end in "y", or whose "y" is kept, are returned
-- unchanged. If `replace_ey` is true, a final "ey" may be reduced to "i" as
-- well ("plaguey" → "plagui"); this is needed for "-er" ("-ier") and "-est"
-- ("-iest").
function export.get_i_stem(str, replace_ey)
	if sub(str, -1) ~= "y" then
		return str
	end
	local without_y = sub(str, 1, -2)
	local tail2, tail3 = sub(str, -2), sub(str, -3)

	if not (replace_ey and tail2 == "ey") then
		-- Special cases: a hyphenated "-y" ("bro-y" → "bro-iest") and "quy"
		-- ("soliloquy" → "soliloquies") always convert.
		if tail2 == "-y" or tail3 == "quy" then
			return without_y .. "i"
		end
		-- Otherwise, final "y" becomes "i" iff it's not preceded by a vowel
		-- ("shy" → "shiest", "horsy" → "horsies", but "day" → "days",
		-- "coy" → "coyest"). Whitespace or punctuation directly before the
		-- "y" (or nothing at all) also leaves the term unchanged.
		if umatch(normalize(without_y), "[^%s%p" .. vowels .. "]$") then
			return without_y .. "i"
		end
		return str
	end

	-- From here on, `replace_ey` is set and the term ends in "ey".

	-- Special case: treat "eey" as "ee" + "y", so only the "y" is replaced
	-- ("treey" → "treei" → "treeiest"). This is only done when `replace_ey`
	-- is true, as that implies final "(e)y" is likely to be syllabic.
	if tail3 == "eey" then
		return without_y .. "i"
	end
	local without_ey = sub(str, 1, -3)
	-- Special case: a hyphenated "-ey" always converts ("potato-ey" →
	-- "potato-iest").
	if tail3 == "-ey" then
		return without_ey .. "i"
	end

	-- Final "ey" becomes "i" iff the term is polysyllabic (e.g. not "grey").
	-- "ey" is common if the base stem ends in a vowel ("echo" → "echoey"),
	-- so a vowel (or digit) in the base stem's final alphanumeric run is
	-- enough to deem it polysyllabic: "echoey" → "echo" → "echoiest",
	-- "beigey" → "beig" → "beigiest", but "grey" → "gr" → "greyest".
	-- A base stem that itself ends in "y" is checked separately: that "y"
	-- counts as a vowel as long as something precedes it ("clayey" → "clay"
	-- → "clayiest", "cryey" → "cry" → "cryiest").
	local normalized = normalize(without_ey)
	local polysyllabic
	if sub(normalized, -1) == "y" then
		polysyllabic = "[%w@][yY]$"
	else
		polysyllabic = "[" .. vowels .. "%d]%w*$"
	end
	if umatch(normalized, polysyllabic) then
		return without_ey .. "i"
	end
	return str
end
get_i_stem = export.get_i_stem

-- Returns the stem of `str` to which a suffix is attached.
-- NOTE(review): presumably meant for vowel-initial suffixes such as "-er",
-- "-est" or "-ed" — confirm against callers.
-- * A final "e" is dropped.
-- * A final "y" is delegated to get_i_stem, passing `replace_ey` through.
-- * A final consonant from "bcdfgjklmnpqrstvz" is doubled when it follows a
--   vowel character and the part of the word before that vowel contains no
--   vowel characters (see the individual cases below).
-- * Anything else is returned unchanged.
function export.get_stem(str, replace_ey)
	local last = sub(str, -1)
	if last == "e" then
		return sub(str, 1, -2)
	end
	if last == "y" then
		return get_i_stem(str, replace_ey)
	end
	if not match(last, "[bcdfgjklmnpqrstvz]") then
		return str
	end
	-- `onset` is the run of alphanumeric/punctuation characters that comes
	-- before the vowel immediately preceding the final consonant; it is nil
	-- when the final consonant is not preceded by a vowel character.
	local onset = umatch(normalize(sub(str, 1, -2)), "([%w%p]*)[" .. vowels .. "]$")
	if not onset then
		return str
	end
	local doubles
	if onset == "" or onset == "y" then
		-- Nothing before the vowel, or only "y" (which is listed in
		-- `vowels`, so it has to be accepted explicitly).
		doubles = true
	elseif match(onset, "^.[\128-\191]*$") then
		-- A single (possibly multibyte) character: double unless it is a
		-- vowel.
		doubles = umatch(onset, "[^" .. vowels .. "]")
	else
		-- Several characters: double only if none is a vowel and the last
		-- one is alphanumeric.
		doubles = umatch(onset, "^[^" .. vowels .. "]*%f[^%w]$")
	end
	if doubles then
		return str .. last
	end
	return str
end

do
	-- Returns true if `str` ends in "s", "x", "z", "ch", "sh" or "zh", i.e.
	-- if its plural is formed with "-es"; returns false otherwise.
	local function word_takes_es_plural(str)
		local last = sub(str, -1)
		if last == "h" then
			-- A final "h" only counts as part of "ch", "sh" or "zh".
			local before_h = sub(str, -2, -2)
			return before_h == "c" or before_h == "s" or before_h == "z"
		end
		return last == "s" or last == "x" or last == "z"
	end
	
	-- Returns the plural of the plain (unlinked) word `str`: "-es" after a
	-- sibilant ending, "-ies" when get_i_stem converts a final "y", and "-s"
	-- otherwise.
	local function do_pluralize(str)
		if word_takes_es_plural(str) then
			return str .. "es"
		end
		-- FIXME, a subrule of rule #1 below says the -ies ending doesn't
		-- apply to proper nouns, hence "the Gettys", "the public Ivys".
		-- We should maybe consider applying this rule here; but it may not
		-- be important as this function is almost always called on common
		-- nouns (e.g. parts of speech, place types).
		local i_stem = get_i_stem(str)
		if i_stem == str then
			return str .. "s"
		end
		return i_stem .. "es"
	end
	
	--[==[
	Pluralize a word in a smart fashion, according to normal English rules.
	# If the word ends in a consonant or "qu" + "-y", replace "-y" with "-ies".
	# If the word ends in "s", "x", "z", "ch", "sh" or "zh", add "-es".
	# Otherwise, add "-s".

	If `str` contains "[[" and ends in "]]", the last link is pluralized:
	# If a piped link, change the second part appropriately.
	# If a non-piped link and rule #1 above applies, convert to a piped link with the second part containing the plural.
	# If a non-piped link and rules #2 or #3 above apply, add the plural outside the link (as a link trail).

	A piped link whose pluralized display text begins with the target is also
	collapsed to the trail form, but only when the text after the target
	consists solely of lowercase ASCII letters. Anything else (e.g. the space
	in "[[foo|foo bar]]") would not be rendered as part of the link if moved
	outside it, so the piped form is kept in that case.
	]==]
	function export.pluralize(str)
		-- Treat as a link if a "[[" is present and the string ends with "]]".
		if not (find(str, "[[", 1, true) and sub(str, -2) == "]]") then
			return do_pluralize(str)
		end
		-- Find the last "[[" (in case there is more than one) by reversing
		-- the string. The search starts at 3 to skip the reversed final "]]".
		local str_rev = reverse(str)
		local open = find(str_rev, "[[", 3, true)
		-- If the last "[[" is followed by a "]]" which isn't at the end,
		-- then the final "]]" is just plaintext (e.g. "[[foo]]bar]]").
		local bad_close = find(str_rev, "]]", 3, true)
		-- Note: the bad "]]" will have a lower index than the last "[[" in
		-- the reversed string.
		if bad_close and bad_close < open then
			return do_pluralize(str)
		end
		-- Convert to the index (in `str`) of the first character after "[[".
		open = #str - open + 2
		-- Get the target and display text by searching from just after "[[".
		local target, display = match(str, "([^|]*)|?(.*)%]%]$", open)
		display = do_pluralize(display ~= "" and display or target)
		-- If the display text is the link target plus a valid link trail,
		-- use the trail form (e.g. "[[foo]]" → "[[foo]]s", since "foos" is
		-- "foo" followed by "s"). The remainder must be purely lowercase
		-- ASCII letters: other characters are not treated as part of the
		-- link when they follow "]]", which would change what is linked.
		local index, trail = find(display, target, 1, true)
		if index == 1 then
			local suffix = sub(display, trail + 1)
			if not find(suffix, "[^a-z]") then
				return sub(str, 1, open - 1) .. target .. "]]" .. suffix
			end
		end
		-- Otherwise, return a piped link.
		return sub(str, 1, open - 1) .. target .. "|" .. display .. "]]"
	end
end

return export