Module:Fallback/sandbox

From Multilingual Bookbinding Dictionary
Jump to navigation Jump to search

Documentation for this module may be created at Module:Fallback/sandbox/doc

--[[
  __  __           _       _        _____     _ _ _                _    
 |  \/  | ___   __| |_   _| | ___ _|  ___|_ _| | | |__   __ _  ___| | __
 | |\/| |/ _ \ / _` | | | | |/ _ (_) |_ / _` | | | '_ \ / _` |/ __| |/ /
 | |  | | (_) | (_| | |_| | |  __/_|  _| (_| | | | |_) | (_| | (__|   < 
 |_|  |_|\___/ \__,_|\__,_|_|\___(_)_|  \__,_|_|_|_.__/ \__,_|\___|_|\_\
 
 Authors and maintainers:
* User:Zolo   - original version
* User:Jarekt 
* User:Verdy_p - updated for stricter compliance with BCP 47 for the support of fallbacks (see talk page)
]]
--[==[
Two sources of fallbacks are available for a language `lang`:
* Local source (on Commons):    _fblist[lang] = mw.loadData('Module:Fallbacklist')[lang]
* Builtin source of MediaWiki:  mw.language.getFallbacksFor(lang)

Neither of these two sources defines a root "default" (which is implicit at the end of these lists).

Neither of them is recursively expanded for BCP 47 conformance either.

Besides the "default", there are supplementary languages to try (the default language of the wiki,
and "en" for MediaWiki itself). However, MediaWiki implicitly adds "en" at the end of all the lists
it returns (they are meant for its own UI, whose source of translations is always English), so "en"
has to be ignored during the expansion (unless the source language is "en" or "en-*"), and is then
added back at the very end (if it is not already in the list), after adding the "default".

To solve this, this module builds and exports a getFallbacksFor(lang) function
that replaces the function with the same signature from the builtin MediaWiki module.
]==]
-- Commons-local fallback lists, indexed by language code (read as `_fblist[lang]` in getFallbacksFor).
local _fblist = mw.loadData('Module:Fallbacklist')

--[==[
_removevalue
	Utility: generic function missing in the standard `table` library of Lua.
	Removes every entry of an array that is equal to a given value; the entries
	that follow are shifted down, so the array keeps no hole.
Parameters:
	t - the array to scan and modify (in place)
	value - the value of array entries to remove
Returns:
	nothing
]==]
local function _removevalue(t, value)
	-- Scan from the end: table.remove() shifts down every entry located after the
	-- removed one, so a forward scan would step over the entry that follows each
	-- removal and leave one of two adjacent duplicates in place.
	for index = #t, 1, -1 do
		if t[index] == value then
			table.remove(t, index)
		end
	end
end

--[==[
_findvalue
	Utility: generic function missing in the standard `table` library of Lua.
	Linear search of a value in an array.
Parameters:
	t - the array to scan (it is not modified)
	value - the value of array entries to locate
Returns:
	the key (index) of the first occurrence found, or nil if the value is not found
]==]
local function _findvalue(t, value)
	local position = nil
	for index, item in ipairs(t) do
		if item == value then
			-- Only the first occurrence matters: stop scanning here.
			position = index
			break
		end
	end
	return position
end

--[==[
_undesiredTruncatedFallbacks
	Map of undesired truncated fallbacks, keyed by the truncated language code:
	* an empty string means that the truncated code must be dropped;
	* any other string is the (shorter) code to use instead of the truncated code.
	All other truncated language codes are assumed to be useful for translations. Note that the
	mapped codes may be valid under BCP 47 (except those ending in '-x') but are not useful for
	translations (they are language families, not individual languages or macrolanguages for which
	we search fallback translations); we don't support them.
]==]
local _undesiredTruncatedFallbacks = {
	-- Truncated codes to drop.
	bat = '', -- e.g. 'bat-smg'
	fiu = '', -- e.g. 'fiu-vro'
	map = '', -- e.g. 'map-bms'
	roa = '', -- e.g. 'roa-rup', 'roa-tara'
	-- Truncated codes to truncate more.
	['be-x'] = 'be', -- e.g. 'be-x-old'
	['zh-min'] = 'zh', -- e.g. 'zh-min-nan'
}

--[==[
_enqueueFallback
	Private helper of getFallbacksFor: schedules a language code for processing, unless it
	was already processed or is already waiting (otherwise the processing loops could be infinite).
Parameters:
	pending - the array of language codes still to process (appended in place)
	done - the array of language codes already processed
	code - the language code to schedule
]==]
local function _enqueueFallback(pending, done, code)
	if _findvalue(done, code) == nil and _findvalue(pending, code) == nil then
		table.insert(pending, code)
	end
end

--[==[
_expandTruncatedFallbacks
	Private helper of getFallbacksFor: expands a list of language codes for BCP 47 conformance,
	by adding the codes obtained when truncating the trailing '-xxx' subtags of variants.
	Truncated codes are filtered through _undesiredTruncatedFallbacks.
Parameters:
	pending - the array of language codes to expand (it is emptied by this function)
Returns:
	A new array with the given codes in their original order, followed by their
	truncated fallbacks, without any duplicate.
]==]
local function _expandTruncatedFallbacks(pending)
	local done = {}
	while true do
		-- Extract a language from the array to process from the start.
		local code = table.remove(pending, 1)
		if code == nil then
			break
		end
		-- Insert it (only once) at end of the result array.
		if _findvalue(done, code) == nil then
			table.insert(done, code)
		end
		-- Enumerate its default BCP 47 fallbacks (if the code is a variant) by
		-- stripping one trailing subtag at a time.
		while true do
			local count
			code, count = string.gsub(code, "%-%w*$", "")
			local replacement = _undesiredTruncatedFallbacks[code]
			if count == 0 or replacement == '' then
				-- Nothing left to truncate, or undesired truncated fallback (language family only).
				break
			elseif replacement ~= nil then
				-- Truncation possible, but it must be truncated a bit more
				-- (there were several variant extensions).
				code = replacement
			end
			-- Other truncations are safe and recommended (generally these are variants
			-- for region codes or script codes after the base language).
			_enqueueFallback(pending, done, code)
		end
	end
	return done
end

--[==[
getFallbacksFor (also exported under the alias fblist)
	Expand a language code in string to an array of language codes with their fallbacks.
	Similar to mw.language.getFallbacksFor(lang) but also takes input from
	the Commons fallback chain and processes them in a BCP 47 conforming way.
	The returned array will contain the given language, then its fallbacks,
	adding also their default BCP 47 fallbacks when language codes are variants,
	then 'default', and 'en', without any duplicate language code.
Parameters:
	lang - desired language (often user's native language)
Returns:
	An array of language codes expanded by recursively adding their fallbacks extracted
	from the local `Module:Fallbacklist` of Commons or from MediaWiki builtin fallbacks.
Error handling:
	The language code is not validated: it is only normalized (lower case, '_' replaced
	by '-', spaces removed). A nil `lang` gives only the final 'default' and 'en'.
]==]
local function getFallbacksFor(lang)
	-- 1. First expand the list with Commons-local fallbacks from _fblist[lang].
	local pending, result, hasEnglish = {lang}, {}, false
	while true do
		-- Extract a language from the array to process from the start.
		local code = table.remove(pending, 1)
		if code == nil then
			break
		end
		-- Normalize the language code to lower case, with dashes separators only, and all spaces removed.
		code = string.gsub(string.gsub(string.lower(code), '_', '-'), ' ','')
		-- Insert it (only once) at end of the result array.
		if _findvalue(result, code) == nil then
			table.insert(result, code)
		end
		-- Check if English (or a variant of English) is in source languages: in that case,
		-- step 3 will not ignore 'en' found in MediaWiki builtin fallbacks,
		-- so it will be found before the final 'default'.
		if code == 'en' or code:match('^en%-') then
			hasEnglish = true
		end
		-- Enumerate its local fallbacks and schedule them.
		local fallbacks = _fblist[code]
		if fallbacks then
			for _, fallback in ipairs(fallbacks) do
				_enqueueFallback(pending, result, fallback)
			end
		end
	end
	-- 2. Expand the list for BCP 47 conformance where it contains language variants with '-'.
	result = _expandTruncatedFallbacks(result)
	-- 3. Process it now with MediaWiki fallbacks from mw.language.getFallbacksFor(lang).
	pending, result = result, {}
	while true do
		-- Extract a language from the array to process from the start.
		local code = table.remove(pending, 1)
		if code == nil then
			break
		end
		-- Insert it (only once) at end of the result array.
		if _findvalue(result, code) == nil then
			table.insert(result, code)
		end
		-- Enumerate its MediaWiki fallbacks and schedule them.
		local fallbacks = mw.language.getFallbacksFor(code)
		if fallbacks then
			for _, fallback in ipairs(fallbacks) do
				-- MediaWiki includes 'en' at end of all lists, discard it for now unless source language is English.
				if fallback ~= 'en' or hasEnglish then
					_enqueueFallback(pending, result, fallback)
				end
			end
		end
	end
	-- 4. Expand again for BCP 47 conformance (the MediaWiki fallbacks may be variants too).
	result = _expandTruncatedFallbacks(result)
	-- 5. Finally add the 'default'.
	if _findvalue(result, 'default') == nil then
		table.insert(result, 'default')
	end
	-- 6. We may want to add here the default language 'xx' of the local wiki (when it is not English, e.g. in Wikipedia).
	-- Maybe we have a variable in the `mw` environment for this code, instead of editing the two occurrences of 'xx' below.
	--[==[
	if _findvalue(result, 'xx') == nil then
		table.insert(result, 'xx')
	end
	--]==]
	-- 7. Add 'en' as the last fallback (ignored when processing the MediaWiki list).
	if _findvalue(result, 'en') == nil then
		table.insert(result, 'en')
	end
	return result
end

--[==[
_langSwitch
	This function is the core part of the LangSwitch template: it selects, in a table of
	translations, the text of the first language of the fallback chain of `lang` that has one.
Example usage from Lua:
	text = _langSwitch({
			en = 'text in English',
			['bs hr'] = 'bosanski ili hrvatski tekst',
			pl = 'tekst po Polsku'
		}, lang)
Parameters:
	args - table with translations by language
	lang - desired language (often user's native language)
Returns:
	The selected text; an empty string when the selected text is '~'; nothing when no
	language of the fallback chain has a non-empty text.
Error handling:
	An HTML error message is returned (not raised) when `args` has neither an 'en' nor a
	'default' entry (with a tracking category, unless args.nocat is '1'), or when `lang` is missing.
]==]
local function _langSwitch(args, lang) -- args: table of translations
	-- Index the translations by individual language code: an argument can be named with multiple
	-- language codes (like 'de/gsw' or 'de, gsw') to map the same text to several languages.
	-- These codes are not case-significant and can only contain letters, digits, hyphens or
	-- underscores; they are normalized to lowercase letters, digits and hyphens so that they
	-- match the codes of the fallback chains. Arguments without a string name are ignored.
	local byCode = {}
	for name, text in pairs(args) do
		if type(name) == 'string' then
			for token in name:gmatch('[-0-9A-Z_a-z]+') do
				local code = (token:lower():gsub('_', '-'))
				byCode[code] = text
			end
		end
	end
	-- Return an error if there is no default and no English version.
	if not (byCode.en or byCode.default) then
		if byCode.nocat ~= '1' then
			return '<strong class="error">LangSwitch Error: no default</strong>[[Category:LangSwitch template without default version]]'
		end
		return '<strong class="error">LangSwitch Error: no default</strong>'
	end
	-- The desired language is required.
	if not lang then
		return '<strong class="error">LangSwitch Error: no lang</strong>' -- must become proper error
	end
	-- Walk the acceptable languages (lang, then those in its fallback chain), in order.
	for _, code in ipairs(getFallbacksFor(lang)) do
		local text = byCode[code]
		if text == '~' then
			-- '~' explicitly requests an empty output for this language.
			return ''
		end
		if text and text ~= '' then
			return text
		end
	end
end

--[==[
translatelua
	Allows easy translation or internationalization of pages in Lua: the translations are
	loaded from a data module, then the text is selected with _langSwitch.
Example usage from a template:
	{{#invoke:Fallback|translatelua| i18n/oil on canvas|lang={{{lang|}}}}}
Parameters:
	frame.args[1] - name of translation module (without the 'Module:' prefix)
	frame.args[2] - optional field name of the structure in Module:[frame.args[1]] to use
	frame.args.lang - desired language (often user's native language)
	When frame.args.lang is absent, all the arguments are read from the parent frame instead.
	When the language is missing or blank, the result of {{int:Lang}} is used.
Error handling:
	None is done here; see _langSwitch for the error messages it returns.
]==]
local function translatelua(frame)
	local params = frame.args
	-- If no expected args provided, then check in parent template/module frame.
	if not (params and params.lang ~= nil) then
		params = mw.getCurrentFrame():getParent().args
	end
	local lang = params.lang
	-- The page should only contain a simple table of translations.
	local moduleName = 'Module:' .. mw.text.trim(params[1])
	local translations = mw.loadData(moduleName)
	if not lang or mw.text.trim(lang) == '' then
		lang = frame:callParserFunction('Int', 'Lang') -- get user's chosen language
	end
	-- Optionally narrow the loaded structure down to one of its fields.
	local field = params[2]
	if field then
		translations = translations[mw.text.trim(field)]
	end
	return _langSwitch(translations, lang)
end

--[==[
langSwitch
	Wikitext entry point of the LangSwitch template (the selection itself is done by _langSwitch).
Example usage from a template:
	{{#invoke:Fallback|langSwitch|en=text in english|pl=tekst po polsku|lang={{int:lang}} }}
Parameters:
	frame.args - table with translations by language
	frame.args.lang - desired language (often user's native language)
	When frame.args has none of 'en', 'default' and 'nocat', the arguments are read from
	the parent frame instead. When the language is missing or blank, the result of
	{{int:Lang}} is used.
Error handling:
	None is done here; see _langSwitch for the error messages it returns.
]==]
local function langSwitch(frame) -- version to be used from wikitext
	local params = frame.args
	-- If no expected args provided, then check in parent template/module frame.
	local hasExpected = params
		and (params.en ~= nil or params.default ~= nil or params.nocat ~= nil)
	if not hasExpected then
		params = mw.getCurrentFrame():getParent().args
	end
	local requested = params.lang
	local lang = requested
	if not requested or mw.text.trim(requested) == '' then
		lang = frame:callParserFunction('Int', 'Lang') -- get user's chosen language
	end
	-- 'lang' is an option, not a translation.
	params.lang = nil
	return _langSwitch(params, lang)
end

--[==[
autotranslate
	This function is the core part of the Autotranslate template: it transcludes the first
	existing language subpage of a base page, following the fallback chain of the language.
Usage from a template:
	{{#invoke:Fallback|autotranslate|base=|lang= }}
Parameters:
	frame.args.base - base page name ('Template:' is prepended when it contains no ':')
	frame.args.lang - desired language (often user's native language)
	When frame.args.base is absent, the arguments are read from the parent frame instead.
	When the language is missing or blank, the result of {{int:Lang}} is used.
	All arguments except `base` are passed to the transcluded subpage.
Returns:
	The expansion of the selected subpage.
Error handling:
	An HTML error message is returned (not raised) when the base page is not provided, or
	when no subpage exists for any language of the fallback chain. Candidate subpage names
	that are not valid titles are skipped.
]==]
local function autotranslate(frame)
	local args = frame.args
	-- If no expected args provided, then check in parent template/module frame.
	if not args or args.base==nil then
		args = mw.getCurrentFrame():getParent().args
	end
	-- find base page
	local base = args.base
	if not base or base == '' then
		return '<strong class="error">Base page not provided for autotranslate</strong>'
	end
	if not mw.ustring.find(base, ':') then -- If base page does not indicate a namespace (can be a leading ':'),
		base = 'Template:' .. base     -- then assume it is a template.
	end
	if not args.lang or mw.text.trim(args.lang) == '' then
		args.lang = frame:callParserFunction('Int', 'Lang') -- Get user's chosen language.
	end
	local langList = getFallbacksFor(args.lang)
	-- find base template language subpage
	local page = nil
	for _, language in ipairs(langList) do
		local candidate = base .. '/' .. language
		-- mw.title.new() returns nil when the text is not a valid title: skip such
		-- candidates instead of failing on the access to `.exists`.
		local title = mw.title.new(candidate)
		if title and title.exists then
			page = candidate -- Keep only the page name.
			break
		end
	end
	if not page then
		return string.format('<strong class="error">no fallback page found for Autotranslate (base=[[%s]], lang=%s)</strong>', args.base, args.lang)
	end
	-- Repack args in a standard table.
	local inArgs = {}
	for key, value in pairs(args) do
		inArgs[key] = value;
	end
	-- Transclude {{page|....}} with template arguments the same as the ones passed to {{Autotranslate}} template.
	inArgs.base = nil
	return frame:expandTemplate{ title = page, args = inArgs }
end

-- exports
local p = {}
p._undesiredTruncatedFallbacks = _undesiredTruncatedFallbacks
p.getFallbacksFor = getFallbacksFor
-- Alias of getFallbacksFor, kept for compatibility before checking if it's needed.
p.fblist = getFallbacksFor
p._langSwitch = _langSwitch
p.langSwitch = langSwitch
p.translatelua = translatelua
p.autotranslate = autotranslate

-- The tests are attached to the metatable of the module table, not to the module table itself.
local meta = {}
-- This function must be named quickTests().
meta.quickTests = function()
	-- TODO. For now look at {{FULLPAGENAME}}/testcases and results in {{TALKPAGENAME}}/testcases
	return true
end
setmetatable(p, meta)
return p