Modul:Benutzer:Herzi Pinki/kmlhack

aus Wikipedia, der freien Enzyklopädie
Zur Navigation springen Zur Suche springen

Die Dokumentation für dieses Modul kann unter Modul:Benutzer:Herzi Pinki/kmlhack/Doku erstellt werden

-- table copied from Modul:Sort/latin, modified
--
-- Maps Unicode code points (written as decimal numbers) to the plain ASCII
-- string that should stand in for the character:
--   * the various space characters  -> " "
--   * hyphens, dashes and the minus -> "-"
--   * single quotation marks        -> "'"
--   * double quotation marks        -> "\""
--   * Latin letters with diacritics (code points 7838..7935) -> base letter(s)
-- The comment behind each entry names the character (Unicode name or the
-- HTML entity name).
local codeReplacements = {
[  5760] = " ",  -- OGHAM SPACE MARK
[  8192] = " ",  -- EN QUAD
[  8193] = " ",  -- EM QUAD
[  8194] = " ",  -- EN SPACE
[  8195] = " ",  -- EM SPACE
[  8196] = " ",  -- THREE-PER-EM SPACE
[  8197] = " ",  -- FOUR-PER-EM SPACE
[  8198] = " ",  -- SIX-PER-EM SPACE
[  8199] = " ",  -- FIGURE SPACE
[  8200] = " ",  -- PUNCTUATION SPACE
[  8201] = " ",  -- THIN SPACE (thinsp)
[  8202] = " ",  -- HAIR SPACE (english typography)
[  8239] = " ",  -- NARROW NO-BREAK SPACE
[  8287] = " ",  -- MEDIUM MATHEMATICAL SPACE
[ 12288] = " ",  -- IDEOGRAPHIC SPACE
[ 12351] = " ",  -- IDEOGRAPHIC HALF FILL SPACE
[917536] = " ",  -- TAG SPACE
[  8208] = "-",  -- HYPHEN
[  8209] = "-",  -- NON-BREAKING HYPHEN
[  8210] = "-",  -- FIGURE DASH
[  8211] = "-",  -- EN DASH (ndash)
[  8212] = "-",  -- EM DASH (mdash)
[  8213] = "-",  -- HORIZONTAL BAR
[  8259] = "-",  -- HYPHEN BULLET
[  8722] = "-",  -- MINUS SIGN
[  8216] = "'",  -- lsquo
[  8217] = "'",  -- rsquo
[  8218] = "'",  -- sbquo
[  8249] = "'",  -- lsaquo
[  8250] = "'",  -- rsaquo
[  8220] = "\"", -- ldquo
[  8221] = "\"", -- rdquo
[  8222] = "\"", -- bdquo
-- some 2-byte chars skipped
[  7838] = "SS", -- CAPITAL SHARP S
[  7840] = "A",  -- A with dot below
[  7841] = "a",  -- a with dot below
[  7842] = "A",  -- A with hook above
[  7843] = "a",  -- a with hook above
[  7844] = "A",  -- A with circumflex and acute
[  7845] = "a",  -- a with circumflex and acute
[  7846] = "A",  -- A with circumflex and grave
[  7847] = "a",  -- a with circumflex and grave
[  7848] = "A",  -- A with circumflex and hook above
[  7849] = "a",  -- a with circumflex and hook above
[  7850] = "A",  -- A with circumflex and tilde
[  7851] = "a",  -- a with circumflex and tilde
[  7852] = "A",  -- A with circumflex and dot below
[  7853] = "a",  -- a with circumflex and dot below
[  7854] = "A",  -- A with breve and acute
[  7855] = "a",  -- a with breve and acute
[  7856] = "A",  -- A with breve and grave
[  7857] = "a",  -- a with breve and grave
[  7858] = "A",  -- A with breve and hook above
[  7859] = "a",  -- a with breve and hook above
[  7860] = "A",  -- A with breve and tilde
[  7861] = "a",  -- a with breve and tilde
[  7862] = "A",  -- A with breve and dot below
[  7863] = "a",  -- a with breve and dot below
[  7864] = "E",  -- E with dot below
[  7865] = "e",  -- e with dot below
[  7866] = "E",  -- E with hook above
[  7867] = "e",  -- e with hook above
[  7868] = "E",  -- E with tilde
[  7869] = "e",  -- e with tilde
[  7870] = "E",  -- E with circumflex and acute
[  7871] = "e",  -- e with circumflex and acute
[  7872] = "E",  -- E with circumflex and grave
[  7873] = "e",  -- e with circumflex and grave
[  7874] = "E",  -- E with circumflex and hook above
[  7875] = "e",  -- e with circumflex and hook above
[  7876] = "E",  -- E with circumflex and tilde
[  7877] = "e",  -- e with circumflex and tilde
[  7878] = "E",  -- E with circumflex and dot below
[  7879] = "e",  -- e with circumflex and dot below
[  7880] = "I",  -- I with hook above
[  7881] = "i",  -- i with hook above
[  7882] = "I",  -- I with dot below
[  7883] = "i",  -- i with dot below
[  7884] = "O",  -- O with dot below
[  7885] = "o",  -- o with dot below
[  7886] = "O",  -- O with hook above
[  7887] = "o",  -- o with hook above
[  7888] = "O",  -- O with circumflex and acute
[  7889] = "o",  -- o with circumflex and acute
[  7890] = "O",  -- O with circumflex and grave
[  7891] = "o",  -- o with circumflex and grave
[  7892] = "O",  -- O with circumflex and hook above
[  7893] = "o",  -- o with circumflex and hook above
[  7894] = "O",  -- O with circumflex and tilde
[  7895] = "o",  -- o with circumflex and tilde
[  7896] = "O",  -- O with circumflex and dot below
[  7897] = "o",  -- o with circumflex and dot below
[  7898] = "O",  -- O with horn and acute
[  7899] = "o",  -- o with horn and acute
[  7900] = "O",  -- O with horn and grave
[  7901] = "o",  -- o with horn and grave
[  7902] = "O",  -- O with horn and hook above
[  7903] = "o",  -- o with horn and hook above
[  7904] = "O",  -- O with horn and tilde
[  7905] = "o",  -- o with horn and tilde
[  7906] = "O",  -- O with horn and dot below
[  7907] = "o",  -- o with horn and dot below
[  7908] = "U",  -- U with dot below
[  7909] = "u",  -- u with dot below
[  7910] = "U",  -- U with hook above
[  7911] = "u",  -- u with hook above
[  7912] = "U",  -- U with horn and acute
[  7913] = "u",  -- u with horn and acute
[  7914] = "U",  -- U with horn and grave
[  7915] = "u",  -- u with horn and grave
[  7916] = "U",  -- U with horn and hook above
[  7917] = "u",  -- u with horn and hook above
[  7918] = "U",  -- U with horn and tilde
[  7919] = "u",  -- u with horn and tilde
[  7920] = "U",  -- U with horn and dot below
[  7921] = "u",  -- u with horn and dot below
[  7922] = "Y",  -- Y with grave
[  7923] = "y",  -- y with grave
[  7924] = "Y",  -- Y with dot below
[  7925] = "y",  -- y with dot below
[  7926] = "Y",  -- Y with hook above
[  7927] = "y",  -- y with hook above
[  7928] = "Y",  -- Y with tilde
[  7929] = "y",  -- y with tilde
[  7932] = "V",  -- V (middle-welsh)
[  7933] = "v",  -- v (middle-welsh)
[  7934] = "Y",  -- Y with loop
[  7935] = "y",  -- y with loop
-- some 2-byte chars skipped
}

-- Re-key the replacement table by the encoded character instead of by its
-- numeric code point: matching whole characters is easier than matching
-- byte sequences of varying length.
local charReplacements = {}
for codepoint, replacement in pairs(codeReplacements) do
	local character = mw.ustring.char(codepoint)
	charReplacements[character] = replacement
end

-- Table of functions exported by this module.
local p = {}

--- Substitute "high" characters by their ASCII stand-ins.
-- HTML entities in the first argument are decoded first. Afterwards every
-- character in the range U+0800..U+10FFFF that has an entry in
-- charReplacements is replaced by that entry; characters in the range
-- without an entry are kept unchanged.
-- @param frame frame object; frame.args[1] holds the text to process
-- @return the processed string ("" when the argument is missing)
p.subHighChars = function ( frame )
	-- Treat a missing argument as empty text instead of handing nil to
	-- mw.text.decode.
	local raw = frame.args[ 1 ] or ""
	-- Pass an explicit `true`: an undefined global in this position evaluates
	-- to nil, which makes decode ignore every named entity except the basic
	-- ones (&lt; &gt; &amp; &quot; &nbsp;), so e.g. &ndash; would survive.
	local s = mw.text.decode( raw, true ) -- replace html entities by code
	-- Character class "[" U+0800 "-" U+10FFFF "]". The upper bound is the
	-- highest Unicode code point, so supplementary-plane characters such as
	-- TAG SPACE (U+E0020, present in the replacement table) are covered too.
	local pattern = mw.ustring.char( 91, 0x0800, 45, 0x10FFFF, 93 )
	-- Assign to a single local so the match count is not returned as well.
	local res = mw.ustring.gsub( s, pattern, charReplacements )
	return res
end

--- Remove all "high" characters.
-- HTML entities in the first argument are decoded first. Afterwards every
-- character in the range U+0800..U+10FFFF is deleted from the text.
-- @param frame frame object; frame.args[1] holds the text to process
-- @return the processed string ("" when the argument is missing)
p.removeHighChars = function ( frame )
	-- Treat a missing argument as empty text instead of handing nil to
	-- mw.text.decode.
	local raw = frame.args[ 1 ] or ""
	-- Pass an explicit `true`: an undefined global in this position evaluates
	-- to nil, which makes decode ignore every named entity except the basic
	-- ones (&lt; &gt; &amp; &quot; &nbsp;), so e.g. &ndash; would survive.
	local s = mw.text.decode( raw, true ) -- replace html entities by code
	-- Character class "[" U+0800 "-" U+10FFFF "]". The upper bound is the
	-- highest Unicode code point, so characters above U+1FFFF are removed
	-- as well.
	local pattern = mw.ustring.char( 91, 0x0800, 45, 0x10FFFF, 93 )
	-- Assign to a single local so the match count is not returned as well.
	local res = mw.ustring.gsub( s, pattern, "" )
	return res
end

--- Reduce a text to characters below U+0800.
-- HTML entities in the first argument are decoded first. Then, in two passes
-- over the range U+0800..U+10FFFF:
--   1. characters with an entry in charReplacements are replaced by it,
--   2. all characters of the range that are still present are deleted.
-- @param frame frame object; frame.args[1] holds the text to process
-- @return the processed string ("" when the argument is missing)
p.kmlhack = function ( frame )
	-- Treat a missing argument as empty text instead of handing nil to
	-- mw.text.decode.
	local raw = frame.args[ 1 ] or ""
	-- Pass an explicit `true`: an undefined global in this position evaluates
	-- to nil, which makes decode ignore every named entity except the basic
	-- ones (&lt; &gt; &amp; &quot; &nbsp;), so e.g. &ndash; would survive.
	local s = mw.text.decode( raw, true ) -- replace html entities by code
	-- Character class "[" U+0800 "-" U+10FFFF "]". The upper bound is the
	-- highest Unicode code point, so supplementary-plane characters such as
	-- TAG SPACE (U+E0020, present in the replacement table) are covered too.
	local pattern = mw.ustring.char( 91, 0x0800, 45, 0x10FFFF, 93 )
	-- Pass 1: substitute the characters we have an ASCII stand-in for.
	local substituted = mw.ustring.gsub( s, pattern, charReplacements )
	-- Pass 2: drop whatever high characters are left. Assigning to a single
	-- local keeps the match count out of the return value.
	local res = mw.ustring.gsub( substituted, pattern, "" )
	return res
end

return p