Module:User:Surjection/luadump

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Surjection, for their own experimentation. Items in this module may be added and removed at Surjection's discretion; do not rely on this module's stability.


--- Render a byte string as printable text using Lua decimal escapes.
--
-- Printable ASCII (bytes 32..126) and well-formed UTF-8 sequences are copied
-- through unchanged; every other byte is replaced by a three-digit decimal
-- escape of the form `\ddd`.
--
-- A multi-byte sequence counts as well-formed only if all continuation bytes
-- are present, the encoding is not overlong, the code point is below
-- 0x110000, and it is not a surrogate (0xD800..0xDFFF). When a sequence is
-- rejected only its lead byte is escaped; the following bytes are then
-- examined again on their own.
--
-- NOTE(review): backslashes and quote characters fall in the printable ASCII
-- range and are copied through unescaped, so the result does not necessarily
-- round-trip when pasted between quotes -- confirm whether callers need that.

local string_format = string.format
local table_concat = table.concat

-- Decimal escape for one byte. Always three digits, so a digit that follows
-- in the output can never be read as part of the escape.
local function escape_byte(byte)
	return string_format("\\%03d", byte)
end

-- Examine the multi-byte sequence whose lead byte is at `pos`.
--   lead_bits: payload bits already extracted from the lead byte
--   extra:     number of continuation bytes the lead byte announces
--   min_cp:    smallest code point that legitimately needs this length
--              (anything lower is an overlong encoding)
-- Returns the number of input bytes consumed and the text to emit for them.
local function utf8_chunk(s, pos, lead_bits, extra, min_cp)
	local cp = lead_bits
	for offset = 1, extra do
		local byte = s:byte(pos + offset)
		-- Missing (string ended) or not of the form 10xxxxxx.
		if not byte or byte < 0x80 or byte >= 0xC0 then
			return 1, escape_byte(s:byte(pos))
		end
		cp = cp * 64 + (byte - 0x80)
	end
	if cp < min_cp or cp >= 0x110000 or (cp >= 0xD800 and cp < 0xE000) then
		return 1, escape_byte(s:byte(pos))
	end
	-- The sequence passed every check, so the original bytes already are the
	-- one canonical encoding of `cp`; copy them instead of re-encoding.
	return extra + 1, s:sub(pos, pos + extra)
end

--- @param s string  the bytes to render
--- @return string   `s` with non-printable and malformed bytes escaped
return function(s)
	-- Collect pieces in a table; repeated `..` in a loop is quadratic.
	local pieces = {}
	local count = 0
	local pos = 1
	local len = #s

	while pos <= len do
		local byte = s:byte(pos)
		local consumed, text

		if byte < 32 or (byte >= 127 and byte < 192) or byte >= 248 then
			-- Control characters, DEL, stray continuation bytes, and bytes
			-- that can never start a UTF-8 sequence.
			consumed, text = 1, escape_byte(byte)
		elseif byte >= 240 then
			consumed, text = utf8_chunk(s, pos, byte % 8, 3, 0x10000)
		elseif byte >= 224 then
			consumed, text = utf8_chunk(s, pos, byte % 16, 2, 0x0800)
		elseif byte >= 192 then
			consumed, text = utf8_chunk(s, pos, byte % 32, 1, 0x80)
		else
			-- Printable ASCII.
			consumed, text = 1, s:sub(pos, pos)
		end

		count = count + 1
		pieces[count] = text
		pos = pos + consumed
	end

	return table_concat(pieces)
end