Improve unicode finder script
This commit is contained in:
parent
a8301442e8
commit
e41f871697
2 changed files with 38 additions and 3 deletions
34
bin/cp2utf8
Executable file
34
bin/cp2utf8
Executable file
|
@ -0,0 +1,34 @@
|
||||||
|
#!/usr/bin/env luajit
|
||||||
|
|
||||||
|
local bit = require 'bit'
|
||||||
|
|
||||||
|
--- Encode a Unicode code point as a UTF-8 byte string.
-- Uses the `bit` library required at the top of the script.
-- @tparam number code code point to encode (values up to 0x10FFFF accepted)
-- @treturn string the 1- to 4-byte UTF-8 sequence for `code`
-- @raise 'Invalid character!' when `code` is greater than 0x10FFFF
local function code_to_utf8(code)
  if code <= 0x7f then
    -- 1 byte: 0xxxxxxx
    return string.char(code)
  elseif code <= 0x7ff then
    -- 2 bytes: 110xxxxx 10xxxxxx
    return string.char(
      bit.bor(0xC0, bit.rshift(code, 6)),
      bit.bor(0x80, bit.band(code, 0x3f))
    )
  elseif code <= 0xffff then
    -- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    return string.char(
      bit.bor(0xE0, bit.rshift(code, 12)),
      bit.bor(0x80, bit.band(bit.rshift(code, 6), 0x3f)),
      bit.bor(0x80, bit.band(code, 0x3f))
    )
  elseif code <= 0x10FFFF then
    -- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    -- The lead byte must be 0xF0; 0xE0 is the 3-byte lead and would
    -- produce an invalid sequence for every code point above 0xFFFF.
    return string.char(
      bit.bor(0xF0, bit.rshift(code, 18)),
      bit.bor(0x80, bit.band(bit.rshift(code, 12), 0x3f)),
      bit.bor(0x80, bit.band(bit.rshift(code, 6), 0x3f)),
      bit.bor(0x80, bit.band(code, 0x3f))
    )
  else
    error 'Invalid character!'
  end
end
|
||||||
|
|
||||||
|
-- Filter: for every stdin line, take its first run of hexadecimal digits
-- as a code point and print the line followed by the encoded character
-- (print separates the two with a tab).
for line in io.stdin:lines() do
  local hex = line:match('%x+')
  if hex then
    print(line, code_to_utf8(tonumber(hex, 16)))
  else
    -- No hex digits on this line: pass it through unchanged instead of
    -- aborting the whole stream on tonumber(nil, 16).
    print(line)
  end
end
|
|
@ -5,10 +5,11 @@ then
|
||||||
echo $namelist not found, downloading...
|
echo $namelist not found, downloading...
|
||||||
mkdir -p $(dirname "$namelist")
|
mkdir -p $(dirname "$namelist")
|
||||||
touch "$namelist"
|
touch "$namelist"
|
||||||
wget 'https://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt' \
|
curl 'https://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt' \
|
||||||
-O "$namelist" || exit
|
| sed -n -e '/<.*>/d' -e '/^[0-9A-F]\+/p' \
|
||||||
|
| cp2utf8 > "$namelist" || exit
|
||||||
fi
|
fi
|
||||||
codepoint=$(grep -i '^[0-9A-F]\+' "$namelist" | sed -e '/<.*>/d' -e 's/\t/ /g' | dmenu -i -l 20 -b | awk '{print $1}' | grep '^[0-9A-F]\+')
|
codepoint=$(cat "$namelist" | sed -e 's/\t/ /g' | dmenu -i -l 20 -b | awk '{print $1}' | grep '^[0-9A-F]\+')
|
||||||
if [ -z "$codepoint" ];
|
if [ -z "$codepoint" ];
|
||||||
then echo No codepoint selected >&2; exit
|
then echo No codepoint selected >&2; exit
|
||||||
fi
|
fi
|
||||||
|
|
Loading…
Reference in a new issue