Improve Unicode finder script

This commit is contained in:
Talia 2020-01-31 11:08:50 +01:00
parent a8301442e8
commit e41f871697
2 changed files with 38 additions and 3 deletions

34
bin/cp2utf8 Executable file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env luajit
local bit = require 'bit'
--- Encode a Unicode code point as a UTF-8 byte string.
-- The code point is split into 6-bit groups; each group after the first is
-- emitted as a continuation byte (10xxxxxx) and the lead byte carries the
-- remaining high bits plus a length prefix.
-- Surrogate code points (0xD800..0xDFFF) are not rejected here.
-- @tparam number code code point to encode, expected in 0..0x10FFFF
-- @treturn string the one- to four-byte UTF-8 encoding of `code`
-- @raise 'Invalid character!' when `code` is greater than 0x10FFFF
local function code_to_utf8(code)
  if code <= 0x7f then
    -- Single byte: 0xxxxxxx
    return string.char(code)
  elseif code <= 0x7ff then
    -- Two bytes: 110xxxxx 10xxxxxx
    return string.char(
      bit.bor(0xC0, bit.rshift(code, 6)),
      bit.bor(0x80, bit.band(code, 0x3f))
    )
  elseif code <= 0xffff then
    -- Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    return string.char(
      bit.bor(0xE0, bit.rshift(code, 12)),
      bit.bor(0x80, bit.band(bit.rshift(code, 6), 0x3f)),
      bit.bor(0x80, bit.band(code, 0x3f))
    )
  elseif code <= 0x10FFFF then
    -- Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    -- The lead byte prefix is 0xF0; 0xE0 would mark a three-byte sequence
    -- and yield malformed output for every code point above 0xFFFF.
    return string.char(
      bit.bor(0xF0, bit.rshift(code, 18)),
      bit.bor(0x80, bit.band(bit.rshift(code, 12), 0x3f)),
      bit.bor(0x80, bit.band(bit.rshift(code, 6), 0x3f)),
      bit.bor(0x80, bit.band(code, 0x3f))
    )
  else
    error 'Invalid character!'
  end
end
-- Filter: for every stdin line, take the first run of hexadecimal digits as a
-- code point and print the original line followed by a tab and the encoded
-- character.
for line in io.stdin:lines() do
  local hex = line:match('%x+')
  if hex then
    -- An out-of-range code point still raises from code_to_utf8 on purpose.
    print(line, code_to_utf8(tonumber(hex, 16)))
  else
    -- tonumber(nil, 16) raises a bad-argument error, so a blank or hex-free
    -- line would abort the whole run; report it and keep going instead.
    io.stderr:write('cp2utf8: no hexadecimal code point in line: ', line, '\n')
  end
end

View File

@ -5,10 +5,11 @@ then
echo $namelist not found, downloading...
mkdir -p $(dirname "$namelist")
touch "$namelist"
wget 'https://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt' \
-O "$namelist" || exit
curl 'https://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt' \
| sed -n -e '/<.*>/d' -e '/^[0-9A-F]\+/p' \
| cp2utf8 > "$namelist" || exit
fi
codepoint=$(grep -i '^[0-9A-F]\+' "$namelist" | sed -e '/<.*>/d' -e 's/\t/ /g' | dmenu -i -l 20 -b | awk '{print $1}' | grep '^[0-9A-F]\+')
codepoint=$(cat "$namelist" | sed -e 's/\t/ /g' | dmenu -i -l 20 -b | awk '{print $1}' | grep '^[0-9A-F]\+')
if [ -z "$codepoint" ];
then echo No codepoint selected >&2; exit
fi