From e41f87169785caad35ecc3aaa38784592a156ea2 Mon Sep 17 00:00:00 2001
From: DarkWiiPlayer
Date: Fri, 31 Jan 2020 11:08:50 +0100
Subject: [PATCH] Improve unicode finder script

---
 bin/cp2utf8 | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 bin/unicode | 11 ++++++++---
 2 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100755 bin/cp2utf8

diff --git a/bin/cp2utf8 b/bin/cp2utf8
new file mode 100755
index 0000000..9c6a30b
--- /dev/null
+++ b/bin/cp2utf8
@@ -0,0 +1,48 @@
+#!/usr/bin/env luajit
+
+-- Reads lines from stdin, takes the first run of hex digits on each line as a
+-- Unicode code point and prints the line, a tab, and that character encoded
+-- as UTF-8. Lines without any hex digits are printed unchanged.
+
+local bit = require 'bit'
+
+--- Encode a Unicode code point as a UTF-8 byte string.
+-- @tparam number code code point, expected in 0..0x10FFFF
+-- @treturn string the 1 to 4 byte UTF-8 sequence
+-- @raise 'Invalid character!' if code is above 0x10FFFF
+local function code_to_utf8(code)
+	if code <= 0x7f then
+		return string.char(code)
+	elseif code <= 0x7ff then
+		return string.char(
+			bit.bor(0xC0, bit.rshift(code, 6)),
+			bit.bor(0x80, bit.band(code, 0x3f))
+		)
+	elseif code <= 0xffff then
+		return string.char(
+			bit.bor(0xE0, bit.rshift(code, 12)),
+			bit.bor(0x80, bit.band(bit.rshift(code, 6), 0x3f)),
+			bit.bor(0x80, bit.band(code, 0x3f))
+		)
+	elseif code <= 0x10FFFF then
+		return string.char(
+			-- Four-byte sequences start with 11110xxx (0xF0), not 0xE0.
+			bit.bor(0xF0, bit.rshift(code, 18)),
+			bit.bor(0x80, bit.band(bit.rshift(code, 12), 0x3f)),
+			bit.bor(0x80, bit.band(bit.rshift(code, 6), 0x3f)),
+			bit.bor(0x80, bit.band(code, 0x3f))
+		)
+	else
+		error 'Invalid character!'
+	end
+end
+
+for line in io.stdin:lines() do
+	local hex = line:match('%x+')
+	if hex then
+		print(line, code_to_utf8(tonumber(hex, 16)))
+	else
+		-- tonumber(nil, 16) raises, so pass such lines through untouched.
+		print(line)
+	end
+end
diff --git a/bin/unicode b/bin/unicode
index d26309d..1822da8 100755
--- a/bin/unicode
+++ b/bin/unicode
@@ -5,10 +5,15 @@ then
 	echo $namelist not found, downloading...
 	mkdir -p $(dirname "$namelist")
 	touch "$namelist"
-	wget 'https://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt' \
-		-O "$namelist" || exit
+	# A pipeline only reports the status of its last command, so also require a
+	# non-empty result; otherwise a failed download leaves an empty list behind.
+	curl -f 'https://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt' \
+		| sed -n -e '/<.*>/d' -e '/^[0-9A-F]\+/p' \
+		| cp2utf8 > "$namelist" \
+		&& [ -s "$namelist" ] \
+		|| { rm -f "$namelist"; exit 1; }
 fi
-codepoint=$(grep -i '^[0-9A-F]\+' "$namelist" | sed -e '/<.*>/d' -e 's/\t/ /g' | dmenu -i -l 20 -b | awk '{print $1}' | grep '^[0-9A-F]\+')
+codepoint=$(sed -e 's/\t/ /g' "$namelist" | dmenu -i -l 20 -b | awk '{print $1}' | grep '^[0-9A-F]\+')
 if [ -z "$codepoint" ]; then
 	echo No codepoint selected >&2; exit
 fi