From 7f73bb3d4de0f0647d8358461009d0bc1c96203e Mon Sep 17 00:00:00 2001
From: 兔子
Date: Wed, 12 Apr 2023 16:47:40 +0800
Subject: [PATCH] add emoji search support

---
Review notes: escapes are now expanded in a single pass, surrogate code
points are rejected, conversion errors are reported instead of ignored,
and the test asserts its results. The post-image blob ids on the index
lines below are those of the original commit.

 search/search.go      | 64 ++++++++++++++++++++++++++++++++++++++++++-
 search/search_test.go | 32 ++++++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 search/search_test.go

diff --git a/search/search.go b/search/search.go
index 26a847e..3ade3a2 100644
--- a/search/search.go
+++ b/search/search.go
@@ -5,22 +5,25 @@ import (
 	"b612.me/starlog"
 	"b612.me/startext"
 	"bufio"
+	"errors"
 	"fmt"
 	"github.com/spf13/cobra"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"regexp"
+	"strconv"
 	"strings"
 )
 
 var stFolder string
 var stNum, stMax, stMin int
-var stautoGBK bool
+var stautoGBK, allowEmoji bool
 
 func init() {
 	Cmd.Flags().StringVarP(&stFolder, "folder", "f", "./", "搜索的文件夹")
 	Cmd.Flags().IntVarP(&stNum, "thread-num", "n", 5, "并发搜寻协程数")
+	Cmd.Flags().BoolVarP(&allowEmoji, "allow-emoji", "e", false, "使用\\U输入Emoji")
 	Cmd.Flags().BoolVarP(&stautoGBK, "autogbk", "g", true, "自动GBK识别")
 	Cmd.Flags().IntVar(&stMax, "max", 0, "行最大字数")
 	Cmd.Flags().IntVar(&stMin, "min", 0, "行最小字数")
@@ -35,6 +38,16 @@ var Cmd = &cobra.Command{
 			starlog.Errorln("应当传入两个参数,搜寻文件后缀和搜寻文本")
 			os.Exit(1)
 		}
+		if allowEmoji {
+			// replaceUnicodeEmoji returns an empty string on failure, so
+			// stop here instead of searching with it.
+			text, err := replaceUnicodeEmoji(args[1])
+			if err != nil {
+				starlog.Errorln("Emoji转换失败: " + err.Error())
+				os.Exit(1)
+			}
+			args[1] = text
+		}
 		err := searchText(stFolder, args[0], args[1], stNum, stautoGBK, stMax, stMin)
 		if err != nil {
 			os.Exit(2)
@@ -43,6 +56,55 @@ var Cmd = &cobra.Command{
 	},
 }
 
+// unicodeEscapeRe matches a \U escape followed by 1-8 hexadecimal digits.
+// It is compiled once at package scope rather than on every call.
+var unicodeEscapeRe = regexp.MustCompile(`\\U([0-9A-Fa-f]{1,8})`)
+
+// replaceUnicodeEmoji expands every \U escape in text into the character
+// with that code point, e.g. `\U41` becomes "A". Text without escapes is
+// returned unchanged. All escapes are expanded in a single pass over the
+// original text, so one escape can never corrupt another that merely
+// shares its prefix. If any escape names an invalid code point, an empty
+// string and an error identifying that escape are returned.
+func replaceUnicodeEmoji(text string) (string, error) {
+	var firstErr error
+	expanded := unicodeEscapeRe.ReplaceAllStringFunc(text, func(escape string) string {
+		if firstErr != nil {
+			return escape
+		}
+		// Drop the leading `\U`; the remainder is the hexadecimal code point.
+		emoji, err := unicodeToEmoji(escape[2:])
+		if err != nil {
+			firstErr = fmt.Errorf("expanding %q: %w", escape, err)
+			return escape
+		}
+		return emoji
+	})
+	if firstErr != nil {
+		return "", firstErr
+	}
+	return expanded, nil
+}
+
+// unicodeToEmoji converts a hexadecimal code point such as "1F441" into the
+// UTF-8 string for that character. It returns an error when codepoint is not
+// valid hexadecimal or does not name a Unicode scalar value.
+func unicodeToEmoji(codepoint string) (string, error) {
+	hexcode, err := strconv.ParseUint(codepoint, 16, 32)
+	if err != nil {
+		return "", err
+	}
+
+	// Scalar values are U+0000..U+10FFFF excluding the surrogate range
+	// U+D800..U+DFFF; converting a surrogate with string(rune) would silently
+	// yield U+FFFD instead of failing.
+	if hexcode > 0x10FFFF || (hexcode >= 0xD800 && hexcode <= 0xDFFF) {
+		return "", errors.New("invalid Unicode code point")
+	}
+
+	return string(rune(hexcode)), nil
+}
+
 func searchText(folder string, filematch string, text string, thread int, autoGBK bool, max, min int) error {
 	data, err := ioutil.ReadDir(folder)
 	if err != nil {
diff --git a/search/search_test.go b/search/search_test.go
new file mode 100644
index 0000000..49b1a1e
--- /dev/null
+++ b/search/search_test.go
@@ -0,0 +1,32 @@
+package search
+
+import "testing"
+
+// TestEmoji checks that \U escapes are expanded to the expected text.
+func TestEmoji(t *testing.T) {
+	cases := []struct {
+		name, in, want string
+	}{
+		{"no escape", "plain text", "plain text"},
+		{"emoji with variation selector", "\\U1F441\\UFE0F", "\U0001F441\uFE0F"},
+		{"escape sharing a prefix", "\\U1F4 \\U1F441", "\u01F4 \U0001F441"},
+	}
+	for _, c := range cases {
+		got, err := replaceUnicodeEmoji(c.in)
+		if err != nil {
+			t.Fatalf("%s: unexpected error: %v", c.name, err)
+		}
+		if got != c.want {
+			t.Errorf("%s: got %q, want %q", c.name, got, c.want)
+		}
+	}
+}
+
+// TestEmojiInvalid checks that unusable code points are reported.
+func TestEmojiInvalid(t *testing.T) {
+	for _, in := range []string{"\\U110000", "\\UD800"} {
+		if got, err := replaceUnicodeEmoji(in); err == nil {
+			t.Errorf("%q: expected an error, got %q", in, got)
+		}
+	}
+}