You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
star/search/search.go

151 lines
3.7 KiB
Go

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package search
import (
"b612.me/stario"
"b612.me/starlog"
"b612.me/startext"
"bufio"
"errors"
"fmt"
"github.com/spf13/cobra"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// Settings populated from the command-line flags registered in init.
var (
	// stFolder is the root folder to search (--folder / -f).
	stFolder string

	// stNum is the number of concurrent search goroutines (--thread-num / -n).
	stNum int
	// stMax is the upper line-length limit (--max).
	stMax int
	// stMin is the lower line-length limit (--min).
	stMin int

	// stautoGBK enables automatic GBK detection (--autogbk / -g).
	stautoGBK bool
	// allowEmoji enables \U escapes in the search text (--allow-emoji / -e).
	allowEmoji bool
)
// init registers the flags of Cmd and binds each one to its package-level
// setting variable.
func init() {
	flags := Cmd.Flags()

	// Where to search and how many goroutines to use.
	flags.StringVarP(&stFolder, "folder", "f", "./", "搜索的文件夹")
	flags.IntVarP(&stNum, "thread-num", "n", 5, "并发搜寻协程数")

	// Text handling switches.
	flags.BoolVarP(&allowEmoji, "allow-emoji", "e", false, "使用\\U输入Emoji")
	flags.BoolVarP(&stautoGBK, "autogbk", "g", true, "自动GBK识别")

	// Line-length limits; both default to 0.
	flags.IntVar(&stMax, "max", 0, "行最大字数")
	flags.IntVar(&stMin, "min", 0, "行最小字数")
}
// Cmd is the "st" sub-command. It expects exactly two positional arguments:
// a pattern selecting the files to scan and the text to look for.
//
// The process exits with status 1 on bad arguments (wrong argument count or
// an undecodable \U escape) and with status 2 when the search itself fails.
var Cmd = &cobra.Command{
	Use:   "st",
	Short: "搜索文件中特定字符串",
	Long:  "搜索文件中特定字符串",
	Run: func(cmd *cobra.Command, args []string) {
		if len(args) != 2 {
			starlog.Errorln("应当传入两个参数,搜寻文件后缀和搜寻文本")
			os.Exit(1)
		}
		pattern, text := args[0], args[1]
		if allowEmoji {
			decoded, err := replaceUnicodeEmoji(text)
			if err != nil {
				// replaceUnicodeEmoji returns "" on failure; searching for
				// the empty string would match every line, so stop here
				// instead of silently continuing.
				starlog.Errorln("parse \\U escape failed", err)
				os.Exit(1)
			}
			text = decoded
		}
		// searchText logs its own failures, so only the exit status is set.
		if err := searchText(stFolder, pattern, text, stNum, stautoGBK, stMax, stMin); err != nil {
			os.Exit(2)
		}
	},
}
// unicodeEscapeRe matches a literal backslash-U followed by one to eight
// hexadecimal digits. It is compiled once instead of on every call.
var unicodeEscapeRe = regexp.MustCompile(`\\U([0-9A-Fa-f]{1,8})`)

// replaceUnicodeEmoji replaces every \UXXXX escape in text with the character
// for that hexadecimal code point, converting each one via unicodeToEmoji.
//
// Text without any escape is returned unchanged. If any escape cannot be
// converted, the first conversion error is returned together with an empty
// string.
func replaceUnicodeEmoji(text string) (string, error) {
	var firstErr error
	// Each match is substituted at its own position. Replacing every
	// occurrence of the matched substring across the whole text would corrupt
	// a longer escape that starts with a shorter one, e.g. `\U41` inside
	// `\U4142`.
	replaced := unicodeEscapeRe.ReplaceAllStringFunc(text, func(escape string) string {
		if firstErr != nil {
			return escape
		}
		// escape is `\U` followed by the hex digits; skip the two-byte prefix.
		emoji, err := unicodeToEmoji(escape[2:])
		if err != nil {
			firstErr = err
			return escape
		}
		return emoji
	})
	if firstErr != nil {
		return "", firstErr
	}
	return replaced, nil
}
// unicodeToEmoji converts a hexadecimal code point such as "1F600" into the
// string holding that single character.
//
// It returns an error when codepoint is not a hexadecimal number that fits in
// 32 bits, when the value lies beyond U+10FFFF (the last Unicode code point),
// or when it falls in the surrogate range U+D800..U+DFFF. Surrogates are not
// characters; converting one to a string would silently yield U+FFFD.
func unicodeToEmoji(codepoint string) (string, error) {
	const (
		maxCodePoint   = 0x10FFFF
		surrogateFirst = 0xD800
		surrogateLast  = 0xDFFF
	)
	value, err := strconv.ParseUint(codepoint, 16, 32)
	if err != nil {
		return "", err
	}
	if value > maxCodePoint || (value >= surrogateFirst && value <= surrogateLast) {
		return "", errors.New("invalid Unicode code point")
	}
	return string(rune(value)), nil
}
// searchText walks folder recursively and prints every line containing text
// from each file whose path matches the regular expression filematch.
//
// thread is handed to stario.NewWaitGroup for each directory level
// (presumably the concurrency limit; confirm against stario). autoGBK enables
// per-line GBK detection and conversion to UTF-8. max and min bound the
// length of a trimmed line in characters; 0 disables the respective bound.
//
// An error is returned when filematch is not a valid regular expression or
// when folder itself cannot be read. Failures inside sub-folders or on single
// files are logged and skipped.
func searchText(folder string, filematch string, text string, thread int, autoGBK bool, max, min int) error {
	// Compile once up front instead of once per directory entry, and report
	// a bad pattern instead of silently matching nothing.
	matcher, err := regexp.Compile(filematch)
	if err != nil {
		starlog.Errorln("invalid file pattern", filematch, err)
		return err
	}
	return searchTextInDir(folder, matcher, text, thread, autoGBK, max, min)
}

// searchTextInDir scans one directory: sub-directories are handled by
// recursion, matching files are searched in their own goroutines.
func searchTextInDir(folder string, matcher *regexp.Regexp, text string, thread int, autoGBK bool, max, min int) error {
	entries, err := ioutil.ReadDir(folder)
	if err != nil {
		starlog.Errorln("read folder failed", folder, err)
		return err
	}
	wg := stario.NewWaitGroup(thread)
	for _, entry := range entries {
		entryPath := filepath.Join(folder, entry.Name())
		if entry.IsDir() {
			// Best effort: the callee has already logged the failure, and
			// one unreadable sub-folder should not abort the whole search.
			_ = searchTextInDir(entryPath, matcher, text, thread, autoGBK, max, min)
			// A directory must never be opened as a file to search.
			continue
		}
		if !matcher.MatchString(entryPath) {
			continue
		}
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			searchTextInFile(file, text, autoGBK, max, min)
		}(entryPath)
	}
	wg.Wait()
	return nil
}

// searchTextInFile reads file line by line and prints each line that passes
// the length limits and contains text, with the file path and line number.
func searchTextInFile(file string, text string, autoGBK bool, max, min int) {
	fp, err := os.Open(file)
	if err != nil {
		starlog.Errorln("open file failed", file, err)
		return
	}
	defer fp.Close()

	reader := bufio.NewReader(fp)
	for lineNo := 1; ; lineNo++ {
		line, readErr := reader.ReadString('\n')
		// ReadString returns the data read so far together with the error, so
		// a last line without a trailing newline still has to be processed.
		// An empty read with an error means there is nothing left.
		if readErr != nil && line == "" {
			return
		}
		if autoGBK && startext.IsGBK([]byte(line)) {
			// Keep the raw line when the conversion fails.
			if converted, convErr := startext.GBK2UTF8([]byte(line)); convErr == nil {
				line = string(converted)
			}
		}
		line = strings.TrimSpace(line)

		// The limits are documented as character counts, so count runes
		// rather than bytes; multi-byte text would otherwise be measured
		// several times too long.
		chars := len([]rune(line))
		withinLimits := (max == 0 || chars <= max) && (min == 0 || chars >= min)
		if withinLimits && strings.Contains(line, text) {
			fmt.Printf("file:%s line:%d matched:%s\n", file, lineNo, line)
		}

		// Checked on every iteration, including filtered lines, so the loop
		// always terminates at end of file.
		if readErr != nil {
			return
		}
	}
}