Initial commit (code only without large binaries)
This commit is contained in:
170
EdgeNode/internal/utils/runes/runes.go
Normal file
170
EdgeNode/internal/utils/runes/runes.go
Normal file
@@ -0,0 +1,170 @@
|
||||
// Copyright 2023 GoEdge CDN goedge.cdn@gmail.com. All rights reserved. Official site: https://goedge.cn .
|
||||
|
||||
package runes
|
||||
|
||||
// ContainsAnyWordRunes 直接使用rune检查字符串是否包含任一单词
|
||||
func ContainsAnyWordRunes(s string, words [][]rune, isCaseInsensitive bool) bool {
|
||||
var allRunes = []rune(s)
|
||||
if len(allRunes) == 0 || len(words) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
var lastRune rune // last searching rune in s
|
||||
var lastIndex = -2 // -2: not started, -1: not found, >=0: rune index
|
||||
for _, wordRunes := range words {
|
||||
if len(wordRunes) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if lastIndex > -2 && lastRune == wordRunes[0] {
|
||||
if lastIndex >= 0 {
|
||||
result, _ := ContainsWordRunes(allRunes[lastIndex:], wordRunes, isCaseInsensitive)
|
||||
if result {
|
||||
return true
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
result, firstIndex := ContainsWordRunes(allRunes, wordRunes, isCaseInsensitive)
|
||||
lastIndex = firstIndex
|
||||
if result {
|
||||
return true
|
||||
}
|
||||
}
|
||||
lastRune = wordRunes[0]
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ContainsAnyWord 检查字符串是否包含任一单词
|
||||
func ContainsAnyWord(s string, words []string, isCaseInsensitive bool) bool {
|
||||
var allRunes = []rune(s)
|
||||
if len(allRunes) == 0 || len(words) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
var lastRune rune // last searching rune in s
|
||||
var lastIndex = -2 // -2: not started, -1: not found, >=0: rune index
|
||||
for _, word := range words {
|
||||
var wordRunes = []rune(word)
|
||||
if len(wordRunes) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if lastIndex > -2 && lastRune == wordRunes[0] {
|
||||
if lastIndex >= 0 {
|
||||
result, _ := ContainsWordRunes(allRunes[lastIndex:], wordRunes, isCaseInsensitive)
|
||||
if result {
|
||||
return true
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
result, firstIndex := ContainsWordRunes(allRunes, wordRunes, isCaseInsensitive)
|
||||
lastIndex = firstIndex
|
||||
if result {
|
||||
return true
|
||||
}
|
||||
}
|
||||
lastRune = wordRunes[0]
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ContainsAllWords 检查字符串是否包含所有单词
|
||||
func ContainsAllWords(s string, words []string, isCaseInsensitive bool) bool {
|
||||
var allRunes = []rune(s)
|
||||
if len(allRunes) == 0 || len(words) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, word := range words {
|
||||
if result, _ := ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive); !result {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ContainsWordRunes 检查字符列表是否包含某个单词子字符列表
|
||||
func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) (result bool, firstIndex int) {
|
||||
firstIndex = -1
|
||||
|
||||
var l = len(subRunes)
|
||||
if l == 0 {
|
||||
return false, 0
|
||||
}
|
||||
|
||||
var al = len(allRunes)
|
||||
|
||||
for index, r := range allRunes {
|
||||
if EqualRune(r, subRunes[0], isCaseInsensitive) && (index == 0 || !isChar(allRunes[index-1]) /**boundary check **/) {
|
||||
if firstIndex < 0 {
|
||||
firstIndex = index
|
||||
}
|
||||
|
||||
var found = true
|
||||
if l > 1 {
|
||||
for i := 1; i < l; i++ {
|
||||
var subIndex = index + i
|
||||
if subIndex > al-1 || !EqualRune(allRunes[subIndex], subRunes[i], isCaseInsensitive) {
|
||||
found = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check after charset
|
||||
if found && (al <= index+l || !isChar(allRunes[index+l]) /**boundary check **/) {
|
||||
return true, firstIndex
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, firstIndex
|
||||
}
|
||||
|
||||
// ContainsSubRunes 检查字符列表是否包含某个子子字符列表
|
||||
// 与 ContainsWordRunes 不同,这里不需要检查边界符号
|
||||
func ContainsSubRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) bool {
|
||||
var l = len(subRunes)
|
||||
if l == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
var al = len(allRunes)
|
||||
|
||||
for index, r := range allRunes {
|
||||
if EqualRune(r, subRunes[0], isCaseInsensitive) {
|
||||
var found = true
|
||||
if l > 1 {
|
||||
for i := 1; i < l; i++ {
|
||||
var subIndex = index + i
|
||||
if subIndex > al-1 || !EqualRune(allRunes[subIndex], subRunes[i], isCaseInsensitive) {
|
||||
found = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check after charset
|
||||
if found {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// EqualRune reports whether r1 and r2 are the same rune.
//
// With isCaseInsensitive set, an ASCII letter additionally equals its
// opposite-case counterpart ('a' == 'A'). Case folding is limited to the
// ranges a-z and A-Z; all other runes must match exactly.
func EqualRune(r1 rune, r2 rune, isCaseInsensitive bool) bool {
	if r1 == r2 {
		return true
	}
	if !isCaseInsensitive {
		return false
	}

	// Distance between a lower-case ASCII letter and its upper-case form.
	const caseGap = 'a' - 'A'

	switch {
	case 'a' <= r1 && r1 <= 'z':
		return r2 == r1-caseGap
	case 'A' <= r1 && r1 <= 'Z':
		return r2 == r1+caseGap
	}
	return false
}
|
||||
|
||||
// isChar reports whether r is an ASCII letter (a-z, A-Z) or an ASCII digit
// (0-9). Every other rune, including '_' and all non-ASCII runes, yields false.
func isChar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z',
		'A' <= r && r <= 'Z',
		'0' <= r && r <= '9':
		return true
	default:
		return false
	}
}
|
||||
171
EdgeNode/internal/utils/runes/runes_test.go
Normal file
171
EdgeNode/internal/utils/runes/runes_test.go
Normal file
@@ -0,0 +1,171 @@
|
||||
// Copyright 2023 GoEdge CDN goedge.cdn@gmail.com. All rights reserved. Official site: https://goedge.cn .
|
||||
|
||||
package runes_test
|
||||
|
||||
import (
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils/re"
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils/runes"
|
||||
"github.com/iwind/TeaGo/assert"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestContainsAllWords(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
a.IsTrue(runes.ContainsAllWords("How are you?", []string{"are", "you"}, false))
|
||||
a.IsFalse(runes.ContainsAllWords("How are you?", []string{"how", "are", "you"}, false))
|
||||
a.IsTrue(runes.ContainsAllWords("How are you?", []string{"how", "are", "you"}, true))
|
||||
}
|
||||
|
||||
func TestContainsAnyWord(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"are", "you"}, false))
|
||||
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"are", "you", "ok"}, false))
|
||||
a.IsFalse(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, false))
|
||||
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how"}, true))
|
||||
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, true))
|
||||
a.IsTrue(runes.ContainsAnyWord("How-are you?", []string{"how", "ok"}, true))
|
||||
}
|
||||
|
||||
func TestContainsAnyWord_Sort(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"abc", "ant", "arm", "Hit", "Hi", "Pet", "pie", "are"}, false))
|
||||
}
|
||||
|
||||
func TestContainsWordRunes(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
a.IsFalse(runes.ContainsWordRunes([]rune(""), []rune("How"), true))
|
||||
a.IsFalse(runes.ContainsWordRunes([]rune("How are you?"), []rune(""), true))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you?"), []rune("How"), true))
|
||||
a.IsFalse(runes.ContainsWordRunes([]rune("How are you?"), []rune("how"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you?"), []rune("you"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you?"), []rune("are"), false))
|
||||
a.IsFalse(runes.ContainsWordRunes([]rune("How are you?"), []rune("re"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you w?"), []rune("w"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("w How are you?"), []rune("w"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are w you?"), []rune("w"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are how you?"), []rune("how"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you?"), []rune("how"), true))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you?"), []rune("ARE"), true))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you"), []rune("you"), false))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true))
|
||||
a.IsTrue(runes.ContainsWordRunes([]rune("How are you?"), []rune("YOU"), true))
|
||||
a.IsFalse(runes.ContainsWordRunes([]rune("How are you1?"), []rune("YOU"), true))
|
||||
a.IsFalse(runes.ContainsWordRunes([]rune("How are you1?"), []rune("YOU YOU YOU YOU YOU YOU YOU"), true))
|
||||
}
|
||||
|
||||
func TestContainsSubRunes(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
a.IsFalse(runes.ContainsSubRunes([]rune(""), []rune("How"), true))
|
||||
a.IsFalse(runes.ContainsSubRunes([]rune("How are you?"), []rune(""), true))
|
||||
a.IsTrue(runes.ContainsSubRunes([]rune("How are you1?"), []rune("YOU"), true))
|
||||
a.IsTrue(runes.ContainsSubRunes([]rune("How are you1?"), []rune("ow"), false))
|
||||
a.IsTrue(runes.ContainsSubRunes([]rune("How are you1?"), []rune("H"), false))
|
||||
a.IsTrue(runes.ContainsSubRunes([]rune("How are you1?"), []rune("How"), false))
|
||||
a.IsTrue(runes.ContainsSubRunes([]rune("How are you doing"), []rune("oi"), false))
|
||||
a.IsTrue(runes.ContainsSubRunes([]rune("How are you doing"), []rune("g"), false))
|
||||
a.IsTrue(runes.ContainsSubRunes([]rune("How are you doing"), []rune("ing"), false))
|
||||
a.IsFalse(runes.ContainsSubRunes([]rune("How are you doing"), []rune("int"), false))
|
||||
}
|
||||
|
||||
func TestEqualRune(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
a.IsTrue(runes.EqualRune('a', 'a', false))
|
||||
a.IsTrue(runes.EqualRune('a', 'a', true))
|
||||
a.IsFalse(runes.EqualRune('a', 'A', false))
|
||||
a.IsTrue(runes.EqualRune('a', 'A', true))
|
||||
a.IsFalse(runes.EqualRune('c', 'C', false))
|
||||
a.IsTrue(runes.EqualRune('c', 'C', true))
|
||||
a.IsTrue(runes.EqualRune('C', 'C', true))
|
||||
a.IsTrue(runes.EqualRune('C', 'c', true))
|
||||
a.IsTrue(runes.EqualRune('Z', 'z', true))
|
||||
a.IsTrue(runes.EqualRune('z', 'Z', true))
|
||||
a.IsFalse(runes.EqualRune('z', 'z'+('a'-'A'), true))
|
||||
}
|
||||
|
||||
func BenchmarkContainsWordRunes(b *testing.B) {
|
||||
runtime.GOMAXPROCS(4)
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_, _ = runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkContainsAnyWord(b *testing.B) {
|
||||
runtime.GOMAXPROCS(4)
|
||||
|
||||
var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
|
||||
sort.Strings(words)
|
||||
|
||||
var wordRunes = [][]rune{}
|
||||
for _, word := range words {
|
||||
wordRunes = append(wordRunes, []rune(word))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = runes.ContainsAnyWord("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0", words, true)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkContainsAnyWordRunes(b *testing.B) {
|
||||
runtime.GOMAXPROCS(4)
|
||||
|
||||
var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
|
||||
sort.Strings(words)
|
||||
|
||||
var wordRunes = [][]rune{}
|
||||
for _, word := range words {
|
||||
wordRunes = append(wordRunes, []rune(word))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = runes.ContainsAnyWordRunes("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0", wordRunes, true)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// BenchmarkContainsAnyWord_Regexp is the standard-library baseline: the same
// keyword list joined into one case-insensitive alternation and matched with
// regexp against the same User-Agent string, in parallel with GOMAXPROCS set
// to 4.
func BenchmarkContainsAnyWord_Regexp(b *testing.B) {
	runtime.GOMAXPROCS(4)

	// Turn the newline-separated keyword list into "a|b|c" and compile it once.
	alternation := strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|")
	matcher := regexp.MustCompile("(?i)" + alternation)

	b.ResetTimer()

	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			_ = matcher.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0")
		}
	})
}
|
||||
|
||||
func BenchmarkContainsAnyWord_Re(b *testing.B) {
|
||||
runtime.GOMAXPROCS(4)
|
||||
var reg = re.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|"))
|
||||
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = reg.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkContainsSubRunes(b *testing.B) {
|
||||
runtime.GOMAXPROCS(4)
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = runes.ContainsSubRunes([]rune("How are you"), []rune("YOU"), true)
|
||||
}
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user