Initial commit (code only without large binaries)
This commit is contained in:
298
EdgeNode/internal/utils/re/regexp.go
Normal file
298
EdgeNode/internal/utils/re/regexp.go
Normal file
@@ -0,0 +1,298 @@
|
||||
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
|
||||
|
||||
package re
|
||||
|
||||
import (
|
||||
"github.com/iwind/TeaGo/types"
|
||||
"regexp"
|
||||
"regexp/syntax"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Helper patterns used while extracting keywords, plus the id counter.
var (
	// prefixReg matches a leading flag group such as (?x) and captures the flags.
	prefixReg = regexp.MustCompile(`^\(\?([\w\s]+)\)`)

	// braceZeroReg matches a repetition written as {0}.
	braceZeroReg = regexp.MustCompile(`^{\s*0*\s*}`)

	// braceZeroReg2 matches the start of a repetition written as {0, x}.
	braceZeroReg2 = regexp.MustCompile(`^{\s*0*\s*,`)

	// lastId is the id handed to the most recently initialized Regexp.
	lastId uint64
)
|
||||
|
||||
type Regexp struct {
|
||||
exp string
|
||||
rawRegexp *regexp.Regexp
|
||||
|
||||
isStrict bool
|
||||
isCaseInsensitive bool
|
||||
keywords []string
|
||||
keywordsMap RuneMap
|
||||
|
||||
id uint64
|
||||
idString string
|
||||
}
|
||||
|
||||
func MustCompile(exp string) *Regexp {
|
||||
var reg = &Regexp{
|
||||
exp: exp,
|
||||
rawRegexp: regexp.MustCompile(exp),
|
||||
}
|
||||
reg.init()
|
||||
return reg
|
||||
}
|
||||
|
||||
func Compile(exp string) (*Regexp, error) {
|
||||
reg, err := regexp.Compile(exp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewRegexp(reg), nil
|
||||
}
|
||||
|
||||
func NewRegexp(rawRegexp *regexp.Regexp) *Regexp {
|
||||
var reg = &Regexp{
|
||||
exp: rawRegexp.String(),
|
||||
rawRegexp: rawRegexp,
|
||||
}
|
||||
reg.init()
|
||||
return reg
|
||||
}
|
||||
|
||||
func (this *Regexp) init() {
|
||||
this.id = atomic.AddUint64(&lastId, 1)
|
||||
this.idString = "re:" + types.String(this.id)
|
||||
|
||||
if len(this.exp) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
var exp = strings.TrimSpace(this.exp)
|
||||
|
||||
// 去掉前面的(?...)
|
||||
if prefixReg.MatchString(exp) {
|
||||
var matches = prefixReg.FindStringSubmatch(exp)
|
||||
var modifiers = matches[1]
|
||||
if strings.Contains(modifiers, "i") {
|
||||
this.isCaseInsensitive = true
|
||||
}
|
||||
exp = exp[len(matches[0]):]
|
||||
}
|
||||
|
||||
var keywords = this.ParseKeywords(exp)
|
||||
|
||||
var filteredKeywords = []string{}
|
||||
var minLength = 1
|
||||
var isValid = true
|
||||
for _, keyword := range keywords {
|
||||
if len(keyword) <= minLength {
|
||||
isValid = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if isValid {
|
||||
filteredKeywords = keywords
|
||||
}
|
||||
|
||||
this.keywords = filteredKeywords
|
||||
if len(filteredKeywords) > 0 {
|
||||
this.keywordsMap = NewRuneTree(filteredKeywords)
|
||||
}
|
||||
}
|
||||
|
||||
func (this *Regexp) Keywords() []string {
|
||||
return this.keywords
|
||||
}
|
||||
|
||||
func (this *Regexp) Raw() *regexp.Regexp {
|
||||
return this.rawRegexp
|
||||
}
|
||||
|
||||
func (this *Regexp) IsCaseInsensitive() bool {
|
||||
return this.isCaseInsensitive
|
||||
}
|
||||
|
||||
func (this *Regexp) MatchString(s string) bool {
|
||||
if this.keywordsMap != nil {
|
||||
var b = this.keywordsMap.Lookup(s, this.isCaseInsensitive)
|
||||
if !b {
|
||||
return false
|
||||
}
|
||||
if this.isStrict {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return this.rawRegexp.MatchString(s)
|
||||
}
|
||||
|
||||
func (this *Regexp) Match(s []byte) bool {
|
||||
if this.keywordsMap != nil {
|
||||
var b = this.keywordsMap.Lookup(string(s), this.isCaseInsensitive)
|
||||
if !b {
|
||||
return false
|
||||
}
|
||||
if this.isStrict {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return this.rawRegexp.Match(s)
|
||||
}
|
||||
|
||||
func (this *Regexp) FindStringSubmatch(s string) []string {
|
||||
return this.rawRegexp.FindStringSubmatch(s)
|
||||
}
|
||||
|
||||
// ParseKeywords 提取表达式中的关键词
|
||||
func (this *Regexp) ParseKeywords(exp string) (keywords []string) {
|
||||
if len(exp) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
reg, err := syntax.Parse(exp, syntax.Perl)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(reg.Sub) == 0 {
|
||||
var keywordRunes = this.parseKeyword(reg.String())
|
||||
if len(keywordRunes) > 0 {
|
||||
keywords = append(keywords, string(keywordRunes))
|
||||
}
|
||||
return
|
||||
}
|
||||
if len(reg.Sub) == 1 {
|
||||
if reg.Op == syntax.OpStar || reg.Op == syntax.OpQuest || reg.Op == syntax.OpRepeat {
|
||||
return nil
|
||||
}
|
||||
return this.ParseKeywords(reg.Sub[0].String())
|
||||
}
|
||||
|
||||
const maxComposedKeywords = 32
|
||||
|
||||
switch reg.Op {
|
||||
case syntax.OpConcat:
|
||||
var prevKeywords = []string{}
|
||||
var isStarted bool
|
||||
for _, sub := range reg.Sub {
|
||||
if sub.String() == `\b` {
|
||||
if isStarted {
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
if sub.Op != syntax.OpLiteral && sub.Op != syntax.OpCapture && sub.Op != syntax.OpAlternate {
|
||||
if isStarted {
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
var subKeywords = this.ParseKeywords(sub.String())
|
||||
if len(subKeywords) > 0 {
|
||||
if !isStarted {
|
||||
prevKeywords = subKeywords
|
||||
isStarted = true
|
||||
} else {
|
||||
for _, prevKeyword := range prevKeywords {
|
||||
for _, subKeyword := range subKeywords {
|
||||
keywords = append(keywords, prevKeyword+subKeyword)
|
||||
|
||||
// 限制不能超出最大关键词
|
||||
if len(keywords) > maxComposedKeywords {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
prevKeywords = keywords
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
if len(prevKeywords) > 0 && len(keywords) == 0 {
|
||||
keywords = prevKeywords
|
||||
}
|
||||
case syntax.OpAlternate:
|
||||
for _, sub := range reg.Sub {
|
||||
var subKeywords = this.ParseKeywords(sub.String())
|
||||
if len(subKeywords) == 0 {
|
||||
keywords = nil
|
||||
return
|
||||
}
|
||||
keywords = append(keywords, subKeywords...)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (this *Regexp) IdString() string {
|
||||
return this.idString
|
||||
}
|
||||
|
||||
func (this *Regexp) parseKeyword(subExp string) (result []rune) {
|
||||
if len(subExp) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 去除开始和结尾的()
|
||||
if subExp[0] == '(' && subExp[len(subExp)-1] == ')' {
|
||||
subExp = subExp[1 : len(subExp)-1]
|
||||
if len(subExp) == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
var runes = []rune(subExp)
|
||||
|
||||
for index, r := range runes {
|
||||
if r == '[' || r == '{' || r == '.' || r == '+' || r == '$' {
|
||||
if index == 0 {
|
||||
return
|
||||
}
|
||||
if runes[index-1] != '\\' {
|
||||
if r == '{' && (braceZeroReg.MatchString(subExp[index:])) || braceZeroReg2.MatchString(subExp[index:]) { // r {0, ...}
|
||||
if len(result) == 0 {
|
||||
return nil
|
||||
}
|
||||
return result[:len(result)-1]
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
}
|
||||
if r == '?' || r == '*' {
|
||||
if index == 0 {
|
||||
return
|
||||
}
|
||||
if runes[index-1] != '\\' {
|
||||
if len(result) > 0 {
|
||||
return result[:len(result)-1]
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if (r == 'n' || r == 't' || r == 'a' || r == 'f' || r == 'r' || r == 'v' || r == 'x') && index > 0 && runes[index-1] == '\\' {
|
||||
switch r {
|
||||
case 'n':
|
||||
r = '\n'
|
||||
case 't':
|
||||
r = '\t'
|
||||
case 'f':
|
||||
r = '\f'
|
||||
case 'r':
|
||||
r = '\r'
|
||||
case 'v':
|
||||
r = '\v'
|
||||
case 'a':
|
||||
r = '\a'
|
||||
case 'x':
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if r == '\\' {
|
||||
continue
|
||||
}
|
||||
result = append(result, r)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
263
EdgeNode/internal/utils/re/regexp_test.go
Normal file
263
EdgeNode/internal/utils/re/regexp_test.go
Normal file
@@ -0,0 +1,263 @@
|
||||
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
|
||||
|
||||
package re_test
|
||||
|
||||
import (
|
||||
"github.com/TeaOSLab/EdgeCommon/pkg/serverconfigs/firewallconfigs"
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils/re"
|
||||
"github.com/iwind/TeaGo/assert"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestRegexp logs the literal prefix, group count and group names the standard
// library reports for a few sample expressions.
func TestRegexp(t *testing.T) {
	var samples = []string{"(?i)(abc|efg)", "abc|efg", "abc(.+)"}
	for i := 0; i < len(samples); i++ {
		var exp = samples[i]
		var compiled = regexp.MustCompile(exp)
		t.Log("===" + exp + "===")
		t.Log(compiled.LiteralPrefix())
		t.Log(compiled.NumSubexp())
		t.Log(compiled.SubexpNames())
	}
}
|
||||
|
||||
func TestRegexp_MatchString(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
|
||||
{
|
||||
var r = re.MustCompile("abc")
|
||||
a.IsTrue(r.MatchString("abc"))
|
||||
a.IsFalse(r.MatchString("ab"))
|
||||
a.IsFalse(r.MatchString("ABC"))
|
||||
}
|
||||
|
||||
{
|
||||
var r = re.MustCompile("(?i)abc|def|ghi")
|
||||
a.IsTrue(r.MatchString("DEF"))
|
||||
a.IsFalse(r.MatchString("ab"))
|
||||
a.IsTrue(r.MatchString("ABC"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegexp_Sub logs the sub-expression names of a two-group expression.
func TestRegexp_Sub(t *testing.T) {
	var names = regexp.MustCompile(`(a|b|c)(e|f|g)`).SubexpNames()
	for i := range names {
		t.Log(names[i])
	}
}
|
||||
|
||||
func TestRegexp_ParseKeywords(t *testing.T) {
|
||||
var r = re.MustCompile("")
|
||||
|
||||
{
|
||||
var keywords = r.ParseKeywords(`\n\t\n\f\r\v\x123`)
|
||||
t.Log(keywords)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegexp_Special(t *testing.T) {
|
||||
for _, s := range []string{
|
||||
`\\s`,
|
||||
`\s\W`,
|
||||
`aaaa/\W`,
|
||||
`aaaa\/\W`,
|
||||
`aaaa\=\W`,
|
||||
`aaaa\\=\W`,
|
||||
`aaaa\\\=\W`,
|
||||
`aaaa\\\\=\W`,
|
||||
} {
|
||||
var es = testUnescape(t, s)
|
||||
t.Log(s, "=>", es)
|
||||
_, err := re.Compile(es)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegexp_Special2(t *testing.T) {
|
||||
r, err := re.Compile(testUnescape(t, `/api/ios/a
|
||||
/api/ios/b
|
||||
/api/ios/c
|
||||
/report`))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Log(r.Keywords())
|
||||
}
|
||||
|
||||
func TestRegexp_ParseKeywords2(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
|
||||
var r = re.MustCompile("")
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)def"), []string{"abcdef"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)|(?:def)"), []string{"abc", "def"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("(abc)"), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("(abc|def|ghi)"), []string{"abc", "def", "ghi"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("(?i:abc)"), []string{}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`\babc`), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(` \babc`), []string{" "}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`\babc\b`), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`\b(abc)`), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("abc"), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("abc|efg|hij"), []string{"abc", "efg", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg|hij`), []string{"abc|efg", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg*|hij`), []string{"abc|ef", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg?|hij`), []string{"abc|ef", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg+|hij`), []string{"abc|ef", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg{2,10}|hij`), []string{"abc|ef", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg{0,10}|hij`), []string{"abc|ef", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc\|efg.+|hij`), []string{"abc|efg", "hij"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("A(abc|bcd)"), []string{"Aabc", "Abcd"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("^abc"), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("abc$"), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`abc$`), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("abc\\d"), []string{"abc"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("abc{0,4}"), []string{"ab"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("{0,4}"), []string{}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("{1,4}"), []string{}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords("中文|北京|上海|golang"), []string{"中文", "北京", "上海", "golang"}))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick)\s*=`), strings.Split("onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick", "|")))
|
||||
a.IsTrue(testCompareStrings(r.ParseKeywords(`/\*(!|\x00)`), []string{"/*"}))
|
||||
}
|
||||
|
||||
func TestRegexp_ParseKeywords3(t *testing.T) {
|
||||
var r = re.MustCompile("")
|
||||
|
||||
var policy = firewallconfigs.HTTPFirewallTemplate()
|
||||
for _, group := range policy.Inbound.Groups {
|
||||
for _, set := range group.Sets {
|
||||
for _, rule := range set.Rules {
|
||||
if rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorMatch || rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorNotMatch {
|
||||
t.Log(set.Name+":", rule.Value, "=>", r.ParseKeywords(rule.Value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRegexp_MatchString(b *testing.B) {
|
||||
var r = re.MustCompile("(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\\s|%09|%0A|(\\+|%20))*(=|%3D)")
|
||||
b.ResetTimer()
|
||||
|
||||
//b.Log("keywords:", r.Keywords())
|
||||
for i := 0; i < b.N; i++ {
|
||||
r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkRegexp_MatchString2 is the standard-library baseline for
// BenchmarkRegexp_MatchString.
func BenchmarkRegexp_MatchString2(b *testing.B) {
	const input = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var matcher = regexp.MustCompile(`(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\s|%09|%0A|(\+|%20))*(=|%3D)`)
	b.ResetTimer()

	for n := b.N; n > 0; n-- {
		matcher.MatchString(input)
	}
}
|
||||
|
||||
func BenchmarkRegexp_MatchString_CaseSensitive(b *testing.B) {
|
||||
var r = re.MustCompile("(abc|def|ghi)")
|
||||
b.Log("keywords:", r.Keywords())
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
r.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkRegexp_MatchString_CaseSensitive2 is the standard-library baseline
// for BenchmarkRegexp_MatchString_CaseSensitive.
func BenchmarkRegexp_MatchString_CaseSensitive2(b *testing.B) {
	const input = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var matcher = regexp.MustCompile("(abc|def|ghi)")
	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		matcher.MatchString(input)
	}
}
|
||||
|
||||
func BenchmarkRegexp_MatchString_VS_FindSubString1(b *testing.B) {
|
||||
var r = re.MustCompile("(?i)(chrome)")
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = r.Raw().MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRegexp_MatchString_VS_FindSubString2(b *testing.B) {
|
||||
var r = re.MustCompile("(?i)(chrome)")
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = r.Raw().FindStringSubmatch("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36")
|
||||
}
|
||||
}
|
||||
|
||||
// TestSplitAndJoin logs a multi-line path list with every "/" replaced by the
// (/|%2F) alternation.
func TestSplitAndJoin(t *testing.T) {
	const paths = `/api/ios/a
/api/ios/b
/api/ios/c
/report`

	var segments = strings.Split(paths, "/")
	var joined = strings.Join(segments, `(/|%2F)`)
	t.Log(joined)
}
|
||||
|
||||
// testCompareStrings reports whether s1 and s2 have the same length and the
// same element at every position. A nil slice equals an empty one.
func testCompareStrings(s1 []string, s2 []string) bool {
	var n = len(s1)
	if n != len(s2) {
		return false
	}
	for i := 0; i < n; i++ {
		if s1[i] != s2[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// testUnescape rewrites v so that selected characters also match their
// url-encoded forms. Rules are applied in order; each rule replaces every
// occurrence of its first element with its second.
func testUnescape(t *testing.T, v string) string {
	// {token, replacement} pairs for url-encoded characters.
	var rules = [][2]string{
		{`\s`, `(\s|%09|%0A|\+)`},
		{`\(`, `(\(|%28)`},
		{`=`, `(=|%3D)`},
		{`<`, `(<|%3C)`},
		{`\*`, `(\*|%2A)`},
		{`\\`, `(\\|%2F)`},
		{`!`, `(!|%21)`},
		{`/`, `(/|%2F)`},
		{`;`, `(;|%3B)`},
		{`\+`, `(\+|%20)`},
	}

	for _, rule := range rules {
		var token, replacement = rule[0], rule[1]
		if !strings.Contains(v, token) {
			continue
		}

		var parts = strings.Split(v, token)
		for i := range parts {
			var part = parts[i]

			// An odd number of trailing backslashes means the last one
			// belonged to the removed token; drop it.
			var trailing = len(part) - len(strings.TrimRight(part, `\`))
			if trailing%2 == 1 {
				parts[i] = part[:len(part)-1]
			}
		}

		v = strings.Join(parts, replacement)
	}

	return v
}
|
||||
74
EdgeNode/internal/utils/re/rune_tree.go
Normal file
74
EdgeNode/internal/utils/re/rune_tree.go
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
|
||||
|
||||
package re
|
||||
|
||||
// RuneMap is one level of a keyword trie: it maps a rune to the node reached
// by consuming that rune.
type RuneMap map[rune]*RuneTree

// Lookup reports whether s contains at least one stored keyword as a
// contiguous substring. With caseInsensitive set, ASCII letters a-z / A-Z in s
// also match their other-case form in the trie.
//
// NOTE(review): only ASCII case is folded here, while the regexp package's
// (?i) flag folds by Unicode rules — confirm whether non-ASCII folding matters
// for callers.
func (this RuneMap) Lookup(s string, caseInsensitive bool) bool {
	return this.lookup([]rune(s), caseInsensitive, 0)
}

// lookup searches runes for a keyword. At depth 0 every position is tried as
// a starting point; at any greater depth the keyword has to continue at
// runes[0].
func (this RuneMap) lookup(runes []rune, caseInsensitive bool, depth int) bool {
	if depth > 0 {
		return this.matchAt(runes, caseInsensitive)
	}
	for start := range runes {
		if this.matchAt(runes[start:], caseInsensitive) {
			return true
		}
	}
	return false
}

// matchAt reports whether a keyword path in this map begins exactly at
// runes[0]. In case-insensitive mode both the rune itself and its other-case
// form are followed, so two keywords that differ only in case at this position
// are both reachable.
func (this RuneMap) matchAt(runes []rune, caseInsensitive bool) bool {
	if len(runes) == 0 {
		return false
	}

	var r = runes[0]
	var rest = runes[1:]
	if this.descend(r, rest, caseInsensitive) {
		return true
	}
	if !caseInsensitive {
		return false
	}

	switch {
	case r >= 'a' && r <= 'z':
		return this.descend(r-32, rest, caseInsensitive)
	case r >= 'A' && r <= 'Z':
		return this.descend(r+32, rest, caseInsensitive)
	}
	return false
}

// descend follows the edge labelled r and reports whether a keyword ends at
// that node or continues, contiguously, through rest.
func (this RuneMap) descend(r rune, rest []rune, caseInsensitive bool) bool {
	tree, ok := this[r]
	if !ok {
		return false
	}
	if tree.IsEnd {
		return true
	}
	return tree.Children.matchAt(rest, caseInsensitive)
}

// RuneTree is a trie node.
type RuneTree struct {
	// Children holds the nodes reachable from this one.
	Children RuneMap
	// IsEnd marks that a keyword ends at this node.
	IsEnd bool
}

// NewRuneTree builds a trie from list and returns its root level. Empty
// strings are ignored.
func NewRuneTree(list []string) RuneMap {
	var rootMap = RuneMap{}
	for _, s := range list {
		if len(s) == 0 {
			continue
		}

		var lastMap = rootMap
		var runes = []rune(s)
		for index, r := range runes {
			tree, ok := lastMap[r]
			if !ok {
				tree = &RuneTree{
					Children: RuneMap{},
				}
				lastMap[r] = tree
			}
			if index == len(runes)-1 {
				tree.IsEnd = true
			}
			lastMap = tree.Children
		}
	}
	return rootMap
}
|
||||
52
EdgeNode/internal/utils/re/rune_tree_test.go
Normal file
52
EdgeNode/internal/utils/re/rune_tree_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
|
||||
|
||||
package re_test
|
||||
|
||||
import (
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils/re"
|
||||
"github.com/iwind/TeaGo/assert"
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNewRuneTree(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
|
||||
var tree = re.NewRuneTree([]string{"abc", "abd", "def", "GHI", "中国", "@"})
|
||||
a.IsTrue(tree.Lookup("ABC", true))
|
||||
a.IsTrue(tree.Lookup("ABC1", true))
|
||||
a.IsTrue(tree.Lookup("1ABC", true))
|
||||
a.IsTrue(tree.Lookup("def", true))
|
||||
a.IsTrue(tree.Lookup("ghI", true))
|
||||
a.IsFalse(tree.Lookup("d ef", true))
|
||||
a.IsFalse(tree.Lookup("de", true))
|
||||
a.IsFalse(tree.Lookup("de f", true))
|
||||
a.IsTrue(tree.Lookup("我是中国人", true))
|
||||
a.IsTrue(tree.Lookup("iwind.liu@gmail.com", true))
|
||||
}
|
||||
|
||||
func TestNewRuneTree2(t *testing.T) {
|
||||
var tree = re.NewRuneTree([]string{"abc", "abd", "def", "GHI", "中国", "@"})
|
||||
tree.Lookup("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36", true)
|
||||
}
|
||||
|
||||
func BenchmarkRuneMap_Lookup(b *testing.B) {
|
||||
var tree = re.NewRuneTree([]string{"abc", "abd", "def", "ghi", "中国"})
|
||||
for i := 0; i < b.N; i++ {
|
||||
tree.Lookup("我来自中国", true)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRuneMap_Lookup2_NOT_FOUND(b *testing.B) {
|
||||
var tree = re.NewRuneTree([]string{"abc", "abd", "cde", "GHI"})
|
||||
for i := 0; i < b.N; i++ {
|
||||
tree.Lookup("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36", true)
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkRune_Regexp_FOUND measures the standard-library matcher on an input
// that contains one of the alternatives.
func BenchmarkRune_Regexp_FOUND(b *testing.B) {
	const input = "HELLO WORLD ABC 123 456 abc HELLO WORLD HELLO WORLD ABC 123 456 abc HELLO WORLD HELLO WORLD ABC 123 456 abc HELLO WORLD"

	var matcher = regexp.MustCompile("(?i)abc|abd|cde|GHI")
	for n := b.N; n > 0; n-- {
		matcher.MatchString(input)
	}
}
|
||||
Reference in New Issue
Block a user