Initial commit (code only without large binaries)

This commit is contained in:
robin
2026-02-15 18:58:44 +08:00
commit 35df75498f
9442 changed files with 1495866 additions and 0 deletions

View File

@@ -0,0 +1,298 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package re
import (
"github.com/iwind/TeaGo/types"
"regexp"
"regexp/syntax"
"strings"
"sync/atomic"
)
var (
	// prefixReg matches a flag group at the very start of an expression,
	// e.g. (?i) or (?is); the flag letters are captured in group 1.
	prefixReg = regexp.MustCompile(`^\(\?([\w\s]+)\)`)

	// braceZeroReg matches a leading "exactly zero" repetition such as {0}.
	braceZeroReg = regexp.MustCompile(`^{\s*0*\s*}`)

	// braceZeroReg2 matches the start of a repetition whose minimum is zero,
	// such as {0, x}.
	braceZeroReg2 = regexp.MustCompile(`^{\s*0*\s*,`)

	// lastId is the counter from which Regexp ids are allocated atomically.
	lastId uint64
)
// Regexp wraps a compiled regular expression together with literal keywords
// extracted from it. The keywords are used by MatchString and Match as a
// cheap pre-check before the underlying regexp is executed.
type Regexp struct {
	exp       string         // source text of the expression
	rawRegexp *regexp.Regexp // compiled standard-library regexp

	isStrict          bool // when set, a keyword hit is reported as a match without running rawRegexp
	isCaseInsensitive bool // set when the expression starts with a flag group containing "i"

	keywords    []string // keywords extracted from exp
	keywordsMap RuneMap  // lookup structure built from keywords; nil when there are none

	id       uint64 // value taken from the package-level counter
	idString string // "re:" followed by id
}
// MustCompile compiles exp and wraps it in a Regexp.
// Like regexp.MustCompile, it panics when exp is not a valid expression.
func MustCompile(exp string) *Regexp {
	// rawRegexp.String() returns the source text, so delegating to NewRegexp
	// yields the same exp that was passed in.
	return NewRegexp(regexp.MustCompile(exp))
}
// Compile compiles exp and wraps it in a Regexp.
// It returns the compilation error unchanged when exp is invalid.
func Compile(exp string) (*Regexp, error) {
	rawRegexp, compileErr := regexp.Compile(exp)
	if compileErr == nil {
		return NewRegexp(rawRegexp), nil
	}
	return nil, compileErr
}
// NewRegexp wraps an already compiled regexp and prepares its keywords.
func NewRegexp(rawRegexp *regexp.Regexp) *Regexp {
	var wrapped = new(Regexp)
	wrapped.exp = rawRegexp.String()
	wrapped.rawRegexp = rawRegexp
	wrapped.init()
	return wrapped
}
// init assigns the id of the Regexp and extracts the keywords that
// MatchString and Match use as a pre-check.
func (this *Regexp) init() {
	this.id = atomic.AddUint64(&lastId, 1)
	this.idString = "re:" + types.String(this.id)

	if this.exp == "" {
		return
	}

	var exp = strings.TrimSpace(this.exp)

	// Strip a leading (?...) flag group and remember the "i" flag.
	if matches := prefixReg.FindStringSubmatch(exp); matches != nil {
		if strings.Contains(matches[1], "i") {
			this.isCaseInsensitive = true
		}
		exp = exp[len(matches[0]):]
	}

	// A single keyword that is too short disables the whole keyword set.
	const minLength = 1
	var keywords = this.ParseKeywords(exp)
	var hasShortKeyword = false
	for _, keyword := range keywords {
		if len(keyword) <= minLength {
			hasShortKeyword = true
			break
		}
	}

	var filteredKeywords = []string{}
	if !hasShortKeyword {
		filteredKeywords = keywords
	}

	this.keywords = filteredKeywords
	if len(filteredKeywords) > 0 {
		this.keywordsMap = NewRuneTree(filteredKeywords)
	}
}
// Keywords returns the keywords that were extracted from the expression.
func (this *Regexp) Keywords() []string {
	return this.keywords
}
// Raw returns the underlying standard-library regexp.
func (this *Regexp) Raw() *regexp.Regexp {
	return this.rawRegexp
}
// IsCaseInsensitive reports whether the expression started with a flag
// group containing "i".
func (this *Regexp) IsCaseInsensitive() bool {
	return this.isCaseInsensitive
}
// MatchString reports whether s matches the expression.
// When keywords are available, s must contain one of them before the
// underlying regexp is consulted.
func (this *Regexp) MatchString(s string) bool {
	if this.keywordsMap == nil {
		return this.rawRegexp.MatchString(s)
	}

	// No keyword present: the expression cannot match.
	if !this.keywordsMap.Lookup(s, this.isCaseInsensitive) {
		return false
	}

	// In strict mode the keyword hit alone is the answer.
	return this.isStrict || this.rawRegexp.MatchString(s)
}
// Match reports whether the byte slice s matches the expression.
// When keywords are available, s must contain one of them before the
// underlying regexp is consulted.
func (this *Regexp) Match(s []byte) bool {
	if this.keywordsMap == nil {
		return this.rawRegexp.Match(s)
	}

	// No keyword present: the expression cannot match.
	if !this.keywordsMap.Lookup(string(s), this.isCaseInsensitive) {
		return false
	}

	// In strict mode the keyword hit alone is the answer.
	return this.isStrict || this.rawRegexp.Match(s)
}
// FindStringSubmatch forwards to the underlying regexp without any
// keyword pre-check.
func (this *Regexp) FindStringSubmatch(s string) []string {
	return this.rawRegexp.FindStringSubmatch(s)
}
// ParseKeywords extracts literal keywords from the expression.
//
// The result is used as a pre-check by MatchString and Match: a subject that
// contains none of the keywords is rejected without running the regexp, so
// every returned keyword set must be such that any matching subject contains
// at least one keyword. An empty result means "no usable keywords".
//
// Rules:
//   - a node without sub expressions is handed to parseKeyword;
//   - a node with a single sub expression yields nothing for *, ? and {m,n},
//     otherwise the keywords of the sub expression;
//   - a concatenation joins the keywords of adjacent literal/group/alternation
//     pieces, but only across pieces that are matched exactly by their
//     keywords (see isExactLiteral);
//   - an alternation yields the union of its branches, or nothing as soon as
//     one branch has no keywords.
func (this *Regexp) ParseKeywords(exp string) (keywords []string) {
	if len(exp) == 0 {
		return nil
	}

	reg, err := syntax.Parse(exp, syntax.Perl)
	if err != nil {
		return nil
	}

	if len(reg.Sub) == 0 {
		var keywordRunes = this.parseKeyword(reg.String())
		if len(keywordRunes) > 0 {
			keywords = append(keywords, string(keywordRunes))
		}
		return keywords
	}

	if len(reg.Sub) == 1 {
		// The sub expression may be absent entirely, so nothing is required.
		if reg.Op == syntax.OpStar || reg.Op == syntax.OpQuest || reg.Op == syntax.OpRepeat {
			return nil
		}
		return this.ParseKeywords(reg.Sub[0].String())
	}

	const maxComposedKeywords = 32

	switch reg.Op {
	case syntax.OpConcat:
		var prevKeywords []string
		var isStarted bool
		for _, sub := range reg.Sub {
			// Anything that is not a literal, group or alternation (word
			// boundaries, classes, repetitions, ...) is skipped before the
			// first keyword and terminates the keyword afterwards.
			if sub.Op != syntax.OpLiteral && sub.Op != syntax.OpCapture && sub.Op != syntax.OpAlternate {
				if isStarted {
					break
				}
				continue
			}

			var subKeywords = this.ParseKeywords(sub.String())
			if len(subKeywords) == 0 {
				break
			}

			// Keywords of a piece such as (ab.+) only cover part of what the
			// piece matches. Joining them with a neighbour would produce a
			// string ("abcd") that a matching subject ("abXcd") need not
			// contain, so such a piece can neither be extended nor appended.
			var isExact = this.isExactLiteral(sub)

			if !isStarted {
				prevKeywords = subKeywords
				isStarted = true
				if !isExact {
					break
				}
				continue
			}
			if !isExact {
				break
			}

			// Build the cross product in a fresh slice so that shorter,
			// intermediate compositions do not leak into the result.
			var composed = make([]string, 0, len(prevKeywords)*len(subKeywords))
			for _, prevKeyword := range prevKeywords {
				for _, subKeyword := range subKeywords {
					composed = append(composed, prevKeyword+subKeyword)

					// Give up when the number of composed keywords gets too large.
					if len(composed) > maxComposedKeywords {
						return nil
					}
				}
			}
			prevKeywords = composed
		}
		keywords = prevKeywords
	case syntax.OpAlternate:
		for _, sub := range reg.Sub {
			var subKeywords = this.ParseKeywords(sub.String())
			if len(subKeywords) == 0 {
				// One branch without keywords makes the whole set unusable.
				return nil
			}
			keywords = append(keywords, subKeywords...)
		}
	}

	return keywords
}

// isExactLiteral reports whether reg is built from literals only (optionally
// nested in groups, alternations and concatenations) and every literal is
// reproduced in full by parseKeyword, i.e. the extracted keywords are exactly
// the strings reg can match.
func (this *Regexp) isExactLiteral(reg *syntax.Regexp) bool {
	switch reg.Op {
	case syntax.OpLiteral:
		if reg.Flags&syntax.FoldCase != 0 {
			return false
		}
		return string(this.parseKeyword(reg.String())) == string(reg.Rune)
	case syntax.OpCapture:
		return len(reg.Sub) == 1 && this.isExactLiteral(reg.Sub[0])
	case syntax.OpAlternate, syntax.OpConcat:
		if len(reg.Sub) == 0 {
			return false
		}
		for _, sub := range reg.Sub {
			if !this.isExactLiteral(sub) {
				return false
			}
		}
		return true
	}
	return false
}
// IdString returns the id of the Regexp in its "re:<id>" string form.
func (this *Regexp) IdString() string {
	return this.idString
}
// parseKeyword returns the literal text at the start of subExp, stopping at
// the first construct that is not a plain character.
//
//   - one pair of enclosing parentheses is removed first;
//   - an unescaped [ . + $ ends the keyword;
//   - an unescaped ? or *, and a { that starts a repetition with minimum 0,
//     end the keyword and also drop the last rune, because that rune is
//     optional;
//   - \n \t \f \r \v \a are converted to the control character they denote;
//   - a backslash followed by any other letter or digit (\x.., \d, \b, \A ...)
//     is a character code, class or assertion rather than literal text, so it
//     ends the keyword;
//   - a backslash followed by anything else (including another backslash)
//     yields that character literally.
func (this *Regexp) parseKeyword(subExp string) (result []rune) {
	if len(subExp) == 0 {
		return nil
	}

	// Remove the enclosing ( and ).
	if subExp[0] == '(' && subExp[len(subExp)-1] == ')' {
		subExp = subExp[1 : len(subExp)-1]
		if len(subExp) == 0 {
			return nil
		}
	}

	var runes = []rune(subExp)

	// escaped is tracked as state instead of peeking at the previous rune, so
	// that an escaped backslash (\\) is kept as one literal backslash and does
	// not escape the rune after it.
	var escaped = false

	for index, r := range runes {
		if escaped {
			escaped = false
			switch r {
			case 'n':
				r = '\n'
			case 't':
				r = '\t'
			case 'f':
				r = '\f'
			case 'r':
				r = '\r'
			case 'v':
				r = '\v'
			case 'a':
				r = '\a'
			default:
				if (r >= '0' && r <= '9') || (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
					return result
				}
			}
			result = append(result, r)
			continue
		}

		switch r {
		case '\\':
			escaped = true
		case '[', '.', '+', '$':
			return result
		case '{':
			// index counts runes, so the remainder must be taken from runes,
			// not from the byte string.
			var rest = string(runes[index:])
			if braceZeroReg.MatchString(rest) || braceZeroReg2.MatchString(rest) { // r{0}, r{0, ...}
				if len(result) == 0 {
					return nil
				}
				return result[:len(result)-1]
			}
			return result
		case '?', '*':
			if len(result) == 0 {
				return nil
			}
			return result[:len(result)-1]
		default:
			result = append(result, r)
		}
	}

	return result
}

View File

@@ -0,0 +1,263 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package re_test
import (
"github.com/TeaOSLab/EdgeCommon/pkg/serverconfigs/firewallconfigs"
"github.com/TeaOSLab/EdgeNode/internal/utils/re"
"github.com/iwind/TeaGo/assert"
"regexp"
"strings"
"testing"
)
// TestRegexp logs a few properties of the standard-library regexp for a
// handful of sample expressions.
func TestRegexp(t *testing.T) {
	var expressions = []string{"(?i)(abc|efg)", "abc|efg", "abc(.+)"}
	for _, exp := range expressions {
		var compiled = regexp.MustCompile(exp)
		prefix, complete := compiled.LiteralPrefix()

		t.Log("===" + exp + "===")
		t.Log(prefix, complete)
		t.Log(compiled.NumSubexp())
		t.Log(compiled.SubexpNames())
	}
}
// TestRegexp_MatchString checks MatchString for a case-sensitive and a
// case-insensitive expression.
func TestRegexp_MatchString(t *testing.T) {
	var a = assert.NewAssertion(t)

	var caseSensitive = re.MustCompile("abc")
	a.IsTrue(caseSensitive.MatchString("abc"))
	a.IsFalse(caseSensitive.MatchString("ab"))
	a.IsFalse(caseSensitive.MatchString("ABC"))

	var caseInsensitive = re.MustCompile("(?i)abc|def|ghi")
	a.IsTrue(caseInsensitive.MatchString("DEF"))
	a.IsFalse(caseInsensitive.MatchString("ab"))
	a.IsTrue(caseInsensitive.MatchString("ABC"))
}
// TestRegexp_Sub logs the sub expression names of a sample expression.
func TestRegexp_Sub(t *testing.T) {
	var names = regexp.MustCompile(`(a|b|c)(e|f|g)`).SubexpNames()
	for i := range names {
		t.Log(names[i])
	}
}
// TestRegexp_ParseKeywords logs the keywords extracted from an expression
// made of escape sequences.
func TestRegexp_ParseKeywords(t *testing.T) {
	var r = re.MustCompile("")
	t.Log(r.ParseKeywords(`\n\t\n\f\r\v\x123`))
}
// TestRegexp_Special verifies that expressions produced by testUnescape
// still compile.
func TestRegexp_Special(t *testing.T) {
	var expressions = []string{
		`\\s`,
		`\s\W`,
		`aaaa/\W`,
		`aaaa\/\W`,
		`aaaa\=\W`,
		`aaaa\\=\W`,
		`aaaa\\\=\W`,
		`aaaa\\\\=\W`,
	}
	for _, exp := range expressions {
		var unescaped = testUnescape(t, exp)
		t.Log(exp, "=>", unescaped)

		if _, err := re.Compile(unescaped); err != nil {
			t.Fatal(err)
		}
	}
}
func TestRegexp_Special2(t *testing.T) {
r, err := re.Compile(testUnescape(t, `/api/ios/a
/api/ios/b
/api/ios/c
/report`))
if err != nil {
t.Fatal(err)
}
t.Log(r.Keywords())
}
// TestRegexp_ParseKeywords2 checks the keywords extracted from a table of
// expressions.
func TestRegexp_ParseKeywords2(t *testing.T) {
	var a = assert.NewAssertion(t)
	var r = re.MustCompile("")

	var cases = []struct {
		exp      string
		keywords []string
	}{
		{"(abc)def", []string{"abcdef"}},
		{"(abc)|(?:def)", []string{"abc", "def"}},
		{"(abc)", []string{"abc"}},
		{"(abc|def|ghi)", []string{"abc", "def", "ghi"}},
		{"(?i:abc)", []string{}},
		{`\babc`, []string{"abc"}},
		{` \babc`, []string{" "}},
		{`\babc\b`, []string{"abc"}},
		{`\b(abc)`, []string{"abc"}},
		{"abc", []string{"abc"}},
		{"abc|efg|hij", []string{"abc", "efg", "hij"}},
		{`abc\|efg|hij`, []string{"abc|efg", "hij"}},
		{`abc\|efg*|hij`, []string{"abc|ef", "hij"}},
		{`abc\|efg?|hij`, []string{"abc|ef", "hij"}},
		{`abc\|efg+|hij`, []string{"abc|ef", "hij"}},
		{`abc\|efg{2,10}|hij`, []string{"abc|ef", "hij"}},
		{`abc\|efg{0,10}|hij`, []string{"abc|ef", "hij"}},
		{`abc\|efg.+|hij`, []string{"abc|efg", "hij"}},
		{"A(abc|bcd)", []string{"Aabc", "Abcd"}},
		{"^abc", []string{"abc"}},
		{"abc$", []string{"abc"}},
		{`abc$`, []string{"abc"}},
		{"abc\\d", []string{"abc"}},
		{"abc{0,4}", []string{"ab"}},
		{"{0,4}", []string{}},
		{"{1,4}", []string{}},
		{"中文|北京|上海|golang", []string{"中文", "北京", "上海", "golang"}},
		{`(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick)\s*=`, strings.Split("onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick", "|")},
		{`/\*(!|\x00)`, []string{"/*"}},
	}

	for _, c := range cases {
		a.IsTrue(testCompareStrings(r.ParseKeywords(c.exp), c.keywords))
	}
}
// TestRegexp_ParseKeywords3 logs the keywords extracted from every
// match / not-match rule of the firewall template.
func TestRegexp_ParseKeywords3(t *testing.T) {
	var r = re.MustCompile("")

	var policy = firewallconfigs.HTTPFirewallTemplate()
	for _, group := range policy.Inbound.Groups {
		for _, set := range group.Sets {
			for _, rule := range set.Rules {
				var isMatchRule = rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorMatch ||
					rule.Operator == firewallconfigs.HTTPFirewallRuleOperatorNotMatch
				if !isMatchRule {
					continue
				}
				t.Log(set.Name+":", rule.Value, "=>", r.ParseKeywords(rule.Value))
			}
		}
	}
}
// BenchmarkRegexp_MatchString measures re.Regexp.MatchString for a
// case-insensitive expression against a subject that does not match.
func BenchmarkRegexp_MatchString(b *testing.B) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var r = re.MustCompile("(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\\s|%09|%0A|(\\+|%20))*(=|%3D)")
	b.ResetTimer()

	//b.Log("keywords:", r.Keywords())
	for i := 0; i < b.N; i++ {
		r.MatchString(subject)
	}
}
// BenchmarkRegexp_MatchString2 measures the standard-library regexp with the
// same expression and subject as BenchmarkRegexp_MatchString.
func BenchmarkRegexp_MatchString2(b *testing.B) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var r = regexp.MustCompile(`(?i)(onmouseover|onmousemove|onmousedown|onmouseup|onerror|onload|onclick|ondblclick|onkeydown|onkeyup|onkeypress)(\s|%09|%0A|(\+|%20))*(=|%3D)`)
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		r.MatchString(subject)
	}
}
// BenchmarkRegexp_MatchString_CaseSensitive measures re.Regexp.MatchString
// for a case-sensitive alternation.
func BenchmarkRegexp_MatchString_CaseSensitive(b *testing.B) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var r = re.MustCompile("(abc|def|ghi)")
	b.Log("keywords:", r.Keywords())
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		r.MatchString(subject)
	}
}
// BenchmarkRegexp_MatchString_CaseSensitive2 measures the standard-library
// regexp for the same case-sensitive alternation.
func BenchmarkRegexp_MatchString_CaseSensitive2(b *testing.B) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var r = regexp.MustCompile("(abc|def|ghi)")
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		r.MatchString(subject)
	}
}
// BenchmarkRegexp_MatchString_VS_FindSubString1 measures MatchString on the
// raw regexp, for comparison with FindStringSubmatch.
func BenchmarkRegexp_MatchString_VS_FindSubString1(b *testing.B) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var r = re.MustCompile("(?i)(chrome)")
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		_ = r.Raw().MatchString(subject)
	}
}
// BenchmarkRegexp_MatchString_VS_FindSubString2 measures FindStringSubmatch
// on the raw regexp, for comparison with MatchString.
func BenchmarkRegexp_MatchString_VS_FindSubString2(b *testing.B) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var r = re.MustCompile("(?i)(chrome)")
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		_ = r.Raw().FindStringSubmatch(subject)
	}
}
// TestSplitAndJoin logs a multi-line path list in which every "/" has been
// replaced by the alternation (/|%2F).
func TestSplitAndJoin(t *testing.T) {
	const paths = `/api/ios/a
/api/ios/b
/api/ios/c
/report`

	var joined = strings.Join(strings.Split(paths, "/"), `(/|%2F)`)
	t.Log(joined)
}
// testCompareStrings reports whether s1 and s2 have the same length and the
// same elements in the same order. A nil slice equals an empty slice.
func testCompareStrings(s1 []string, s2 []string) bool {
	var count = len(s1)
	if count != len(s2) {
		return false
	}
	for i := 0; i < count; i++ {
		if s1[i] != s2[i] {
			return false
		}
	}
	return true
}
// testUnescape rewrites v so that selected characters also match their
// URL-encoded form, e.g. "=" becomes "(=|%3D)".
//
// The replacements are applied one after another in table order, so the
// output of an earlier entry can be rewritten again by a later one.
// When a character to be replaced is preceded by an odd number of
// backslashes in v, the last of them is an escape for that character and is
// removed, because the replacement brings its own escaping.
//
// t is not used.
func testUnescape(t *testing.T, v string) string {
	// Replace urlencoded characters.
	var unescapeChars = [][2]string{
		{`\s`, `(\s|%09|%0A|\+)`},
		{`\(`, `(\(|%28)`},
		{`=`, `(=|%3D)`},
		{`<`, `(<|%3C)`},
		{`\*`, `(\*|%2A)`},
		{`\\`, `(\\|%5C)`}, // a backslash is %5C; %2F is the slash handled below
		{`!`, `(!|%21)`},
		{`/`, `(/|%2F)`},
		{`;`, `(;|%3B)`},
		{`\+`, `(\+|%20)`},
	}

	for _, c := range unescapeChars {
		if !strings.Contains(v, c[0]) {
			continue
		}

		var pieces = strings.Split(v, c[0])

		// Fix a dangling escape backslash at the end of a piece.
		for pieceIndex, piece := range pieces {
			var l = len(piece)
			if l == 0 || piece[l-1] != '\\' {
				continue
			}

			// Count the trailing backslashes.
			var countBackSlashes = 0
			for i := l - 1; i >= 0 && piece[i] == '\\'; i-- {
				countBackSlashes++
			}

			if countBackSlashes%2 == 1 {
				// Remove the last one.
				pieces[pieceIndex] = piece[:l-1]
			}
		}

		v = strings.Join(pieces, c[1])
	}

	return v
}

View File

@@ -0,0 +1,74 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package re
// RuneMap is one level of a rune-keyed trie that stores a set of keywords.
// Each key is the next rune of a keyword; the value describes the node that
// is reached through that rune.
type RuneMap map[rune]*RuneTree

// Lookup reports whether s contains at least one of the stored keywords as a
// substring. With caseInsensitive set, ASCII letters in s match both their
// lower and upper case form in the keywords.
func (this RuneMap) Lookup(s string, caseInsensitive bool) bool {
	return this.lookup([]rune(s), caseInsensitive, 0)
}

// lookup searches runes for a keyword.
//
// At depth 0 a keyword may start at any position of runes. At depth > 0 the
// call continues a keyword that has already been started, so only the first
// rune is examined: the keyword either continues there or the attempt fails.
func (this RuneMap) lookup(runes []rune, caseInsensitive bool, depth int) bool {
	for i, r := range runes {
		// Candidate keys for r: the rune itself and, in case-insensitive
		// mode, its other ASCII case. Both must be tried because the trie may
		// contain keywords in both cases ("abc" and "ABD").
		var candidates = [2]rune{r, r}
		var count = 1
		if caseInsensitive {
			if r >= 'a' && r <= 'z' {
				candidates[1] = r - 32
				count = 2
			} else if r >= 'A' && r <= 'Z' {
				candidates[1] = r + 32
				count = 2
			}
		}

		for _, candidate := range candidates[:count] {
			tree, ok := this[candidate]
			if !ok {
				continue
			}
			if tree.IsEnd {
				return true
			}
			if tree.Children.lookup(runes[i+1:], caseInsensitive, depth+1) {
				return true
			}
		}

		// Inside a keyword there is no second chance further to the right;
		// skipping runes here would accept "deef" for the keyword "def".
		if depth > 0 {
			return false
		}
	}
	return false
}

// RuneTree is a node of the keyword trie.
type RuneTree struct {
	Children RuneMap // nodes reachable through the next rune
	IsEnd    bool    // true when a keyword ends at this node
}

// NewRuneTree builds the keyword trie for list and returns its root level.
// Empty strings in list are ignored.
func NewRuneTree(list []string) RuneMap {
	var rootMap = RuneMap{}
	for _, s := range list {
		if len(s) == 0 {
			continue
		}

		var lastMap = rootMap
		var runes = []rune(s)
		for index, r := range runes {
			tree, ok := lastMap[r]
			if !ok {
				tree = &RuneTree{
					Children: RuneMap{},
				}
				lastMap[r] = tree
			}
			if index == len(runes)-1 {
				tree.IsEnd = true
			}
			lastMap = tree.Children
		}
	}
	return rootMap
}

View File

@@ -0,0 +1,52 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package re_test
import (
"github.com/TeaOSLab/EdgeNode/internal/utils/re"
"github.com/iwind/TeaGo/assert"
"regexp"
"testing"
)
// TestNewRuneTree checks case-insensitive lookups against a small keyword set.
func TestNewRuneTree(t *testing.T) {
	var a = assert.NewAssertion(t)
	var tree = re.NewRuneTree([]string{"abc", "abd", "def", "GHI", "中国", "@"})

	var cases = []struct {
		subject string
		found   bool
	}{
		{"ABC", true},
		{"ABC1", true},
		{"1ABC", true},
		{"def", true},
		{"ghI", true},
		{"d ef", false},
		{"de", false},
		{"de f", false},
		{"我是中国人", true},
		{"iwind.liu@gmail.com", true},
	}
	for _, c := range cases {
		var result = tree.Lookup(c.subject, true)
		if c.found {
			a.IsTrue(result)
		} else {
			a.IsFalse(result)
		}
	}
}
// TestNewRuneTree2 runs a case-insensitive lookup over a user agent string;
// the result is not checked.
func TestNewRuneTree2(t *testing.T) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var keywords = []string{"abc", "abd", "def", "GHI", "中国", "@"}
	re.NewRuneTree(keywords).Lookup(subject, true)
}
// BenchmarkRuneMap_Lookup measures a lookup that finds a keyword.
func BenchmarkRuneMap_Lookup(b *testing.B) {
	const subject = "我来自中国"

	var tree = re.NewRuneTree([]string{"abc", "abd", "def", "ghi", "中国"})
	for i := 0; i < b.N; i++ {
		tree.Lookup(subject, true)
	}
}
// BenchmarkRuneMap_Lookup2_NOT_FOUND measures a lookup over a user agent
// string with a keyword set of "abc", "abd", "cde" and "GHI".
func BenchmarkRuneMap_Lookup2_NOT_FOUND(b *testing.B) {
	const subject = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"

	var tree = re.NewRuneTree([]string{"abc", "abd", "cde", "GHI"})
	for i := 0; i < b.N; i++ {
		tree.Lookup(subject, true)
	}
}
// BenchmarkRune_Regexp_FOUND measures the standard-library regexp on a
// subject that contains a match.
func BenchmarkRune_Regexp_FOUND(b *testing.B) {
	const subject = "HELLO WORLD ABC 123 456 abc HELLO WORLD HELLO WORLD ABC 123 456 abc HELLO WORLD HELLO WORLD ABC 123 456 abc HELLO WORLD"

	var reg = regexp.MustCompile("(?i)abc|abd|cde|GHI")
	for i := 0; i < b.N; i++ {
		reg.MatchString(subject)
	}
}