revise digest add more complex config
This commit is contained in:
parent
a78815f3d3
commit
ee9ba3fcf0
|
@ -22,6 +22,24 @@ var more = regexp.MustCompile("<!--more(.*?)?-->")
|
||||||
|
|
||||||
var removeWpBlock = regexp.MustCompile("<!-- /?wp:.*-->")
|
var removeWpBlock = regexp.MustCompile("<!-- /?wp:.*-->")
|
||||||
|
|
||||||
|
// DigestConfig holds the settings used when building post digests. The
// exported fields are mapped to configuration keys through their yaml tags;
// specialSolve is derived from DigestTagOccupyNum by InitDigestCache.
type DigestConfig struct {
|
||||||
|
// DigestWordCount is presumably the digest length limit — TODO confirm against callers.
DigestWordCount int `yaml:"digestWordCount"`
|
||||||
|
// DigestAllowTag is presumably the set of tags kept in a digest — TODO confirm against callers.
DigestAllowTag string `yaml:"digestAllowTag"`
|
||||||
|
// DigestRegex, when non-empty, is passed to digest.SetQutos by InitDigestCache.
DigestRegex string `yaml:"digestRegex"`
|
||||||
|
// DigestTagOccupyNum lists the per-tag counting rules.
DigestTagOccupyNum []struct {
|
||||||
|
// Tag is one or more tags written back to back, e.g. "<pre><code>".
// InitDigestCache removes spaces and splits the value on "<".
Tag string `yaml:"tag"`
|
||||||
|
// Num is copied into digest.SpecialSolveConf.Num for every tag in Tag.
Num int `yaml:"num"`
|
||||||
|
// ChuckOvered is copied into digest.SpecialSolveConf.ChuckOvered.
ChuckOvered bool `yaml:"chuckOvered"`
|
||||||
|
// EscapeCharacter lists rules for characters and child tags found
// inside the tags named by Tag.
EscapeCharacter []struct {
|
||||||
|
// Tags is a list of child tags in the same "<a><b>" form as Tag;
// an empty value means the entry has no child-tag rule.
Tags string `yaml:"tags"`
|
||||||
|
// Character lists the characters the rule applies to. Only entries
// that are exactly one rune long are used by InitDigestCache.
Character []string `yaml:"character"`
|
||||||
|
// Num is copied into digest.SpecialSolve.Num.
Num int `yaml:"num"`
|
||||||
|
// ChuckOvered is copied into digest.SpecialSolve.ChuckOvered.
ChuckOvered bool `yaml:"chuckOvered"`
|
||||||
|
} `yaml:"escapeCharacter"`
|
||||||
|
} `yaml:"digestTagOccupyNum"`
|
||||||
|
// specialSolve maps a tag such as "<pre>" to its rule. It has no yaml tag
// and is filled in by InitDigestCache.
specialSolve map[string]digest.SpecialSolveConf
|
||||||
|
}
|
||||||
|
|
||||||
var digestConfig *safety.Var[DigestConfig]
|
var digestConfig *safety.Var[DigestConfig]
|
||||||
|
|
||||||
func InitDigestCache() {
|
func InitDigestCache() {
|
||||||
|
@ -37,15 +55,57 @@ func InitDigestCache() {
|
||||||
c.DigestAllowTag = config.GetConfig().DigestAllowTag
|
c.DigestAllowTag = config.GetConfig().DigestAllowTag
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
if len(c.DigestTagOccupyNum) > 0 {
|
if c.DigestRegex != "" {
|
||||||
c.tagNum = map[string]int{}
|
digest.SetQutos(c.DigestRegex)
|
||||||
for _, item := range c.DigestTagOccupyNum {
|
}
|
||||||
tags := strings.Split(item.Tag, "<")
|
if len(c.DigestTagOccupyNum) <= 1 {
|
||||||
for _, tag := range tags {
|
return c
|
||||||
if tag == "" {
|
}
|
||||||
continue
|
c.specialSolve = map[string]digest.SpecialSolveConf{}
|
||||||
|
for _, item := range c.DigestTagOccupyNum {
|
||||||
|
tags := strings.Split(strings.ReplaceAll(item.Tag, " ", ""), "<")
|
||||||
|
for _, tag := range tags {
|
||||||
|
if tag == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
tag = str.Join("<", tag)
|
||||||
|
var ec map[rune]digest.SpecialSolve
|
||||||
|
var specialTags map[string]digest.SpecialSolve
|
||||||
|
if len(item.EscapeCharacter) > 0 {
|
||||||
|
ec = make(map[rune]digest.SpecialSolve)
|
||||||
|
for _, esc := range item.EscapeCharacter {
|
||||||
|
for _, i := range esc.Character {
|
||||||
|
s := []rune(i)
|
||||||
|
if len(s) == 1 {
|
||||||
|
ec[s[0]] = digest.SpecialSolve{
|
||||||
|
Num: esc.Num,
|
||||||
|
ChuckOvered: esc.ChuckOvered,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if esc.Tags == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
tagss := strings.Split(strings.ReplaceAll(esc.Tags, " ", ""), "<")
|
||||||
|
specialTags = make(map[string]digest.SpecialSolve)
|
||||||
|
for _, t := range tagss {
|
||||||
|
if t == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t = str.Join("<", t)
|
||||||
|
specialTags[t] = digest.SpecialSolve{
|
||||||
|
Num: esc.Num,
|
||||||
|
ChuckOvered: esc.ChuckOvered,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
c.tagNum[str.Join("<", tag)] = item.Num
|
|
||||||
|
}
|
||||||
|
c.specialSolve[tag] = digest.SpecialSolveConf{
|
||||||
|
Num: item.Num,
|
||||||
|
ChuckOvered: item.ChuckOvered,
|
||||||
|
EscapeCharacter: ec,
|
||||||
|
Tags: specialTags,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,16 +113,6 @@ func InitDigestCache() {
|
||||||
}, "digestConfig")
|
}, "digestConfig")
|
||||||
}
|
}
|
||||||
|
|
||||||
type DigestConfig struct {
|
|
||||||
DigestWordCount int `yaml:"digestWordCount"`
|
|
||||||
DigestAllowTag string `yaml:"digestAllowTag"`
|
|
||||||
DigestTagOccupyNum []struct {
|
|
||||||
Tag string `yaml:"tag"`
|
|
||||||
Num int `yaml:"num"`
|
|
||||||
} `yaml:"digestTagOccupyNum"`
|
|
||||||
tagNum map[string]int
|
|
||||||
}
|
|
||||||
|
|
||||||
// RemoveWpBlock returns s with every match of the package-level removeWpBlock
// pattern ("<!-- /?wp:.*-->") replaced by the empty string.
// NOTE(review): ".*" is greedy, so when two block comments share one line the
// text between them is part of the same match — confirm this is intended.
func RemoveWpBlock(s string) string {
|
func RemoveWpBlock(s string) string {
|
||||||
return removeWpBlock.ReplaceAllString(s, "")
|
return removeWpBlock.ReplaceAllString(s, "")
|
||||||
}
|
}
|
||||||
|
@ -94,7 +144,12 @@ func Digests(content string, id uint64, limit int, fn func(id uint64, content, c
|
||||||
if length <= limit {
|
if length <= limit {
|
||||||
return content
|
return content
|
||||||
}
|
}
|
||||||
content, closeTag = digest.Html(content, limit, c.tagNum)
|
if len(c.specialSolve) > 0 {
|
||||||
|
content, closeTag = digest.CustomizeHtml(content, limit, c.specialSolve)
|
||||||
|
} else {
|
||||||
|
content, closeTag = digest.Html(content, limit)
|
||||||
|
}
|
||||||
|
|
||||||
if fn == nil {
|
if fn == nil {
|
||||||
return PostsMore(id, content, closeTag)
|
return PostsMore(id, content, closeTag)
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,13 +67,46 @@ digestWordCount: 300
|
||||||
# 摘要允许的标签 默认为<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>
|
# 摘要允许的标签 默认为<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>
|
||||||
digestTag: "<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>"
|
digestTag: "<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>"
|
||||||
|
|
||||||
# 可以设置每个标签占用的字数,默认都为0 set tag occupied num, default every tag occupied 0
|
# 设置html转义实体正则 Regex that matches HTML character entities; the default is defined in plugin/digest/digest.go:12
|
||||||
digestTagOccupyNum: [
|
#digestRegex: ""*|&*|<*|>*| *|[*|]*| *"
|
||||||
{
|
|
||||||
tag: "<img><table>",
|
# 可以设置每个标签或者转义字符占用的字数,默认都为0 Set how many characters each tag or escape character counts as; every tag counts as 0 by default
|
||||||
num: 2
|
#digestTagOccupyNum: [
|
||||||
},
|
# {
|
||||||
]
|
# tag: "<top>", # 最外层固定tag the fixed outermost (root) tag
|
||||||
|
# num: 0,
|
||||||
|
# chuckOvered: false,
|
||||||
|
# escapeCharacter: [
|
||||||
|
# {
|
||||||
|
# character: [ "\n","\r","\t" ],
|
||||||
|
# num: 0
|
||||||
|
# },
|
||||||
|
# ]
|
||||||
|
# },{
|
||||||
|
# tag: "<img>",
|
||||||
|
# num: 1,
|
||||||
|
# chuckOvered: false
|
||||||
|
# },
|
||||||
|
# {
|
||||||
|
# tag: "<pre><code>",
|
||||||
|
# num: 0,
|
||||||
|
# escapeCharacter: [
|
||||||
|
# {
|
||||||
|
# character: ["\t"],
|
||||||
|
# num: 4,
|
||||||
|
# chuckOvered: false,
|
||||||
|
# },
|
||||||
|
# {
|
||||||
|
# character: ["\n","\r"],
|
||||||
|
# num: 1
|
||||||
|
# },
|
||||||
|
# {
|
||||||
|
# tags: "<br>",
|
||||||
|
# num: 1
|
||||||
|
# },
|
||||||
|
# ]
|
||||||
|
# },
|
||||||
|
#]
|
||||||
|
|
||||||
# 到达指定并发请求数时随机sleep
|
# 到达指定并发请求数时随机sleep
|
||||||
maxRequestSleepNum: 100
|
maxRequestSleepNum: 100
|
||||||
|
|
|
@ -133,6 +133,13 @@ func StripTagsX(str, allowable string) string {
|
||||||
|
|
||||||
var selfCloseTags = map[string]string{"area": "", "base": "", "basefont": "", "br": "", "col": "", "command": "", "fecolormatrix": "", "embed": "", "frame": "", "hr": "", "img": "", "input": "", "isindex": "", "link": "", "fecomposite": "", "fefuncr": "", "fefuncg": "", "fefuncb": "", "fefunca": "", "meta": "", "param": "", "!doctype": "", "source": "", "track": "", "wbr": ""}
|
var selfCloseTags = map[string]string{"area": "", "base": "", "basefont": "", "br": "", "col": "", "command": "", "fecolormatrix": "", "embed": "", "frame": "", "hr": "", "img": "", "input": "", "isindex": "", "link": "", "fecomposite": "", "fefuncr": "", "fefuncg": "", "fefuncb": "", "fefunca": "", "meta": "", "param": "", "!doctype": "", "source": "", "track": "", "wbr": ""}
|
||||||
|
|
||||||
|
// GetSelfCloseTags returns the package-level selfCloseTags map. The map itself
// is returned, not a copy, so changes made by the caller are visible to this
// package.
func GetSelfCloseTags() map[string]string {
|
||||||
|
return selfCloseTags
|
||||||
|
}
|
||||||
|
// SetSelfCloseTags replaces the package-level selfCloseTags map with m.
// A map obtained earlier from GetSelfCloseTags is not updated by this call;
// holders of the old map keep seeing the old contents.
func SetSelfCloseTags(m map[string]string) {
|
||||||
|
selfCloseTags = m
|
||||||
|
}
|
||||||
|
|
||||||
func CloseTag(str string) string {
|
func CloseTag(str string) string {
|
||||||
tags := tag.FindAllString(str, -1)
|
tags := tag.FindAllString(str, -1)
|
||||||
if len(tags) < 1 {
|
if len(tags) < 1 {
|
||||||
|
|
|
@ -9,7 +9,24 @@ import (
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
var quto = regexp.MustCompile(`" *|& *|< *|> ?| *`)
|
var quto = regexp.MustCompile(`"*|&*|<*|>*| *|[*|]*| *`)
|
||||||
|
|
||||||
|
// SetQutos replaces the package-level quto pattern with the compiled form of
// reg. It uses regexp.MustCompile, so an invalid pattern panics.
// NOTE(review): quto is assigned without any visible synchronization —
// confirm this is never called while digests are being generated.
func SetQutos(reg string) {
|
||||||
|
quto = regexp.MustCompile(reg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SpecialSolveConf is the counting rule CustomizeHtml applies to one tag.
type SpecialSolveConf struct {
|
||||||
|
// Num is added to the running count when the tag is encountered.
Num int
|
||||||
|
// ChuckOvered makes CustomizeHtml stop before the tag when adding Num
// would push the count past the limit.
ChuckOvered bool
|
||||||
|
// EscapeCharacter gives per-character rules for text whose innermost open
// tag is this one.
EscapeCharacter map[rune]SpecialSolve
|
||||||
|
// Tags gives rules for child tags, keyed by tag text, that are consulted
// before the child tag's own entry.
Tags map[string]SpecialSolve
|
||||||
|
}
|
||||||
|
// SpecialSolve is the cost CustomizeHtml assigns to a single character or
// child tag.
type SpecialSolve struct {
|
||||||
|
// Num is added to the running count for the character or tag.
Num int
|
||||||
|
// ChuckOvered makes CustomizeHtml stop when adding Num would push the
// count past the limit.
ChuckOvered bool
|
||||||
|
}
|
||||||
|
|
||||||
|
var selfCloseTags = html.GetSelfCloseTags()
|
||||||
|
|
||||||
func StripTags(content, allowTag string) string {
|
func StripTags(content, allowTag string) string {
|
||||||
content = strings.Trim(content, " \t\n\r\000\x0B")
|
content = strings.Trim(content, " \t\n\r\000\x0B")
|
||||||
|
@ -18,7 +35,7 @@ func StripTags(content, allowTag string) string {
|
||||||
return content
|
return content
|
||||||
}
|
}
|
||||||
|
|
||||||
func Html(content string, limit int, m map[string]int) (string, string) {
|
func Html(content string, limit int) (string, string) {
|
||||||
closeTag := ""
|
closeTag := ""
|
||||||
length := utf8.RuneCountInString(content) + 1
|
length := utf8.RuneCountInString(content) + 1
|
||||||
if length <= limit {
|
if length <= limit {
|
||||||
|
@ -39,19 +56,16 @@ func Html(content string, limit int, m map[string]int) (string, string) {
|
||||||
total := len(ru)
|
total := len(ru)
|
||||||
l, r := '<', '>'
|
l, r := '<', '>'
|
||||||
i := -1
|
i := -1
|
||||||
var tag []rune
|
|
||||||
for {
|
for {
|
||||||
i++
|
i++
|
||||||
for len(runeIndex) > 0 && i >= runeIndex[0][0] {
|
if end >= limit || i >= total {
|
||||||
ints := runeIndex[0]
|
break
|
||||||
if ints[0] <= i {
|
}
|
||||||
i = ints[1]
|
for len(runeIndex) > 0 && i == runeIndex[0][0] {
|
||||||
runeIndex = runeIndex[1:]
|
i = runeIndex[0][1]
|
||||||
end++
|
runeIndex = runeIndex[1:]
|
||||||
continue
|
end++
|
||||||
} else {
|
continue
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if end >= limit || i >= total {
|
if end >= limit || i >= total {
|
||||||
|
@ -62,19 +76,10 @@ func Html(content string, limit int, m map[string]int) (string, string) {
|
||||||
continue
|
continue
|
||||||
} else if ru[i] == r {
|
} else if ru[i] == r {
|
||||||
tagIn = false
|
tagIn = false
|
||||||
if len(m) > 0 {
|
|
||||||
tags := str.Join("<", strings.Split(string(tag), " ")[0], ">")
|
|
||||||
tag = tag[:0]
|
|
||||||
if n, ok := m[tags]; ok && n > 0 {
|
|
||||||
end += n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if tagIn == false {
|
if tagIn == false && ru[i] != '\n' {
|
||||||
end++
|
end++
|
||||||
} else if len(m) > 0 {
|
|
||||||
tag = append(tag, ru[i])
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if i > total {
|
if i > total {
|
||||||
|
@ -84,3 +89,119 @@ func Html(content string, limit int, m map[string]int) (string, string) {
|
||||||
closeTag = html.CloseTag(content)
|
closeTag = html.CloseTag(content)
|
||||||
return content, closeTag
|
return content, closeTag
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CustomizeHtml(content string, limit int, m map[string]SpecialSolveConf) (string, string) {
|
||||||
|
closeTag := ""
|
||||||
|
length := utf8.RuneCountInString(content) + 1
|
||||||
|
if length <= limit {
|
||||||
|
return content, ""
|
||||||
|
}
|
||||||
|
index := quto.FindAllStringIndex(content, -1)
|
||||||
|
var runeIndex [][]int
|
||||||
|
if len(index) > 0 {
|
||||||
|
runeIndex = slice.Map(index, func(t []int) []int {
|
||||||
|
return slice.Map(t, func(i int) int {
|
||||||
|
return utf8.RuneCountInString(content[:i])
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
count := 0
|
||||||
|
runeContent := []rune(content)
|
||||||
|
tagIn := false
|
||||||
|
runeTotal := len(runeContent)
|
||||||
|
l, r := '<', '>'
|
||||||
|
i := -1
|
||||||
|
var currentTag, parentTag string
|
||||||
|
var allTags = []string{"<top>"}
|
||||||
|
var tag []rune
|
||||||
|
var tagLocal = 0
|
||||||
|
for {
|
||||||
|
i++
|
||||||
|
if count >= limit || i >= runeTotal {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
for len(runeIndex) > 0 && i == runeIndex[0][0] {
|
||||||
|
i = runeIndex[0][1]
|
||||||
|
runeIndex = runeIndex[1:]
|
||||||
|
count++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if count >= limit || i >= runeTotal {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if runeContent[i] == l {
|
||||||
|
tagLocal = i
|
||||||
|
tagIn = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if tagIn && runeContent[i] == r {
|
||||||
|
tagIn = false
|
||||||
|
tags := str.Join("<", string(tag), ">")
|
||||||
|
if strings.Contains(tags, " ") {
|
||||||
|
tags = str.Join("<", strings.Split(string(tag), " ")[0], ">")
|
||||||
|
}
|
||||||
|
currentTag = tags
|
||||||
|
rawTag := strings.ReplaceAll(strings.Trim(tags, "<>"), "/", "")
|
||||||
|
_, ok := selfCloseTags[rawTag]
|
||||||
|
if !ok {
|
||||||
|
if '/' == tags[1] {
|
||||||
|
parentTag = allTags[len(allTags)-2]
|
||||||
|
allTags = allTags[:len(allTags)-1]
|
||||||
|
} else {
|
||||||
|
parentTag = allTags[len(allTags)-1]
|
||||||
|
allTags = append(allTags, currentTag)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
parentTag = allTags[len(allTags)-1]
|
||||||
|
}
|
||||||
|
tag = tag[:0]
|
||||||
|
if len(m) > 0 {
|
||||||
|
nn, ok := m[parentTag]
|
||||||
|
if ok {
|
||||||
|
if n, ok := nn.Tags[tags]; ok {
|
||||||
|
if (count+n.Num) > limit && n.ChuckOvered {
|
||||||
|
i = tagLocal
|
||||||
|
break
|
||||||
|
}
|
||||||
|
count += n.Num
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if n, ok := m[tags]; ok {
|
||||||
|
if (count+n.Num) > limit && n.ChuckOvered {
|
||||||
|
i = tagLocal
|
||||||
|
break
|
||||||
|
}
|
||||||
|
count += n.Num
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if tagIn {
|
||||||
|
tag = append(tag, runeContent[i])
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
currentTags := allTags[len(allTags)-1]
|
||||||
|
mm, ok := m[currentTags]
|
||||||
|
if !ok {
|
||||||
|
count++
|
||||||
|
} else if len(mm.EscapeCharacter) > 0 {
|
||||||
|
if n, ok := mm.EscapeCharacter[runeContent[i]]; ok {
|
||||||
|
if (count+n.Num) > limit && n.ChuckOvered {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
count += n.Num
|
||||||
|
} else {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i > runeTotal {
|
||||||
|
i = runeTotal
|
||||||
|
}
|
||||||
|
content = string(runeContent[:i])
|
||||||
|
closeTag = html.CloseTag(content)
|
||||||
|
return content, closeTag
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user