摘要调整

This commit is contained in:
xing 2023-04-18 21:05:37 +08:00
parent 9e71de8e97
commit 8da369b166
5 changed files with 104 additions and 82 deletions

View File

@ -60,9 +60,10 @@ cacheTime:
commentsCacheTime: 24h
# 随机sleep时间
sleepTime: [ 1s,3s ]
# 摘要字数
# 摘要字数 >0截取指定字数 =0输出空字符 <0为不截取,原样输出
digestWordCount: 300
# Tags allowed to remain in the digest. The key must match the `digestAllowTag` yaml tag on the Config struct.
# When left empty the default is <a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>
digestAllowTag: "<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>"
# 到达指定并发请求数时随机sleep
maxRequestSleepNum: 100
# 全局最大请求数超过直接403

View File

@ -7,6 +7,7 @@ import (
str "github.com/fthvgb1/wp-go/helper/strings"
"github.com/fthvgb1/wp-go/internal/pkg/logs"
"github.com/fthvgb1/wp-go/internal/pkg/models"
"github.com/fthvgb1/wp-go/internal/plugins"
"github.com/fthvgb1/wp-go/internal/plugins/wpposts"
"github.com/fthvgb1/wp-go/internal/wpconfig"
"github.com/fthvgb1/wp-go/plugin/digest"
@ -63,7 +64,7 @@ func feed(arg ...any) (xml []string, err error) {
wpposts.PasswordProjectTitle(&t)
wpposts.PasswdProjectContent(&t)
} else {
desc = digest.Raw(t.PostContent, 55, fmt.Sprintf("/p/%d", t.Id))
desc = plugins.Digests(t.PostContent, t.Id, 55, nil)
}
l := ""
if t.CommentStatus == "open" && t.CommentCount > 0 {
@ -172,8 +173,7 @@ func commentsFeed(args ...any) (r []string, err error) {
wpposts.PasswdProjectContent(&post)
content = post.PostContent
} else {
desc = digest.ClearHtml(t.CommentContent)
content = desc
content = digest.StripTags(t.CommentContent, "")
}
return rss2.Item{
Title: fmt.Sprintf("%s对《%s》的评论", t.CommentAuthor, post.PostTitle),

View File

@ -20,6 +20,7 @@ type Config struct {
Mail Mail `yaml:"mail" json:"mail"`
CacheTime CacheTime `yaml:"cacheTime" json:"cacheTime"`
DigestWordCount int `yaml:"digestWordCount" json:"digestWordCount,omitempty"`
DigestAllowTag string `yaml:"digestAllowTag" json:"digestAllowTag"`
MaxRequestSleepNum int64 `yaml:"maxRequestSleepNum" json:"maxRequestSleepNum,omitempty"`
MaxRequestNum int64 `yaml:"maxRequestNum" json:"maxRequestNum,omitempty"`
SingleIpSearchNum int64 `yaml:"singleIpSearchNum" json:"singleIpSearchNum,omitempty"`

View File

@ -4,34 +4,72 @@ import (
"context"
"fmt"
"github.com/fthvgb1/wp-go/cache"
"github.com/fthvgb1/wp-go/helper"
"github.com/fthvgb1/wp-go/internal/cmd/cachemanager"
"github.com/fthvgb1/wp-go/internal/pkg/config"
"github.com/fthvgb1/wp-go/internal/pkg/models"
"github.com/fthvgb1/wp-go/plugin/digest"
"regexp"
"strings"
"time"
)
// digestCache stores generated digests as strings under uint64 keys (Digest
// passes the post ID as the key). It is assigned by InitDigestCache, so it is
// nil until that function has run.
var digestCache *cache.MapCache[uint64, string]
// more matches the "<!--more-->" marker, including an optional trailing label
// such as "<!--more read on-->".
var more = regexp.MustCompile("<!--more(.*?)?-->")

// removeWpBlock matches a single block delimiter comment such as
// "<!-- wp:paragraph -->" or "<!-- /wp:paragraph -->". The body is matched
// lazily so that an opening and a closing comment on the same line are removed
// separately instead of swallowing the markup between them.
var removeWpBlock = regexp.MustCompile("<!-- /?wp:.*?-->")
func InitDigestCache() {
digestCache = cachemanager.MapCacheBy[uint64](digestRaw, config.GetConfig().CacheTime.DigestCacheTime)
}
// RemoveWpBlock returns s with every match of the removeWpBlock pattern
// (the "<!-- wp:... -->" / "<!-- /wp:... -->" comments) deleted.
func RemoveWpBlock(s string) string {
	const replacement = ""
	cleaned := removeWpBlock.ReplaceAllString(s, replacement)
	return cleaned
}
func digestRaw(arg ...any) (string, error) {
str := arg[0].(string)
id := arg[1].(uint64)
limit := arg[2].(int)
ctx := arg[0].(context.Context)
s := arg[1].(string)
id := arg[2].(uint64)
limit := arg[3].(int)
if limit < 0 {
return str, nil
return s, nil
} else if limit == 0 {
return "", nil
}
return digest.Raw(str, limit, fmt.Sprintf("/p/%d", id)), nil
s = more.ReplaceAllString(s, "")
fn := helper.GetContextVal(ctx, "postMoreFn", PostsMore)
return Digests(s, id, limit, fn), nil
}
// Digests builds the digest of a post.
//
// content is first cleared of WordPress block comments and of every tag outside
// the configured DigestAllowTag list (a built-in list is used when the setting
// is empty), then cut by digest.Html using limit. When something was cut, fn
// receives the post id, the shortened content and the closing tags reported by
// digest.Html, and its result is returned; a nil fn means PostsMore.
//
// When digest.Html removed nothing, the content is returned with only the
// closing tags appended: the "......" marker and the read-more link describe a
// truncation and are therefore not added to a post that is shown in full.
func Digests(content string, id uint64, limit int, fn func(id uint64, content, closeTag string) string) string {
	content = RemoveWpBlock(content)
	tag := config.GetConfig().DigestAllowTag
	if tag == "" {
		tag = "<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>"
	}
	content = digest.StripTags(content, tag)

	cut, closeTag := digest.Html(content, limit)
	// digest.Html returns a prefix of its input, so equal lengths mean no cut.
	if len(cut) == len(content) {
		return cut + closeTag
	}
	if fn == nil {
		fn = PostsMore
	}
	return fn(id, cut, closeTag)
}
// PostsMore decorates a shortened post with the "......" marker and a
// "read-more" link to /p/<id>.
//
// Normally the marker is written before closeTag so that it ends up inside the
// element being closed. When closeTag mentions "pre" or "code" the marker is
// written after closeTag instead.
func PostsMore(id uint64, content, closeTag string) string {
	layout := `%s......%s<p class="read-more"><a href="/p/%d">继续阅读</a></p>`
	for _, name := range []string{"pre", "code"} {
		if strings.Contains(closeTag, name) {
			layout = `%s%s......<p class="read-more"><a href="/p/%d">继续阅读</a></p>`
			break
		}
	}
	return fmt.Sprintf(layout, content, closeTag, id)
}
// Digest replaces post.PostContent with its digest, fetched from digestCache
// under the post ID.
//
// The values after time.Second are forwarded to the cache loader, digestRaw,
// which reads them as (ctx, content, id, limit); that is why ctx is passed a
// second time. limit has the meaning documented on digestRaw.
// NOTE(review): time.Second is presumably the lookup timeout — confirm against
// cache.MapCache.GetCache.
//
// If the lookup fails the post is left untouched rather than being overwritten
// with whatever value accompanied the error.
func Digest(ctx context.Context, post *models.Posts, limit int) {
	content, err := digestCache.GetCache(ctx, post.Id, time.Second, ctx, post.PostContent, post.Id, limit)
	if err != nil {
		return
	}
	post.PostContent = content
}

View File

@ -1,7 +1,6 @@
package digest
import (
"fmt"
"github.com/fthvgb1/wp-go/helper/html"
"github.com/fthvgb1/wp-go/helper/slice"
"regexp"
@ -9,79 +8,62 @@ import (
"unicode/utf8"
)
// removeWpBlock matches a single block delimiter comment such as
// "<!-- wp:paragraph -->" or "<!-- /wp:paragraph -->". The body is matched
// lazily so that an opening and a closing comment on the same line are removed
// separately instead of swallowing the markup between them.
var removeWpBlock = regexp.MustCompile("<!-- /?wp:.*?-->")

// more matches the "<!--more-->" marker, including an optional trailing label.
var more = regexp.MustCompile("<!--more(.*?)?-->")

// quto matches the HTML entities &quot; &amp; &lt; &gt; and &nbsp; together
// with the spaces that follow them (at most one space after &gt;).
var quto = regexp.MustCompile(`&quot; *|&amp; *|&lt; *|&gt; ?|&nbsp; *`)
// NOTE(review): from here to the closing brace of Raw the text reads like two
// revisions of the file merged line by line: the ClearHtml and StripTags headers
// open back to back without a closing brace in between, and the end of Raw's
// body is followed by a StripTagsX call that uses allowTag, a name only
// StripTags declares. Confirm which lines are current before editing.

// ClearHtml removes matches of removeWpBlock from str and then runs the same
// trim / "]]>" escape / StripTagsX steps as below with a fixed tag list.
// NOTE(review): it returns str, the untouched input, not the cleaned content.
func ClearHtml(str string) string {
content := removeWpBlock.ReplaceAllString(str, "")
// StripTags trims spaces, tabs, newlines, carriage returns, NUL and vertical-tab
// bytes from both ends of content, rewrites "]]>" as "]]&gt;" and passes the
// result to html.StripTagsX together with allowTag (presumably the list of tags
// to keep — confirm against helper/html).
func StripTags(content, allowTag string) string {
content = strings.Trim(content, " \t\n\r\000\x0B")
content = strings.Replace(content, "]]>", "]]&gt;", -1)
content = html.StripTagsX(content, "<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>")
return str
}
// Raw cleans str and, when it is longer than limit, cuts it and appends the
// "......" marker plus a read-more link pointing at u.
func Raw(str string, limit int, u string) string {
// When a "<!--more-->" marker is present, str is reduced to the text before it.
// NOTE(review): the branch then returns "", which discards that text — confirm.
if r := more.FindString(str); r != "" {
m := strings.Split(str, r)
str = m[0]
return ""
}
content := removeWpBlock.ReplaceAllString(str, "")
content = strings.Trim(content, " \t\n\r\000\x0B")
content = strings.Replace(content, "]]>", "]]&gt;", -1)
content = html.StripTagsX(content, "<a><b><blockquote><br><cite><code><dd><del><div><dl><dt><em><h1><h2><h3><h4><h5><h6><i><img><li><ol><p><pre><span><strong><ul>")
// The +1 means content is only cut when its rune count is at least limit.
length := utf8.RuneCountInString(content) + 1
if length > limit {
// Byte ranges of the entities matched by quto; each one counts as one character.
index := quto.FindAllStringIndex(content, -1)
// end counts visible characters seen so far; i is the rune cursor.
end := 0
ru := []rune(content)
tagIn := false
total := len(ru)
l, r := '<', '>'
i := -1
for {
i++
for len(index) > 0 {
// Convert the byte offsets of the next entity into rune offsets.
ints := slice.Map(index[0], func(t int) int {
return utf8.RuneCountInString(content[:t])
})
if ints[0] <= i {
// With i == ints[0] this moves the cursor to ints[1], just past the entity.
i = i + i - ints[0] + ints[1] - ints[0]
index = index[1:]
end++
continue
} else {
break
}
}
if end >= limit || i >= total {
break
}
// Runes between '<' and '>' belong to a tag and are not counted.
if ru[i] == l {
tagIn = true
continue
} else if ru[i] == r {
tagIn = false
continue
}
if tagIn == false {
end++
}
}
if i > total {
i = total
}
content = string(ru[:i])
closeTag := html.CloseTag(content)
// The marker goes before closeTag, except when closeTag mentions pre or code.
tmp := `%s......%s<p class="read-more"><a href="%s">继续阅读</a></p>`
if strings.Contains(closeTag, "pre") || strings.Contains(closeTag, "code") {
tmp = `%s%s......<p class="read-more"><a href="%s">继续阅读</a></p>`
}
content = fmt.Sprintf(tmp, content, closeTag, u)
}
content = html.StripTagsX(content, allowTag)
return content
}
// Html cuts content after limit visible characters and reports the tags needed
// to close the resulting fragment.
//
// Counting rules:
//   - runes between '<' and '>' belong to a tag and are not counted;
//   - an entity matched by quto (with the spaces it swallows) counts as one
//     character and is never split;
//   - entities that occur inside a tag are ignored like the rest of the tag, so
//     the cut can no longer land in the middle of an attribute value;
//   - the limit is checked after every counted character, including between
//     adjacent entities.
//
// content is returned unchanged with an empty close tag when its rune count is
// below limit. Otherwise the first result is a prefix of content and the second
// is html.CloseTag of that prefix.
func Html(content string, limit int) (string, string) {
	if utf8.RuneCountInString(content)+1 <= limit {
		return content, ""
	}

	runes := []rune(content)
	total := len(runes)

	// pending holds byte ranges of entities not yet reached. Only the head is
	// converted to rune offsets, and only once, instead of on every loop pass.
	pending := quto.FindAllStringIndex(content, -1)
	nextStart, nextEnd, haveNext := 0, 0, false

	visible := 0
	inTag := false
	i := 0
	for i < total && visible < limit {
		if !haveNext && len(pending) > 0 {
			span := slice.Map(pending[0], func(b int) int {
				return utf8.RuneCountInString(content[:b])
			})
			pending = pending[1:]
			nextStart, nextEnd, haveNext = span[0], span[1], true
		}
		if haveNext && nextStart < i {
			// The cursor already walked past this entity (it sat inside a tag).
			haveNext = false
			continue
		}
		if haveNext && nextStart == i && !inTag {
			// Jump over the whole entity and count it as one character.
			i = nextEnd
			haveNext = false
			visible++
			continue
		}

		switch runes[i] {
		case '<':
			inTag = true
		case '>':
			inTag = false
		default:
			if !inTag {
				visible++
			}
		}
		i++
	}

	content = string(runes[:i])
	return content, html.CloseTag(content)
}