rss/bbclearn/bbc.go
2023-04-14 02:48:44 +08:00

100 lines
2.3 KiB
Go

package bbclearn
import (
"fmt"
"github.com/PuerkitoBio/goquery"
strings2 "github.com/fthvgb1/wp-go/helper/strings"
"github.com/fthvgb1/wp-go/rss2"
"net/http"
"strings"
"time"
)
// LearnParse converts the HTML of the BBC Learning English (Chinese) index
// page into an RSS 2.0 document and returns it as XML.
//
// s is the raw HTML of the index page. recentDay is forwarded to full and
// items, which use it to decide which linked articles are recent enough to be
// included. An empty string is returned when s cannot be parsed as HTML.
func LearnParse(s string, recentDay int) string {
	document, err := goquery.NewDocumentFromReader(strings.NewReader(s))
	if err != nil {
		return ""
	}

	candidates := append([]rss2.Item{full(document, recentDay)}, items(document, recentDay)...)

	// full and fetch signal "nothing to publish" (no link, link too old,
	// download or parse failure) by returning the zero Item. fetch assigns Guid
	// only once an article has been extracted, so an empty Guid identifies
	// those placeholders; emitting them would produce blank <item> elements.
	var feedItems []rss2.Item
	for _, it := range candidates {
		if it.Guid == "" {
			continue
		}
		feedItems = append(feedItems, it)
	}

	rss := rss2.Rss2{
		Title:         "BBC 英语教学",
		Link:          "https://www.bbc.co.uk/learningenglish/chinese/",
		LastBuildDate: time.Now().Format(time.RFC1123Z),
		Items:         feedItems,
	}
	return rss.GetXML()
}
// dateFilter reports whether the link u refers to an article dated within the
// last recentDay days.
//
// The date is taken from the text after the last "-" in u and must be in
// YYMMDD form (time layout "060102"). Links without a "-" or with a suffix
// that does not parse as a date are rejected.
//
// The comparison is made on whole calendar dates against today's local date,
// so the window correctly spans month and year boundaries (for example, on the
// 2nd of a month with recentDay = 7, articles from the end of the previous
// month are accepted). Dates later than today are accepted.
func dateFilter(u string, recentDay int) bool {
	i := strings.LastIndex(u, "-")
	if i < 0 {
		return false
	}
	date, err := time.Parse("060102", u[i+1:])
	if err != nil {
		return false
	}

	// time.Parse yields midnight UTC for the parsed date, so express today's
	// local calendar date the same way before subtracting the window; this
	// keeps the comparison purely date-based.
	now := time.Now()
	today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC)
	cutoff := today.AddDate(0, 0, -recentDay)

	// Diagnostic trace on stdout: oldest accepted date, then the link's date.
	// NOTE(review): this is the only use of fmt in the visible file, so
	// dropping the trace also requires dropping the import.
	fmt.Println(cutoff.Format("060102"), date.Format("060102"))

	return !date.Before(cutoff)
}
// fetch downloads the article page at u and converts it into an RSS item.
//
// The item's Guid is u, its Description is the outer HTML of the article's
// left-hand widget container (with the list and page-link widgets removed),
// and Title and PubDate are read from elements inside that container.
//
// The zero Item is returned when the request fails, the server answers with a
// non-2xx status, or the page cannot be parsed or serialized.
func fetch(u string) (r rss2.Item) {
	res, err := http.Get(u)
	if err != nil {
		return
	}
	// The body must always be closed, otherwise the underlying connection is
	// leaked for every article fetched.
	defer res.Body.Close()

	// Do not turn error pages (404, 500, ...) into feed entries.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		return
	}

	dom, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return
	}

	s := dom.Find("#bbcle-content .widget-container-left")
	// Remove() detaches the navigation widgets from the document and End()
	// steps back to s, so the serialized HTML no longer contains them.
	content, err := goquery.OuterHtml(s.Find(".widget-list,.widget-pagelink").Remove().End())
	if err != nil {
		return
	}

	r.Title = s.Find(`div[data-widget-index="3"] h3`).Text()

	// Normalize the sub-header text into a single line.
	// NOTE(review): CutSpecialDuplicate presumably collapses repeated spaces —
	// confirm against helper/strings.
	r.PubDate = strings.TrimSpace(s.Find(".widget-bbcle-featuresubheader").Text())
	r.PubDate = strings2.Replace(r.PubDate, map[string]string{
		"\n": "",
	})
	r.PubDate = strings2.CutSpecialDuplicate(r.PubDate, " ")

	r.Guid = u
	r.Description = content
	return
}
// full builds the feed item for the link found in the full-width widget
// container of the index page doc.
//
// The link's href is checked with dateFilter using recentDay and, when it
// passes, the article is downloaded with fetch. The zero Item is returned when
// the link has no href or is rejected by dateFilter.
func full(doc *goquery.Document, recentDay int) (r rss2.Item) {
	href, found := doc.Find("#bbcle-content .widget-container-full a").Attr("href")
	if found && dateFilter(href, recentDay) {
		r = fetch(href)
	}
	return
}
// items builds feed items for the links in the two-column image widget of the
// index page doc.
//
// Each link's href is checked with dateFilter using recentDay; links without
// an href or rejected by the filter are skipped. Accepted links are downloaded
// with fetch, in document order. Articles that fetch could not retrieve are
// left out of the result.
func items(doc *goquery.Document, recentDay int) (r []rss2.Item) {
	const selector = "#bbcle-content > div > div.widget-container.widget-container-full > div.widget.widget-image.widget-image-two_column > div a"

	doc.Find(selector).Each(func(_ int, s *goquery.Selection) {
		u, ok := s.Attr("href")
		if !ok || !dateFilter(u, recentDay) {
			return
		}
		// fetch returns the zero Item on failure and assigns Guid only on
		// success; skip failures instead of adding a blank entry to the feed.
		if it := fetch(u); it.Guid != "" {
			r = append(r, it)
		}
	})
	return
}