// Package bbclearn turns the BBC Learning English (Chinese) index page into an
// RSS 2.0 feed.
package bbclearn
|
|
|
|
import (
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/fthvgb1/wp-go/rss2"
)
|
|
|
|
func LearnParse(s string, recentDay int) string {
|
|
document, err := goquery.NewDocumentFromReader(strings.NewReader(s))
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
var item []rss2.Item
|
|
item = append(item, full(document, recentDay))
|
|
item = append(item, items(document, recentDay)...)
|
|
if len(item) < 1 {
|
|
return ""
|
|
}
|
|
rss := rss2.Rss2{
|
|
Title: "BBC 英语教学",
|
|
Link: "https://www.bbc.co.uk/learningenglish/chinese/",
|
|
LastBuildDate: time.Now().Format(time.RFC1123Z),
|
|
Items: item,
|
|
}
|
|
return rss.GetXML()
|
|
}
|
|
|
|
// parseTime extracts the date encoded at the end of an article URL.
//
// The text after the last "-" in u is parsed with the layout "060102"
// (two-digit year, month, day), so ".../ep-230105" yields 2023-01-05 UTC.
//
// It returns an error, together with the zero time, when u contains no "-"
// or when the suffix is not a valid date in that layout.
func parseTime(u string) (date time.Time, err error) {
	i := strings.LastIndex(u, "-")
	if i < 0 {
		// Without a separator there is no date segment at all; report it
		// instead of handing back the zero time as if it were a real date.
		return time.Time{}, fmt.Errorf("bbclearn: no date suffix in %q", u)
	}
	date, err = time.Parse("060102", u[i+1:])
	if err != nil {
		return time.Time{}, fmt.Errorf("bbclearn: parsing date suffix of %q: %w", u, err)
	}
	return date, nil
}
|
|
|
|
func dateFilter(u string, recentDay int) (r bool) {
|
|
date, err := parseTime(u)
|
|
if err != nil {
|
|
return
|
|
}
|
|
t := time.Now()
|
|
if t.Sub(date).Hours()/24-float64(recentDay) > 0 {
|
|
return
|
|
}
|
|
r = true
|
|
return
|
|
}
|
|
|
|
func fetch(u string) (r rss2.Item) {
|
|
res, err := http.Get(u)
|
|
if err != nil {
|
|
return
|
|
}
|
|
dom, err := goquery.NewDocumentFromReader(res.Body)
|
|
if err != nil {
|
|
return
|
|
}
|
|
s := dom.Find("#bbcle-content .widget-container-left")
|
|
content, err := goquery.OuterHtml(s.Find(".widget-list,.widget-pagelink").Remove().End())
|
|
if err != nil {
|
|
return
|
|
}
|
|
r.Title = s.Find(`div[data-widget-index="3"] h3`).Text()
|
|
date, _ := parseTime(u)
|
|
r.PubDate = date.Format(time.RFC1123Z)
|
|
r.Guid = u
|
|
r.Description = content
|
|
return
|
|
}
|
|
|
|
func full(doc *goquery.Document, recentDay int) (r rss2.Item) {
|
|
a := doc.Find("#bbcle-content .widget-container-full a")
|
|
u, ok := a.Attr("href")
|
|
if !ok {
|
|
return
|
|
}
|
|
if !dateFilter(u, recentDay) {
|
|
return
|
|
}
|
|
r = fetch(u)
|
|
return
|
|
}
|
|
|
|
func items(doc *goquery.Document, recentDay int) (r []rss2.Item) {
|
|
doc.Find("#bbcle-content > div > div.widget-container.widget-container-full > div.widget.widget-image.widget-image-two_column > div a").Each(func(i int, s *goquery.Selection) {
|
|
u, ok := s.Attr("href")
|
|
if !ok || !dateFilter(u, recentDay) {
|
|
return
|
|
}
|
|
r = append(r, fetch(u))
|
|
})
|
|
return
|
|
}
|