// 2023-04-13 18:03:56 +00:00
package bbclearn
import (
"fmt"
"github.com/PuerkitoBio/goquery"
strings2 "github.com/fthvgb1/wp-go/helper/strings"
"github.com/fthvgb1/wp-go/rss2"
"net/http"
"strings"
"time"
)
func LearnParse ( s string , recentDay int ) string {
document , err := goquery . NewDocumentFromReader ( strings . NewReader ( s ) )
if err != nil {
return ""
}
var item [ ] rss2 . Item
item = append ( item , full ( document , recentDay ) )
item = append ( item , items ( document , recentDay ) ... )
2023-04-14 14:46:06 +00:00
if len ( item ) < 1 {
return ""
}
2023-04-13 18:03:56 +00:00
rss := rss2 . Rss2 {
Title : "BBC 英语教学" ,
2023-04-13 18:48:44 +00:00
Link : "https://www.bbc.co.uk/learningenglish/chinese/" ,
2023-04-13 18:03:56 +00:00
LastBuildDate : time . Now ( ) . Format ( time . RFC1123Z ) ,
Items : item ,
}
return rss . GetXML ( )
}
// dateFilter reports whether the article URL u carries a publication date
// that falls inside the "recent" window.
//
// The date is taken from the text after the last "-" in u and must be in
// yymmdd form (for example ".../ep-230413"). URLs without a "-" or with a
// suffix that does not parse are rejected.
//
// A date is accepted when it is no older than recentDay days before today
// and no later than the end of the current month. The lower bound is computed
// with calendar arithmetic, so the window correctly reaches back into the
// previous month (or year) instead of comparing day-of-month numbers only.
func dateFilter(u string, recentDay int) (r bool) {
	parts := strings.Split(u, "-")
	if len(parts) < 2 {
		return false
	}
	date, err := time.Parse("060102", parts[len(parts)-1])
	if err != nil {
		return false
	}

	now := time.Now()

	// NOTE(review): debug trace kept from the original implementation. It is
	// the only visible use of the fmt import in this file, so it has to be
	// removed together with that import.
	fmt.Println(now.Day()-recentDay, date.Day())

	// time.Parse yields midnight UTC, so today's local calendar date is
	// expressed at midnight UTC as well to compare pure calendar days.
	today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC)
	oldest := today.AddDate(0, 0, -recentDay)
	// time.Date normalises month 13 to January of the following year.
	nextMonth := time.Date(now.Year(), now.Month()+1, 1, 0, 0, 0, 0, time.UTC)

	return !date.Before(oldest) && date.Before(nextMonth)
}
// fetch downloads the article page at u and converts it into an RSS item.
//
// The item's description is the outer HTML of the left widget container
// after its ".widget-list" and ".widget-pagelink" children have been removed.
// The title and publication date are read from the same container, and Guid
// is set to u.
//
// On any failure (request error, non-2xx status, unparsable HTML, HTML
// serialisation error) the zero-value Item is returned, so callers can
// recognise a failed fetch by its empty Guid.
func fetch(u string) (r rss2.Item) {
	res, err := http.Get(u)
	if err != nil {
		return rss2.Item{}
	}
	// The body must be closed on every path, otherwise the underlying
	// connection is leaked and cannot be reused.
	defer res.Body.Close()

	// An error page would otherwise be parsed as if it were the article.
	if res.StatusCode < 200 || res.StatusCode >= 300 {
		return rss2.Item{}
	}

	dom, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return rss2.Item{}
	}

	s := dom.Find("#bbcle-content .widget-container-left")
	// Remove() detaches the navigation widgets from the document; End() steps
	// back to s so that the remaining container is what gets serialised.
	content, err := goquery.OuterHtml(s.Find(".widget-list,.widget-pagelink").Remove().End())
	if err != nil {
		return rss2.Item{}
	}

	r.Title = s.Find(` div[data-widget-index="3"] h3 `).Text()

	// Flatten the sub-header text: trim it, drop newlines, then hand it to
	// CutSpecialDuplicate with " " (presumably collapsing repeated spaces —
	// confirm against the helper).
	r.PubDate = strings.TrimSpace(s.Find(".widget-bbcle-featuresubheader").Text())
	r.PubDate = strings2.Replace(r.PubDate, map[string]string{
		"\n": "",
	})
	r.PubDate = strings2.CutSpecialDuplicate(r.PubDate, " ")

	r.Guid = u
	r.Description = content
	return r
}
// full builds the RSS item for the link found under
// "#bbcle-content .widget-container-full a" in doc.
//
// The href is read from the first matched anchor. When no href is present or
// the URL does not pass dateFilter for recentDay, the zero-value Item is
// returned; otherwise the result of fetch for that URL is returned.
func full(doc *goquery.Document, recentDay int) (r rss2.Item) {
	link, found := doc.Find("#bbcle-content .widget-container-full a").Attr("href")
	if found && dateFilter(link, recentDay) {
		r = fetch(link)
	}
	return r
}
// items collects RSS items for the two-column image widget links in doc.
//
// Every matched anchor whose href passes dateFilter for recentDay is fetched.
// Anchors without an href, filtered-out URLs and pages that could not be
// fetched contribute nothing to the result.
func items(doc *goquery.Document, recentDay int) (r []rss2.Item) {
	const selector = "#bbcle-content > div > div.widget-container.widget-container-full > div.widget.widget-image.widget-image-two_column > div a"

	doc.Find(selector).Each(func(_ int, s *goquery.Selection) {
		u, ok := s.Attr("href")
		if !ok || !dateFilter(u, recentDay) {
			return
		}
		// fetch assigns Guid only after the page was downloaded and parsed,
		// so an empty Guid marks a failed fetch. Skipping it keeps blank
		// entries out of the feed.
		if it := fetch(u); it.Guid != "" {
			r = append(r, it)
		}
	})
	return r
}