2023-04-13 18:03:56 +00:00
package bbclearn
import (
"github.com/PuerkitoBio/goquery"
"github.com/fthvgb1/wp-go/rss2"
2023-07-09 15:02:46 +00:00
"github.com/fthvgb1/wp-go/safety"
2023-07-09 14:57:47 +00:00
"io"
"log"
2023-04-13 18:03:56 +00:00
"net/http"
2023-07-09 14:57:47 +00:00
"os"
"path/filepath"
"rss/mail"
2023-04-13 18:03:56 +00:00
"strings"
"time"
)
func LearnParse ( s string , recentDay int ) string {
document , err := goquery . NewDocumentFromReader ( strings . NewReader ( s ) )
if err != nil {
return ""
}
var item [ ] rss2 . Item
item = append ( item , full ( document , recentDay ) )
item = append ( item , items ( document , recentDay ) ... )
2023-04-14 14:46:06 +00:00
if len ( item ) < 1 {
return ""
}
2023-04-13 18:03:56 +00:00
rss := rss2 . Rss2 {
Title : "BBC 英语教学" ,
2023-04-13 18:48:44 +00:00
Link : "https://www.bbc.co.uk/learningenglish/chinese/" ,
2023-04-13 18:03:56 +00:00
LastBuildDate : time . Now ( ) . Format ( time . RFC1123Z ) ,
Items : item ,
}
return rss . GetXML ( )
}
2023-04-14 15:26:53 +00:00
// parseTime extracts the date encoded at the end of an episode URL.
//
// The text after the last "-" in u is parsed with the layout "060102"
// (two-digit year, month, day), e.g. ".../ep-230413" yields 2023-04-13 UTC.
//
// It returns a non-nil error when u contains no "-" at all or when the suffix
// does not match the layout; in both cases the returned time is the zero value.
func parseTime(u string) (date time.Time, err error) {
	const layout = "060102"

	i := strings.LastIndex(u, "-")
	if i < 0 {
		// Without a separator there is no date to parse. Report that
		// explicitly rather than returning a zero time with a nil error,
		// which callers cannot tell apart from a successful parse.
		return time.Time{}, &time.ParseError{
			Layout:  layout,
			Value:   u,
			Message: `: no "-" separated date suffix`,
		}
	}
	return time.Parse(layout, u[i+1:])
}
func dateFilter ( u string , recentDay int ) ( r bool ) {
date , err := parseTime ( u )
2023-04-13 18:03:56 +00:00
if err != nil {
return
}
t := time . Now ( )
2023-05-02 16:31:40 +00:00
if t . Sub ( date ) . Hours ( ) / 24 - float64 ( recentDay ) > 0 {
2023-04-13 18:03:56 +00:00
return
}
r = true
return
}
func fetch ( u string ) ( r rss2 . Item ) {
res , err := http . Get ( u )
if err != nil {
return
}
dom , err := goquery . NewDocumentFromReader ( res . Body )
if err != nil {
return
}
s := dom . Find ( "#bbcle-content .widget-container-left" )
content , err := goquery . OuterHtml ( s . Find ( ".widget-list,.widget-pagelink" ) . Remove ( ) . End ( ) )
if err != nil {
return
}
r . Title = s . Find ( ` div[data-widget-index="3"] h3 ` ) . Text ( )
2023-04-14 15:26:53 +00:00
date , _ := parseTime ( u )
r . PubDate = date . Format ( time . RFC1123Z )
2023-04-13 18:03:56 +00:00
r . Guid = u
r . Description = content
2023-07-09 15:02:46 +00:00
if _ , ok := hadSend . Load ( r . Title ) ; ! ok {
go downAndSendMail ( dom , r . Title )
}
2023-04-13 18:03:56 +00:00
return
}
2023-07-09 15:02:46 +00:00
// hadSend holds the titles of episodes whose attachments have already been
// mailed: downAndSendMail stores a title after a successful send, and fetch
// consults it before starting another send for the same title.
// NOTE(review): it is accessed from several goroutines; presumably
// safety.Map is safe for concurrent use — confirm against the safety package.
var hadSend = safety . NewMap [ string , struct { } ] ( )
2023-07-09 14:57:47 +00:00
func downAndSendMail ( doc * goquery . Document , title string ) {
type m struct {
tit string
content string
f [ ] string
}
mm := m { }
var err error
mm . tit = title
mm . content , err = doc . Find ( ".widget-richtext .text" ) . Html ( )
if err != nil {
return
}
for _ , ss := range [ ] string { ".bbcle-download-extension-pdf" , ".bbcle-download-extension-mp3" } {
uu , ok := doc . Find ( ss ) . Attr ( "href" )
if ok {
response , err := http . Get ( uu )
if err != nil {
continue
}
name := filepath . Base ( uu )
file , err := os . OpenFile ( name , os . O_CREATE | os . O_WRONLY , 0755 )
if err != nil {
continue
}
_ , err = io . Copy ( file , response . Body )
if err != nil {
continue
}
mm . f = append ( mm . f , name )
}
}
if len ( mm . f ) < 1 {
return
}
2023-07-11 04:12:03 +00:00
err = mail . SendMail ( mm . tit , mm . content , mm . f ... )
if err != nil {
log . Println ( "err" , err )
return
}
2023-07-09 15:02:46 +00:00
hadSend . Store ( mm . tit , struct { } { } )
2023-07-09 14:57:47 +00:00
for _ , s := range mm . f {
err := os . Remove ( s )
if err != nil {
log . Printf ( "delete file %s err:%v\n" , s , err )
}
}
}
2023-04-13 18:03:56 +00:00
// full returns the feed item for the headline episode of the landing page.
//
// It takes the href of the first link matched inside the full-width widget
// container; when that link exists and passes dateFilter for recentDay, the
// linked page is fetched. Otherwise the zero Item is returned.
func full(doc *goquery.Document, recentDay int) (r rss2.Item) {
	link := doc.Find("#bbcle-content .widget-container-full a")
	if href, found := link.Attr("href"); found && dateFilter(href, recentDay) {
		r = fetch(href)
	}
	return r
}
// items returns one feed item per two-column episode link on the landing page.
//
// Every matched link that carries an href and passes dateFilter for recentDay
// is fetched, in document order; links without an href or with an episode that
// is too old are skipped.
func items(doc *goquery.Document, recentDay int) (r []rss2.Item) {
	const selector = "#bbcle-content > div > div.widget-container.widget-container-full > div.widget.widget-image.widget-image-two_column > div a"

	collect := func(_ int, link *goquery.Selection) {
		href, found := link.Attr("href")
		if found && dateFilter(href, recentDay) {
			r = append(r, fetch(href))
		}
	}
	doc.Find(selector).Each(collect)
	return r
}