// Package bbclearn turns the BBC Learning English (Chinese) index page into an
// RSS 2.0 feed and mails the downloadable attachments of each fetched article.
package bbclearn

import (
	"io"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/fthvgb1/wp-go/rss2"
	"rss/mail"
)

// LearnParse parses the HTML of the index page given in s and returns the RSS
// XML for the articles whose URL date is at most recentDay days old.
// It returns "" when s cannot be parsed or when no article qualifies.
func LearnParse(s string, recentDay int) string {
	document, err := goquery.NewDocumentFromReader(strings.NewReader(s))
	if err != nil {
		return ""
	}

	var feed []rss2.Item
	// full returns a zero Item when the headline is missing, too old or could
	// not be fetched; fetch always sets Guid on success, so an empty Guid
	// marks "nothing to publish". Appending it unconditionally would emit an
	// empty <item> and make the emptiness check below unreachable.
	if lead := full(document, recentDay); lead.Guid != "" {
		feed = append(feed, lead)
	}
	feed = append(feed, items(document, recentDay)...)
	if len(feed) == 0 {
		return ""
	}

	rss := rss2.Rss2{
		Title:         "BBC 英语教学",
		Link:          "https://www.bbc.co.uk/learningenglish/chinese/",
		LastBuildDate: time.Now().Format(time.RFC1123Z),
		Items:         feed,
	}
	return rss.GetXML()
}

// parseTime extracts the publication date from an article URL. The date is the
// text after the last '-' in u and must be in YYMMDD form (layout "060102").
// A URL without any '-' yields a *time.ParseError instead of a silent zero
// time, so callers can tell "no date" apart from a real date.
func parseTime(u string) (date time.Time, err error) {
	const layout = "060102"

	idx := strings.LastIndex(u, "-")
	if idx < 0 {
		return time.Time{}, &time.ParseError{
			Layout:  layout,
			Value:   u,
			Message: ": no '-' separated date suffix",
		}
	}
	return time.Parse(layout, u[idx+1:])
}

// dateFilter reports whether the date encoded in u is no more than recentDay
// days before now. URLs whose date cannot be parsed are rejected.
func dateFilter(u string, recentDay int) (r bool) {
	date, err := parseTime(u)
	if err != nil {
		return false
	}
	ageInDays := time.Since(date).Hours() / 24
	return ageInDays <= float64(recentDay)
}

// fetch downloads the article at u and converts it into an RSS item. On any
// failure (network error, non-200 response, unparsable HTML) it logs the cause
// and returns the zero Item, which callers recognise by its empty Guid.
//
// As a side effect a successful fetch starts downAndSendMail in a background
// goroutine; fetch does not wait for it.
func fetch(u string) rss2.Item {
	res, err := http.Get(u)
	if err != nil {
		log.Printf("fetch %s err:%v\n", u, err)
		return rss2.Item{}
	}
	// The body must be closed or the underlying connection is leaked.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		// Do not turn an error page into a feed entry.
		log.Printf("fetch %s unexpected status:%s\n", u, res.Status)
		return rss2.Item{}
	}

	dom, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		log.Printf("parse %s err:%v\n", u, err)
		return rss2.Item{}
	}

	left := dom.Find("#bbcle-content .widget-container-left")
	// Strip the list and page-link widgets from the column before it is
	// serialised; End() steps back to the column selection itself.
	content, err := goquery.OuterHtml(left.Find(".widget-list,.widget-pagelink").Remove().End())
	if err != nil {
		log.Printf("render %s err:%v\n", u, err)
		return rss2.Item{}
	}

	var r rss2.Item
	r.Title = left.Find(`div[data-widget-index="3"] h3`).Text()
	// The error is deliberately ignored: callers in this file only reach fetch
	// after dateFilter accepted u, and an unparsable date simply formats as the
	// zero time, matching the previous behaviour.
	date, _ := parseTime(u)
	r.PubDate = date.Format(time.RFC1123Z)
	r.Guid = u
	r.Description = content

	// fetch no longer touches dom after this point, so the goroutine is the
	// only user of the document.
	go downAndSendMail(dom, r.Title)
	return r
}

// downAndSendMail downloads the PDF and MP3 attachments linked from doc into
// the working directory, mails them with title as the subject and the article
// text as the body, and finally deletes the downloaded files. Nothing is sent
// when no attachment could be downloaded.
func downAndSendMail(doc *goquery.Document, title string) {
	content, err := doc.Find(".widget-richtext .text").Html()
	if err != nil {
		return
	}

	var files []string
	for _, sel := range []string{".bbcle-download-extension-pdf", ".bbcle-download-extension-mp3"} {
		href, ok := doc.Find(sel).Attr("href")
		if !ok {
			continue
		}
		if name, ok := downloadAttachment(href); ok {
			files = append(files, name)
		}
	}
	if len(files) == 0 {
		return
	}

	mail.SendMail(title, content, files...)

	for _, name := range files {
		if err := os.Remove(name); err != nil {
			log.Printf("delete file %s err:%v\n", name, err)
		}
	}
}

// downloadAttachment saves the resource at rawURL to a file in the working
// directory named after the last path element of the URL. It reports the file
// name and true on success; on failure it logs the cause, removes any partial
// file and returns false.
func downloadAttachment(rawURL string) (name string, ok bool) {
	resp, err := http.Get(rawURL)
	if err != nil {
		log.Printf("download %s err:%v\n", rawURL, err)
		return "", false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Avoid mailing an HTML error page disguised as a PDF/MP3.
		log.Printf("download %s unexpected status:%s\n", rawURL, resp.Status)
		return "", false
	}

	name = filepath.Base(rawURL)
	if name == "." || name == string(filepath.Separator) {
		log.Printf("download %s err:no file name in url\n", rawURL)
		return "", false
	}

	// O_TRUNC guarantees a stale, longer file of the same name cannot leave
	// trailing garbage behind the fresh content.
	file, err := os.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		log.Printf("create file %s err:%v\n", name, err)
		return "", false
	}

	_, copyErr := io.Copy(file, resp.Body)
	// Close explicitly (not deferred) so a failed flush is detected and the
	// handle is released before the file is mailed or removed.
	closeErr := file.Close()
	if copyErr != nil || closeErr != nil {
		log.Printf("write file %s err:%v %v\n", name, copyErr, closeErr)
		// Best-effort cleanup of the partial file; a failure here leaves
		// nothing further to do, so the error is intentionally dropped.
		_ = os.Remove(name)
		return "", false
	}
	return name, true
}

// full builds the RSS item for the headline article, i.e. the first link found
// inside the full-width widget container. It returns the zero Item when the
// link is missing, older than recentDay days, or cannot be fetched.
func full(doc *goquery.Document, recentDay int) (r rss2.Item) {
	u, ok := doc.Find("#bbcle-content .widget-container-full a").Attr("href")
	if !ok || !dateFilter(u, recentDay) {
		return rss2.Item{}
	}
	return fetch(u)
}

// items builds RSS items for every link in the two-column image widgets whose
// URL date is at most recentDay days old. Links that fail to fetch are skipped
// instead of producing empty feed entries.
func items(doc *goquery.Document, recentDay int) (r []rss2.Item) {
	const sel = "#bbcle-content > div > div.widget-container.widget-container-full > div.widget.widget-image.widget-image-two_column > div a"

	doc.Find(sel).Each(func(_ int, link *goquery.Selection) {
		u, ok := link.Attr("href")
		if !ok || !dateFilter(u, recentDay) {
			return
		}
		// fetch signals failure with a zero Item (empty Guid).
		if it := fetch(u); it.Guid != "" {
			r = append(r, it)
		}
	})
	return r
}