From ddd4bc86f3469c18882e292577c6863091450a42 Mon Sep 17 00:00:00 2001 From: xing Date: Fri, 14 Apr 2023 02:03:56 +0800 Subject: [PATCH] init --- .gitignore | 4 +- Dockerfile | 3 + bbclearn/a.html | 12542 +++++++++++++++++++++++++++++++++++++++++ bbclearn/b.html | 7471 ++++++++++++++++++++++++ bbclearn/bbc.go | 99 + bbclearn/bbc_test.go | 70 + go.mod | 10 +- main.go | 94 + 8 files changed, 20291 insertions(+), 2 deletions(-) create mode 100644 Dockerfile create mode 100644 bbclearn/a.html create mode 100644 bbclearn/b.html create mode 100644 bbclearn/bbc.go create mode 100644 bbclearn/bbc_test.go create mode 100644 main.go diff --git a/.gitignore b/.gitignore index 7ea6692..604e828 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .idea .gitignore -fetchdapenti.iml \ No newline at end of file +rss.iml +rss +fetchdapenti \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6dda421 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,3 @@ +FROM alpine:latest +COPY rss /opt/rss +CMD ["/opt/rss"] \ No newline at end of file diff --git a/bbclearn/a.html b/bbclearn/a.html new file mode 100644 index 0000000..17f8491 --- /dev/null +++ b/bbclearn/a.html @@ -0,0 +1,12542 @@ + + + + BBC Learning English - 英语大破解 / Gigantic dinosaur skeleton on show in London + 巨型恐龙骨架模型在伦敦展出 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+
+
+ + + + +
+
+
+

+ 英语大破解

+

中级

+

Gigantic dinosaur skeleton on show in London + 巨型恐龙骨架模型在伦敦展出

+
+
+
+

+ Episode 230413 + + / 13 Apr 2023 +

+
+
+
+
+
+
+ +
+
+
+
+

本集内容

+

Gigantic dinosaur skeleton on show in London 巨型恐龙骨架模型在伦敦展出

+

文字稿

+

Piece by piece, bone by colossal bone, a creature from one hundred million years ago, begins + to emerge.

+

一块块化石模型、一根根巨骨被组装起来,这只一亿年前的恐龙渐渐成形。

+

This is Patagotitan, one of the largest dinosaurs ever to walk the Earth.

+

这是巴塔哥巨龙,是地球上体型最大的恐龙之一。

+

Rebecca Morelle, BBC reporter
"Assembling this dinosaur is like putting together a giant + 3D jigsaw puzzle. There are more than 500 bones and fixings, but no instruction manual. This + beast measures 37 metres from the tip of its nose all the way down to its tail. And it just + about squeezes into this room with a few twists and turns along the way."

+

丽贝卡·莫雷尔       BBC通讯员
“组装这个恐龙骨架模型就像在拼装一个巨大的三维拼图,整个恐龙骨架模型有500多块骨头和紧固件,但不像拼图,这个恐龙骨架模型没有配说明书。这个大家伙从鼻尖到尾巴有37米长,骨架模型刚好塞进展厅,但也是几经挪动才摆好。” +

+

The Titanosaur is a cast, an exact replica of the original fossilised bones.

+

这个泰坦龙骨架是一个模型,是原骨架化石的精确复制品。

+

And getting a creature like this into a 140-year-old building has been a challenge.

+

将这样的恐龙骨架模型搬进一座有140年历史的建筑着实是一项挑战。

+

Sinéad Marron, Exhibition Manager, Natural History Museum
"We've had to take some + of the doors off in order to get the crates in and different parts of the dinosaur in. Some + of the bones and the cast itself are quite heavy. So, we've had to reinforce our floors. + That sense of awe at standing under one of the largest animals to have walked on land and + trying to imagine it as a living, breathing creature is amazing."

+

希妮德·马伦       伦敦自然历史博物馆展览经理
“我们不得不拆掉博物馆的几扇门,才把货箱和恐龙骨架模型的不同部位搬进去。有一些骨头模型本身就很重。所以我们还必须加固地板。站在陆地上行走过的最大的动物之一下方、把它试想成一个活生生的动物,这种敬畏之感令人惊叹。” +

+

Now, the exhibition's complete, the dinosaur's ready for its moment in the spotlight.

+

现在,展览已准备就绪,这只 “恐龙” 就等着在万众瞩目下登场了。

+

Professor Paul Barrett, Palaeontologist, Natural History Museum
"So, one of the first + things you notice is a huge toothy grin with these pencil-like teeth. They're constantly on + the move. [They] would have been just feeding machines, constantly using those heads to + stuff more and more leaves and twigs and so on into its body just in order to keep it moving + around."

+

保罗·巴雷特教授       伦敦自然历史博物馆古生物学家
“你最先注意到的是这种恐龙的笑脸,满嘴都是像铅笔一样的细长牙齿。它们一直在移动,而且非常能吃,不停地张开大口,吞下树叶和树枝等食物,就是为了能有力气继续到处移动。” +

+

Patagotitan still holds some mysteries.

+

关于巴塔哥巨龙仍存在一些谜团。

+

No one knows why these creatures grew so big – or how they came to die out. But their + size and their success for millions of years puts our own existence into sharp + perspective.

+

+ 没人知道这种动物为什么能长这么大,也没人知道导致它们灭绝的原因。但巴塔哥巨龙的庞大体型和它们得以繁衍生息数百万年的事实让我们对人类自己的存在有了更深刻的认识。

+
+
+

最新 + 英语大破解

+ + +
+ +
+
+ +
+
+
+
+ +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/bbclearn/b.html b/bbclearn/b.html new file mode 100644 index 0000000..d780717 --- /dev/null +++ b/bbclearn/b.html @@ -0,0 +1,7471 @@ + + + + BBC Learning English - China Home Page + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+
+
+ + + + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/bbclearn/bbc.go b/bbclearn/bbc.go new file mode 100644 index 0000000..66134cc --- /dev/null +++ b/bbclearn/bbc.go @@ -0,0 +1,99 @@ +package bbclearn + +import ( + "fmt" + "github.com/PuerkitoBio/goquery" + strings2 "github.com/fthvgb1/wp-go/helper/strings" + "github.com/fthvgb1/wp-go/rss2" + "net/http" + "strings" + "time" +) + +func LearnParse(s string, recentDay int) string { + document, err := goquery.NewDocumentFromReader(strings.NewReader(s)) + if err != nil { + return "" + } + var item []rss2.Item + item = append(item, full(document, recentDay)) + item = append(item, items(document, recentDay)...) + rss := rss2.Rss2{ + Title: "BBC 英语教学", + Link: "http://www.bbc.co.uk/learningenglish/chinese/", + LastBuildDate: time.Now().Format(time.RFC1123Z), + Items: item, + } + return rss.GetXML() +} + +func dateFilter(u string, recentDay int) (r bool) { + uu := strings.Split(u, "-") + if len(uu) < 2 { + return + } + date, err := time.Parse("060102", uu[len(uu)-1]) + if err != nil { + return + } + t := time.Now() + if t.Year() != date.Year() || t.Month() != date.Month() { + return + } + fmt.Println(time.Now().Day()-recentDay, date.Day()) + if t.Day()-recentDay > date.Day() { + return + } + r = true + return +} + +func fetch(u string) (r rss2.Item) { + res, err := http.Get(u) + if err != nil { + return + } + dom, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + return + } + s := dom.Find("#bbcle-content .widget-container-left") + content, err := goquery.OuterHtml(s.Find(".widget-list,.widget-pagelink").Remove().End()) + if err != nil { + return + } + r.Title = s.Find(`div[data-widget-index="3"] h3`).Text() + + r.PubDate = strings.TrimSpace(s.Find(".widget-bbcle-featuresubheader").Text()) + r.PubDate = strings2.Replace(r.PubDate, map[string]string{ + "\n": "", + }) + r.PubDate = strings2.CutSpecialDuplicate(r.PubDate, " ") + r.Guid = u + r.Description = content + return +} + +func full(doc *goquery.Document, recentDay int) (r rss2.Item) { + a := doc.Find("#bbcle-content .widget-container-full a") + u, ok := a.Attr("href") + if !ok { + return + } + if !dateFilter(u, recentDay) { + return + } + r = fetch(u) + return +} + +func items(doc *goquery.Document, recentDay int) (r []rss2.Item) { + doc.Find("#bbcle-content > div > div.widget-container.widget-container-full > div.widget.widget-image.widget-image-two_column > div a").Each(func(i int, s *goquery.Selection) { + u, ok := s.Attr("href") + if !ok || !dateFilter(u, recentDay) { + return + } + r = append(r, fetch(u)) + }) + return +} diff --git a/bbclearn/bbc_test.go b/bbclearn/bbc_test.go new file mode 100644 index 0000000..b87e341 --- /dev/null +++ b/bbclearn/bbc_test.go @@ -0,0 +1,70 @@ +package bbclearn + +import ( + "github.com/PuerkitoBio/goquery" + "github.com/fthvgb1/wp-go/rss2" + "os" + "reflect" + "strings" + "testing" +) + +func Test_full(t *testing.T) { + type args struct { + doc *goquery.Document + recentDay int + } + tests := []struct { + name string + args args + want rss2.Item + }{ + { + name: "t1", + args: args{doc: func() *goquery.Document { + f, _ := os.ReadFile("b.html") + d, _ := goquery.NewDocumentFromReader(strings.NewReader(string(f))) + return d + }(), recentDay: 1}, + //want: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := full(tt.args.doc, tt.args.recentDay); got != tt.want { + t.Errorf("full() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_items(t *testing.T) { + type args struct { + doc *goquery.Document + recentDay int + } + tests := []struct { + name string + args args + wantR rss2.Item + }{ + { + name: "t1", + args: args{ + doc: func() *goquery.Document { + f, _ := os.ReadFile("b.html") + d, _ := goquery.NewDocumentFromReader(strings.NewReader(string(f))) + return d + }(), + recentDay: 1, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if gotR := items(tt.args.doc, tt.args.recentDay); !reflect.DeepEqual(gotR, tt.wantR) { + t.Errorf("items() = %v, want %v", gotR, tt.wantR) + } + }) + } +} diff --git a/go.mod b/go.mod index 425f003..69275c8 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,11 @@ -module fetchdapenti +module rss go 1.19 + +require ( + github.com/PuerkitoBio/goquery v1.8.1 // indirect + github.com/andybalholm/cascadia v1.3.1 // indirect + github.com/fthvgb1/wp-go v0.0.0-20230411054214-125764711dbd // indirect + golang.org/x/exp v0.0.0-20230203172020-98cc5a0785f9 // indirect + golang.org/x/net v0.7.0 // indirect +) diff --git a/main.go b/main.go new file mode 100644 index 0000000..fa247a4 --- /dev/null +++ b/main.go @@ -0,0 +1,94 @@ +package main + +import ( + "io" + "log" + "net/http" + "os" + "regexp" + "rss/bbclearn" + "strings" + "time" +) + +var zhihuReg = regexp.MustCompile(`(?is:((.*?)))`) +var date = regexp.MustCompile(`(.*)`) + +func fetch(u string, fn ...func(s string) string) string { + res, err := http.Get(u) + if err != nil { + return "" + } + s, err := io.ReadAll(res.Body) + if err != nil { + return "" + } + html := string(s) + for _, f := range fn { + html = f(html) + } + return html +} + +func dayLimit(today, forwardDay int, s string) string { + da := date.FindStringSubmatch(s) + if len(da) <= 1 { + return s + } + t, err := time.Parse(time.RFC1123Z, da[1]) + if err != nil { + return s + } + if today-forwardDay > t.Day() { + return "" + } + return s +} + +func filterItem(html string, today, recentDay int) string { + return zhihuReg.ReplaceAllStringFunc(html, func(s string) string { + return dayLimit(today, recentDay, s) + }) +} + +func penti(w http.ResponseWriter, req *http.Request) { + io.WriteString(w, fetch("https://feedx.best/rss/pentitugua.xml", func(s string) string { + return strings.ReplaceAll(s, "www.dapenti.com:99", "imgc.1see.org") + })) +} + +func zhihuDaily(w http.ResponseWriter, req *http.Request) { + io.WriteString(w, fetch("https://feedx.best/rss/zhihudaily.xml", func(s string) string { + return filterItem(s, time.Now().Day(), 1) + })) +} + +func tjxz(w http.ResponseWriter, r *http.Request) { + io.WriteString(w, fetch("https://feedx.best/rss/tjxz.xml", func(s string) string { + return filterItem(s, time.Now().Day(), 0) + })) +} +func bbcLearn(w http.ResponseWriter, _ *http.Request) { + io.WriteString(w, fetch("https://www.bbc.co.uk/learningenglish/chinese", func(s string) string { + return bbclearn.LearnParse(s, 1) + })) +} + +func theNewYorker(w http.ResponseWriter, r *http.Request) { + io.WriteString(w, fetch("https://feedx.best/rss/newyorker.xml", func(s string) string { + return filterItem(s, time.Now().Day(), 1) + })) +} + +func main() { + port := os.Getenv("port") + if port == "" { + port = ":80" + } + http.HandleFunc("/pentitugua", penti) + http.HandleFunc("/zhihuDaily", zhihuDaily) + http.HandleFunc("/tjxz", tjxz) + http.HandleFunc("/bbcLearn", bbcLearn) + http.HandleFunc("/theNewYorker", theNewYorker) + log.Fatal(http.ListenAndServe(port, nil)) +}