diff --git a/main.go b/main.go index a037d70..5406e1c 100644 --- a/main.go +++ b/main.go @@ -10,6 +10,7 @@ import ( "github.com/gorilla/websocket" "github/fthvgb1/newsfetch/data" "github/fthvgb1/newsfetch/newsource" + "github/fthvgb1/newsfetch/tools" "io/fs" "io/ioutil" "log" @@ -168,7 +169,7 @@ func (f *fetchHandler) handle(conn string) { for _, sourceName := range (*f.searchSource.mapX)[conn] { source := f.sourceMap[sourceName] r := f.fetch2(source, key) - if r != nil { + if r != nil && r.StatusCode == 200 { if strings.ToUpper(source.Type) == "HTML" { f.parsesDom(r, conn, source) } else { @@ -318,6 +319,9 @@ func (f *fetchHandler) parseAjax(response *http.Response, source newsource.Sourc if newFetch[i].Source == "" { newFetch[i].Source = source.Name } + if !tools.IsInToday(newFetch[i].Date) { + continue + } if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok { f.hadFetchData = append(f.hadFetchData, fetchData) setMap(&f.hadFetchedMap, k, 1) @@ -353,6 +357,10 @@ func (f *fetchHandler) parsesDom(html *http.Response, conn string, source newsou Source: source.Name, } source.QueryHandler(i, selection, &fetchData) + if !tools.IsInToday(fetchData.Date) { + return + } + k := conn + "_" + fetchData.Url + "_" + fetchData.Title if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok { f.hadFetchData = append(f.hadFetchData, fetchData) diff --git a/newsource/model.go b/newsource/model.go index 03fc69d..14bf9f5 100644 --- a/newsource/model.go +++ b/newsource/model.go @@ -285,7 +285,7 @@ func GetSource() []Source { }, { Name: "环球网", - SearchUrl: "https://www.baidu.com/s?wd=site:huanqiu.com%20", + SearchUrl: "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd=site%3Ahuanqiu.com%20${keyword}&oq=site%3Ahuanqiu.com%20${keyword}&rsv_pq=fd7641bf0000c95a&rsv_t=9e2bJvQ2hjc8VfH%2F%2BWNQlLfJTQiVdsd2EOjNTtBJWMNJwqOgrJBau3sV408&rqlang=cn&rsv_enter=1&gpc=stf%3D1658739916%2C1658826316%7Cstftype%3D1&tfflag=1&si=huanqiu.com&ct=2097152&bs=site%3Ahuanqiu.com%20${keyword}", Method: "get", ListQuery: "div[class=\"result c-container xpath-log new-pmd\"]", QueryHandler: func(i int, selection *goquery.Selection, data *data.FetchData) { @@ -317,23 +317,21 @@ func GetSource() []Source { }, Type: "html", HeaderFun: func(req *http.Request) { - req.Header.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") - req.Header.Add("accept-language", "zh-CN,zh;q=0.9") - req.Header.Add("cache-control", "no-cache") - req.Header.Add("connection", "keep-alive") - req.Header.Add("cookie", "BIDUPSID=844E3DCAA2EEBF5C872DC99B967B6B7B; PSTM=1655872163; BAIDUID=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; BD_UPN=123353; ORIGIN=2; ISSW=1; ISSW=1; BAIDUID_BFESS=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; ZFY=jWFAySgO:AoQfb6emY9vnmEdptVao:Anj0FFkp028wFws:C; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=3; COOKIE_SESSION=42_0_2_2_3_0_1_0_2_0_0_0_18_0_51_0_1655888428_0_1655888377%7C3%230_0_1655888377%7C1; BAIDU_WISE_UID=wapp_1655902298617_702; ZD_ENTRY=google; channel=baidusearch; baikeVisitId=b3b23509-9330-4d33-82ae-b8eb37895917; BA_HECTOR=8k2g2g218ga40181ak1hbgg1n14; BDRCVFR[C0p6oIjvx-c]=mbxnW11j9Dfmh7GuZR8mvqV; BDSVRTM=1011; H_PS_PSSID=36550_36459_36673_36455_36453_36692_36165_36695_36697_36569_36075_36467_36316_36651") - req.Header.Add("referer", "http://news.baidu.com/") - req.Header.Add("sec-fetch-dest", "document") - req.Header.Add("sec-fetch-mode", "navigate") - req.Header.Add("sec-fetch-site", "cross-site") - req.Header.Add("sec-fetch-user", "?1") - req.Header.Add("upgrade-insecure-requests", "1") - req.Header.Add("user-agent", "Mozilla/5.0 (X11; Windows x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36") - req.Header.Add("#sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"") + req.Header.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") + req.Header.Add("Accept-Language", "zh-CN,zh;q=0.9") + req.Header.Add("Cache-Control", "max-age=0") + req.Header.Add("Connection", "keep-alive") + req.Header.Add("Cookie", "ORIGIN=2; ISSW=1; ISSW=1; BAIDUID=D027CF9B0DA3E84567371CDDB93354D9:FG=1; BIDUPSID=D027CF9B0DA3E84567371CDDB93354D9; PSTM=1658826597; delPer=0; BD_CK_SAM=1; PSINO=3; H_PS_PSSID=36826_36557_36624_36726_36413_36841_36949_36166_36919_36816_36569_36803_36742_26350_36930; kleck=f7cc9cf597ac3b3b07c0e39685f3866d; BD_UPN=123353; H_PS_645EC=7751lTb3sntYDTYnQ3dYOUsQm%2F33Fj1OieW5PEXQtyfdM%2BQr%2FIJXHy6B8Go; BA_HECTOR=8k8g04048505a0ag0l813l5e1hdvbr617; ZFY=GeArY5Cvc06Apm6W3TYqUXvnnz4AJG0RjaCGRoJjzTY:C") + req.Header.Add("Referer", "https://wappass.baidu.com/") + req.Header.Add("Sec-Fetch-Dest", "document") + req.Header.Add("Sec-Fetch-Mode", "navigate") + req.Header.Add("Sec-Fetch-Site", "same-site") + req.Header.Add("Sec-Fetch-User", "?1") + req.Header.Add("Upgrade-Insecure-Requests", "1") + req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36") + req.Header.Add("sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"") req.Header.Add("sec-ch-ua-mobile", "?0") - req.Header.Add("sec-ch-ua-platform", "\"Windows\"") - req.Header.Add("postman-token", "81407fbc-2b96-54a7-0193-f640156714ab") - + req.Header.Add("sec-ch-ua-platform", "\"Linux\"") }, }, { diff --git a/static/static.go b/static/static.go new file mode 100644 index 0000000..c2f9b89 --- /dev/null +++ b/static/static.go @@ -0,0 +1,7 @@ +package static + +const TIMESTAMPFORMAT = "2006-01-02 15:04:05" +const TIMESTAMPMinFORMAT = "2006-01-02 15:04" +const TIMEDATE = "2006-01-02" +const TIMEDATEC = "2006年01月02日" +const TIMEDATECC = "2006年1月2日" diff --git a/tools/time.go b/tools/time.go new file mode 100644 index 0000000..732e52a --- /dev/null +++ b/tools/time.go @@ -0,0 +1,54 @@ +package tools + +import ( + "github/fthvgb1/newsfetch/static" + "log" + "strings" + "time" +) + +func IsInToday(t string) bool { + if t == "" { + return true + } + t = strings.Trim(t, " ") + t = strings.Trim(t, "\xc2\xa0\xc2\xa0") + t = strings.Replace(t, "\n", "", -1) + t = strings.Replace(t, "\t", "", -1) + if strings.Contains(t, "小时前") { + return true + } + if strings.Contains(t, "天前") { + return false + } + today, _ := time.ParseInLocation(static.TIMEDATE, time.Now().Format(static.TIMEDATE), time.Local) + if len(strings.Fields(t)) > 1 { + nz, err := time.ParseInLocation(static.TIMESTAMPFORMAT, t, time.Local) + if err != nil { + nz, err = time.ParseInLocation(static.TIMESTAMPMinFORMAT, t, time.Local) + } + if err != nil { + log.Printf("can't parse time err[%s]", err) + return true + } + if today.Unix() > nz.Unix() { + return false + } + } else { + nz, err := time.ParseInLocation(static.TIMEDATE, t, time.Local) + if err != nil { + nz, err = time.ParseInLocation(static.TIMEDATEC, t, time.Local) + if err != nil { + nz, err = time.ParseInLocation(static.TIMEDATECC, t, time.Local) + } + } + if err != nil { + log.Printf("can't parse time err[%s]", err) + return true + } + if today.Unix() > nz.Unix() { + return false + } + } + return true +}