完善 只查今天内的新闻
This commit is contained in:
parent
eceeae2444
commit
59d9acc4a4
10
main.go
10
main.go
|
@ -10,6 +10,7 @@ import (
|
||||||
"github.com/gorilla/websocket"
|
"github.com/gorilla/websocket"
|
||||||
"github/fthvgb1/newsfetch/data"
|
"github/fthvgb1/newsfetch/data"
|
||||||
"github/fthvgb1/newsfetch/newsource"
|
"github/fthvgb1/newsfetch/newsource"
|
||||||
|
"github/fthvgb1/newsfetch/tools"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
|
@ -168,7 +169,7 @@ func (f *fetchHandler) handle(conn string) {
|
||||||
for _, sourceName := range (*f.searchSource.mapX)[conn] {
|
for _, sourceName := range (*f.searchSource.mapX)[conn] {
|
||||||
source := f.sourceMap[sourceName]
|
source := f.sourceMap[sourceName]
|
||||||
r := f.fetch2(source, key)
|
r := f.fetch2(source, key)
|
||||||
if r != nil {
|
if r != nil && r.StatusCode == 200 {
|
||||||
if strings.ToUpper(source.Type) == "HTML" {
|
if strings.ToUpper(source.Type) == "HTML" {
|
||||||
f.parsesDom(r, conn, source)
|
f.parsesDom(r, conn, source)
|
||||||
} else {
|
} else {
|
||||||
|
@ -318,6 +319,9 @@ func (f *fetchHandler) parseAjax(response *http.Response, source newsource.Sourc
|
||||||
if newFetch[i].Source == "" {
|
if newFetch[i].Source == "" {
|
||||||
newFetch[i].Source = source.Name
|
newFetch[i].Source = source.Name
|
||||||
}
|
}
|
||||||
|
if !tools.IsInToday(newFetch[i].Date) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok {
|
if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok {
|
||||||
f.hadFetchData = append(f.hadFetchData, fetchData)
|
f.hadFetchData = append(f.hadFetchData, fetchData)
|
||||||
setMap(&f.hadFetchedMap, k, 1)
|
setMap(&f.hadFetchedMap, k, 1)
|
||||||
|
@ -353,6 +357,10 @@ func (f *fetchHandler) parsesDom(html *http.Response, conn string, source newsou
|
||||||
Source: source.Name,
|
Source: source.Name,
|
||||||
}
|
}
|
||||||
source.QueryHandler(i, selection, &fetchData)
|
source.QueryHandler(i, selection, &fetchData)
|
||||||
|
if !tools.IsInToday(fetchData.Date) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
k := conn + "_" + fetchData.Url + "_" + fetchData.Title
|
k := conn + "_" + fetchData.Url + "_" + fetchData.Title
|
||||||
if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok {
|
if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok {
|
||||||
f.hadFetchData = append(f.hadFetchData, fetchData)
|
f.hadFetchData = append(f.hadFetchData, fetchData)
|
||||||
|
|
|
@ -285,7 +285,7 @@ func GetSource() []Source {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "环球网",
|
Name: "环球网",
|
||||||
SearchUrl: "https://www.baidu.com/s?wd=site:huanqiu.com%20",
|
SearchUrl: "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd=site%3Ahuanqiu.com%20${keyword}&oq=site%3Ahuanqiu.com%20${keyword}&rsv_pq=fd7641bf0000c95a&rsv_t=9e2bJvQ2hjc8VfH%2F%2BWNQlLfJTQiVdsd2EOjNTtBJWMNJwqOgrJBau3sV408&rqlang=cn&rsv_enter=1&gpc=stf%3D1658739916%2C1658826316%7Cstftype%3D1&tfflag=1&si=huanqiu.com&ct=2097152&bs=site%3Ahuanqiu.com%20${keyword}",
|
||||||
Method: "get",
|
Method: "get",
|
||||||
ListQuery: "div[class=\"result c-container xpath-log new-pmd\"]",
|
ListQuery: "div[class=\"result c-container xpath-log new-pmd\"]",
|
||||||
QueryHandler: func(i int, selection *goquery.Selection, data *data.FetchData) {
|
QueryHandler: func(i int, selection *goquery.Selection, data *data.FetchData) {
|
||||||
|
@ -317,23 +317,21 @@ func GetSource() []Source {
|
||||||
},
|
},
|
||||||
Type: "html",
|
Type: "html",
|
||||||
HeaderFun: func(req *http.Request) {
|
HeaderFun: func(req *http.Request) {
|
||||||
req.Header.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
|
req.Header.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
|
||||||
req.Header.Add("accept-language", "zh-CN,zh;q=0.9")
|
req.Header.Add("Accept-Language", "zh-CN,zh;q=0.9")
|
||||||
req.Header.Add("cache-control", "no-cache")
|
req.Header.Add("Cache-Control", "max-age=0")
|
||||||
req.Header.Add("connection", "keep-alive")
|
req.Header.Add("Connection", "keep-alive")
|
||||||
req.Header.Add("cookie", "BIDUPSID=844E3DCAA2EEBF5C872DC99B967B6B7B; PSTM=1655872163; BAIDUID=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; BD_UPN=123353; ORIGIN=2; ISSW=1; ISSW=1; BAIDUID_BFESS=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; ZFY=jWFAySgO:AoQfb6emY9vnmEdptVao:Anj0FFkp028wFws:C; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=3; COOKIE_SESSION=42_0_2_2_3_0_1_0_2_0_0_0_18_0_51_0_1655888428_0_1655888377%7C3%230_0_1655888377%7C1; BAIDU_WISE_UID=wapp_1655902298617_702; ZD_ENTRY=google; channel=baidusearch; baikeVisitId=b3b23509-9330-4d33-82ae-b8eb37895917; BA_HECTOR=8k2g2g218ga40181ak1hbgg1n14; BDRCVFR[C0p6oIjvx-c]=mbxnW11j9Dfmh7GuZR8mvqV; BDSVRTM=1011; H_PS_PSSID=36550_36459_36673_36455_36453_36692_36165_36695_36697_36569_36075_36467_36316_36651")
|
req.Header.Add("Cookie", "ORIGIN=2; ISSW=1; ISSW=1; BAIDUID=D027CF9B0DA3E84567371CDDB93354D9:FG=1; BIDUPSID=D027CF9B0DA3E84567371CDDB93354D9; PSTM=1658826597; delPer=0; BD_CK_SAM=1; PSINO=3; H_PS_PSSID=36826_36557_36624_36726_36413_36841_36949_36166_36919_36816_36569_36803_36742_26350_36930; kleck=f7cc9cf597ac3b3b07c0e39685f3866d; BD_UPN=123353; H_PS_645EC=7751lTb3sntYDTYnQ3dYOUsQm%2F33Fj1OieW5PEXQtyfdM%2BQr%2FIJXHy6B8Go; BA_HECTOR=8k8g04048505a0ag0l813l5e1hdvbr617; ZFY=GeArY5Cvc06Apm6W3TYqUXvnnz4AJG0RjaCGRoJjzTY:C")
|
||||||
req.Header.Add("referer", "http://news.baidu.com/")
|
req.Header.Add("Referer", "https://wappass.baidu.com/")
|
||||||
req.Header.Add("sec-fetch-dest", "document")
|
req.Header.Add("Sec-Fetch-Dest", "document")
|
||||||
req.Header.Add("sec-fetch-mode", "navigate")
|
req.Header.Add("Sec-Fetch-Mode", "navigate")
|
||||||
req.Header.Add("sec-fetch-site", "cross-site")
|
req.Header.Add("Sec-Fetch-Site", "same-site")
|
||||||
req.Header.Add("sec-fetch-user", "?1")
|
req.Header.Add("Sec-Fetch-User", "?1")
|
||||||
req.Header.Add("upgrade-insecure-requests", "1")
|
req.Header.Add("Upgrade-Insecure-Requests", "1")
|
||||||
req.Header.Add("user-agent", "Mozilla/5.0 (X11; Windows x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
|
req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
|
||||||
req.Header.Add("#sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"")
|
req.Header.Add("sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"")
|
||||||
req.Header.Add("sec-ch-ua-mobile", "?0")
|
req.Header.Add("sec-ch-ua-mobile", "?0")
|
||||||
req.Header.Add("sec-ch-ua-platform", "\"Windows\"")
|
req.Header.Add("sec-ch-ua-platform", "\"Linux\"")
|
||||||
req.Header.Add("postman-token", "81407fbc-2b96-54a7-0193-f640156714ab")
|
|
||||||
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
7
static/static.go
Normal file
7
static/static.go
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
package static
|
||||||
|
|
||||||
|
// Layout strings for the standard library time package. Each one is written
// in terms of Go's reference time (Mon Jan 2 15:04:05 2006).
const (
	// TIMESTAMPFORMAT is a date plus a time of day with seconds.
	TIMESTAMPFORMAT = "2006-01-02 15:04:05"

	// TIMESTAMPMinFORMAT is a date plus a time of day with minute precision.
	TIMESTAMPMinFORMAT = "2006-01-02 15:04"

	// TIMEDATE is a dash-separated calendar date.
	TIMEDATE = "2006-01-02"

	// TIMEDATEC is a Chinese-style calendar date with zero-padded month and day.
	TIMEDATEC = "2006年01月02日"

	// TIMEDATECC is a Chinese-style calendar date without zero padding.
	TIMEDATECC = "2006年1月2日"
)
|
54
tools/time.go
Normal file
54
tools/time.go
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
package tools
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github/fthvgb1/newsfetch/static"
|
||||||
|
"log"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func IsInToday(t string) bool {
|
||||||
|
if t == "" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
t = strings.Trim(t, " ")
|
||||||
|
t = strings.Trim(t, "\xc2\xa0\xc2\xa0")
|
||||||
|
t = strings.Replace(t, "\n", "", -1)
|
||||||
|
t = strings.Replace(t, "\t", "", -1)
|
||||||
|
if strings.Contains(t, "小时前") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if strings.Contains(t, "天前") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
today, _ := time.ParseInLocation(static.TIMEDATE, time.Now().Format(static.TIMEDATE), time.Local)
|
||||||
|
if len(strings.Fields(t)) > 1 {
|
||||||
|
nz, err := time.ParseInLocation(static.TIMESTAMPFORMAT, t, time.Local)
|
||||||
|
if err != nil {
|
||||||
|
nz, err = time.ParseInLocation(static.TIMESTAMPMinFORMAT, t, time.Local)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("can't parse time err[%s]", err)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if today.Unix() > nz.Unix() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
nz, err := time.ParseInLocation(static.TIMEDATE, t, time.Local)
|
||||||
|
if err != nil {
|
||||||
|
nz, err = time.ParseInLocation(static.TIMEDATEC, t, time.Local)
|
||||||
|
if err != nil {
|
||||||
|
nz, err = time.ParseInLocation(static.TIMEDATECC, t, time.Local)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("can't parse time err[%s]", err)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if today.Unix() > nz.Unix() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user