// Package main serves the embedded web UI and pushes newly fetched news items
// to every connected browser over a websocket.
package main

import (
	"embed"
	"encoding/json"
	"errors"
	"fmt"
	"io/fs"
	"io/ioutil"
	"log"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/gin-gonic/gin"
	"github.com/gorilla/websocket"
	"github/fthvgb1/newsfetch/data"
	"github/fthvgb1/newsfetch/newsource"
)

const (
	// defaultKeyword is searched for until a client sends its own keyword.
	defaultKeyword = "纪检"
	// defaultCronStep is the polling interval used until a client sends a
	// positive timeStep.
	defaultCronStep = 60 * time.Second
	// listenAddr is the address the HTTP server binds to.
	listenAddr = ":8080"
	// localURL is the page opened in the user's browser after start-up.
	localURL = "http://127.0.0.1:8080"
)

// connChan is a message received from the websocket client identified by conn.
type connChan struct {
	conn string
	msg  message
}

// dataChan is a batch of new items destined for the client identified by conn.
type dataChan struct {
	conn string
	item []data.FetchData
}

// fetchHandler holds the per-connection settings, the de-duplication index and
// the channels that connect the fetch, receive and send goroutines.
// All mapXS fields are keyed by the connection's remote address, except
// hadFetchedMap which is keyed by "<conn>_<url>_<title>".
type fetchHandler struct {
	fetchUrl      string
	hadFetchData  []data.FetchData // guarded by hadFetchedMap's mutex
	cronTime      mapXS[time.Duration]
	keyword       mapXS[string]
	hadFetchedMap mapXS[int]
	reloadCron    mapXS[chan int]
	isOff         chan int
	rMsgChan      chan connChan
	newFetchItem  chan dataChan
	connMap       mapXS[*websocket.Conn]
}

// setting is the payload of a "search" message sent by the client.
type setting struct {
	Keyword  string `json:"keyword"`
	TimeStep int    `json:"timeStep"` // polling interval in seconds
}

// message is the JSON envelope exchanged with the client in both directions.
type message struct {
	Status  bool
	Action  string
	Message string
	Data    any
}

// dist adapts the embedded asset tree so that flat request names are mapped to
// the js/ and css/ sub-directories below path.
type dist struct {
	embed.FS
	path string
}

//go:embed dist/*
var st embed.FS

// Open implements fs.FS. Names ending in ".js" or ".css" are looked up in the
// directory named after their extension, ".map" files are looked up in js/,
// and everything else is resolved directly below r.path.
func (r dist) Open(name string) (fs.File, error) {
	if filepath.Separator != '/' && strings.ContainsRune(name, filepath.Separator) {
		return nil, errors.New("http: invalid character in file path")
	}
	fullName := strings.TrimLeft(name, "/")
	// LastIndex returns -1 when there is no dot, so ext is then the whole name,
	// exactly like taking the last element of strings.Split(fullName, ".").
	switch ext := fullName[strings.LastIndex(fullName, ".")+1:]; ext {
	case "js", "css":
		fullName = ext + "/" + fullName
	case "map":
		fullName = "js/" + fullName
	}
	// embed.FS paths always use forward slashes, so filepath.Join must not be
	// used here.
	return r.FS.Open(r.path + "/" + fullName)
}

// mapT lists the value types that may be stored in a mapXS.
type mapT interface {
	string | int | time.Duration | *websocket.Conn | chan int
}

// mapX is a string-keyed map of T.
type mapX[T mapT] map[string]T

// mapXS is a mapX paired with the mutex that must be held for every access.
type mapXS[T mapT] struct {
	*mapX[T]
	*sync.Mutex
}

// newMapXS returns an empty, ready-to-use mapXS.
func newMapXS[T mapT]() mapXS[T] {
	return mapXS[T]{&mapX[T]{}, &sync.Mutex{}}
}

// setMap stores v under key while holding obj's mutex.
func setMap[T mapT](obj *mapXS[T], key string, v T) {
	obj.Lock()
	defer obj.Unlock()
	(*obj.mapX)[key] = v
}

// delMap removes key while holding obj's mutex.
func delMap[T mapT](obj *mapXS[T], key string) {
	obj.Lock()
	defer obj.Unlock()
	delete(*obj.mapX, key)
}

// getMap returns the value stored under key and whether it was present, while
// holding obj's mutex. Reading the map without the lock races with setMap and
// delMap, which Go reports as a fatal "concurrent map read and map write".
func getMap[T mapT](obj *mapXS[T], key string) (T, bool) {
	obj.Lock()
	defer obj.Unlock()
	v, ok := (*obj.mapX)[key]
	return v, ok
}

// newFetchHandler returns a fetchHandler with all maps and channels allocated.
func newFetchHandler(fetchUrl string) *fetchHandler {
	return &fetchHandler{
		fetchUrl:      fetchUrl,
		keyword:       newMapXS[string](),
		hadFetchedMap: newMapXS[int](),
		cronTime:      newMapXS[time.Duration](),
		reloadCron:    newMapXS[chan int](),
		isOff:         make(chan int),
		rMsgChan:      make(chan connChan, 10),
		newFetchItem:  make(chan dataChan, 10),
		connMap:       newMapXS[*websocket.Conn](),
	}
}

// recordIfNew reports whether item has not yet been delivered to conn and, if
// so, remembers it. The check and the update happen under one lock so that two
// goroutines handling the same connection cannot both treat an item as new.
func (f *fetchHandler) recordIfNew(conn string, item data.FetchData) bool {
	key := conn + "_" + item.Url + "_" + item.Title
	f.hadFetchedMap.Lock()
	defer f.hadFetchedMap.Unlock()
	if _, seen := (*f.hadFetchedMap.mapX)[key]; seen {
		return false
	}
	(*f.hadFetchedMap.mapX)[key] = 1
	f.hadFetchData = append(f.hadFetchData, item)
	return true
}

// forget drops the settings and the de-duplication entries kept for conn.
func (f *fetchHandler) forget(conn string) {
	delMap(&f.reloadCron, conn)
	delMap(&f.cronTime, conn)
	delMap(&f.keyword, conn)

	prefix := conn + "_"
	f.hadFetchedMap.Lock()
	defer f.hadFetchedMap.Unlock()
	for k := range *f.hadFetchedMap.mapX {
		if strings.HasPrefix(k, prefix) {
			delete(*f.hadFetchedMap.mapX, k)
		}
	}
}

// cronStep returns the polling interval configured for conn, falling back to
// defaultCronStep when none is set or the stored value is not positive
// (time.NewTicker and Ticker.Reset panic on a non-positive duration).
func (f *fetchHandler) cronStep(conn string) time.Duration {
	if step, ok := getMap(&f.cronTime, conn); ok && step > 0 {
		return step
	}
	return defaultCronStep
}

// handle queries every configured source with conn's keyword and hands each
// response to the parser matching the source type. Sources whose request
// failed are skipped.
func (f *fetchHandler) handle(conn string) {
	key := defaultKeyword
	if kw, ok := getMap(&f.keyword, conn); ok && kw != "" {
		key = kw
	}
	for _, source := range newsource.GetSource() {
		resp := f.fetch2(source, key)
		if resp == nil {
			continue
		}
		if strings.ToUpper(source.Type) == "HTML" {
			f.parsesDom(resp, conn, source)
		} else {
			f.parseAjax(resp, source, conn)
		}
	}
}

// receiveMsg processes client messages from rMsgChan until the channel is
// closed. A "search" message updates the connection's keyword and, when
// timeStep is positive, its polling interval, then triggers an immediate fetch.
func (f *fetchHandler) receiveMsg() {
	for r := range f.rMsgChan {
		if r.msg.Action != "search" {
			continue
		}
		t, ok := r.msg.Data.(*setting)
		if !ok || t == nil {
			continue
		}
		// The client may have disconnected while the message was queued.
		if _, alive := getMap(&f.connMap, r.conn); !alive {
			continue
		}
		setMap(&f.keyword, r.conn, t.Keyword)
		if t.TimeStep > 0 {
			setMap(&f.cronTime, r.conn, time.Duration(t.TimeStep)*time.Second)
			if reload, ok := getMap(&f.reloadCron, r.conn); ok {
				// Non-blocking: a wake-up that is already pending is enough,
				// and this goroutine serves every connection so it must never
				// wait on a single one.
				select {
				case reload <- t.TimeStep:
				default:
				}
			}
		}
		f.handle(r.conn)
	}
}

// limitRedirects is the http.Client redirect policy used by fetch2: it refuses
// an https -> non-https downgrade and stops after 3 redirects.
func limitRedirects(req *http.Request, via []*http.Request) error {
	if len(via) > 0 && via[0].URL.Scheme == "https" && req.URL.Scheme != "https" {
		lastHop := via[len(via)-1].URL
		return fmt.Errorf("redirected from secure URL %s to insecure URL %s", lastHop, req.URL)
	}
	if len(via) >= 3 {
		return errors.New("stopped after 3 redirects")
	}
	return nil
}

// fetch2 sends the search request described by source for the given keyword.
// For GET the keyword is appended to the query string; for POST it is sent as
// a form body together with source.ExternParam. It returns nil, after logging
// the cause, when the request cannot be built or fails; otherwise the caller
// owns the response and must close its body.
func (f *fetchHandler) fetch2(source newsource.Source, key string) *http.Response {
	// The error is ignored as in the original; NOTE(review): cookiejar.New is
	// not expected to fail for nil options — confirm.
	jar, _ := cookiejar.New(nil)
	client := http.Client{
		Jar:           jar,
		Timeout:       10 * time.Second,
		CheckRedirect: limitRedirects,
	}

	method := strings.ToUpper(source.Method)
	searchUrl := source.SearchUrl

	var (
		req *http.Request
		err error
	)
	if method == "POST" {
		// The keyword comes from the client, so it is escaped like in the GET
		// branch. ExternParam is sent verbatim, as before.
		params := []string{source.KeywordField + "=" + url.QueryEscape(key)}
		for name, value := range source.ExternParam {
			params = append(params, name+"="+value)
		}
		req, err = http.NewRequest(method, searchUrl, strings.NewReader(strings.Join(params, "&")))
	} else {
		if method == "GET" {
			sep := "?"
			if strings.Contains(searchUrl, "?") {
				sep = "&"
			}
			searchUrl += sep + source.KeywordField + "=" + url.QueryEscape(key)
		}
		req, err = http.NewRequest(method, searchUrl, nil)
	}
	if err != nil {
		log.Printf("building request for %q: %v", searchUrl, err)
		return nil
	}
	if method == "POST" {
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	}
	for name, value := range source.Header {
		req.Header.Set(name, value)
	}
	if source.HeaderFun != nil {
		source.HeaderFun(req)
	}

	response, err := client.Do(req)
	if err != nil {
		log.Printf("fetching %q: %v", searchUrl, err)
		return nil
	}
	return response
}

// fetch performs a GET on url with a fixed set of browser-like headers. It
// returns nil, after logging the cause, when the request cannot be built or
// fails.
func (f *fetchHandler) fetch(url string) *http.Response {
	client := http.Client{Timeout: 10 * time.Second}
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Println(err)
		return nil
	}
	req.Header.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
	req.Header.Add("accept-language", "zh-CN,zh;q=0.9")
	req.Header.Add("cache-control", "no-cache")
	req.Header.Add("connection", "keep-alive")
	req.Header.Add("cookie", "BIDUPSID=844E3DCAA2EEBF5C872DC99B967B6B7B; PSTM=1655872163; BAIDUID=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; BD_UPN=123353; ORIGIN=2; ISSW=1; ISSW=1; BAIDUID_BFESS=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; ZFY=jWFAySgO:AoQfb6emY9vnmEdptVao:Anj0FFkp028wFws:C; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=3; COOKIE_SESSION=42_0_2_2_3_0_1_0_2_0_0_0_18_0_51_0_1655888428_0_1655888377%7C3%230_0_1655888377%7C1; BAIDU_WISE_UID=wapp_1655902298617_702; ZD_ENTRY=google; channel=baidusearch; baikeVisitId=b3b23509-9330-4d33-82ae-b8eb37895917; BA_HECTOR=8k2g2g218ga40181ak1hbgg1n14; BDRCVFR[C0p6oIjvx-c]=mbxnW11j9Dfmh7GuZR8mvqV; BDSVRTM=1011; H_PS_PSSID=36550_36459_36673_36455_36453_36692_36165_36695_36697_36569_36075_36467_36316_36651")
	req.Header.Add("referer", "http://news.baidu.com/")
	req.Header.Add("sec-fetch-dest", "document")
	req.Header.Add("sec-fetch-mode", "navigate")
	req.Header.Add("sec-fetch-site", "cross-site")
	req.Header.Add("sec-fetch-user", "?1")
	req.Header.Add("upgrade-insecure-requests", "1")
	req.Header.Add("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
	req.Header.Add("#sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"")
	req.Header.Add("sec-ch-ua-mobile", "?0")
	req.Header.Add("sec-ch-ua-platform", "\"Linux\"")
	req.Header.Add("postman-token", "81407fbc-2b96-54a7-0193-f640156714ab")

	response, err := client.Do(req)
	if err != nil {
		log.Println(err)
		return nil
	}
	return response
}

// closeBody closes resp's body and logs a failure to do so.
func closeBody(resp *http.Response) {
	if err := resp.Body.Close(); err != nil {
		log.Println(err)
	}
}

// parseAjax lets source.AjaxDealFun extract items from response, keeps only
// the items not yet delivered to conn and queues them for sending. The
// response body is always closed; a panic raised while parsing is logged and
// swallowed.
func (f *fetchHandler) parseAjax(response *http.Response, source newsource.Source, conn string) {
	if response == nil {
		return
	}
	defer func() {
		if r := recover(); r != nil {
			log.Println(r)
		}
	}()
	// Registered after the recover handler so it runs first, also on panic.
	defer closeBody(response)

	var fetched []data.FetchData
	source.AjaxDealFun(&fetched, response)

	// Filter in place. Deleting from the slice while ranging over it, as the
	// previous implementation did, skips the element following each removal.
	fresh := fetched[:0]
	for _, item := range fetched {
		if f.recordIfNew(conn, item) {
			fresh = append(fresh, item)
		}
	}
	if len(fresh) > 0 {
		f.newFetchItem <- dataChan{conn: conn, item: fresh}
	}
}

// parsesDom parses html as an HTML document, runs source.QueryHandler on every
// node matching source.ListQuery, keeps only the items not yet delivered to
// conn and queues them for sending. The response body is always closed; a
// panic raised while parsing is logged and swallowed.
func (f *fetchHandler) parsesDom(html *http.Response, conn string, source newsource.Source) {
	if html == nil {
		return
	}
	defer func() {
		if r := recover(); r != nil {
			log.Println(r)
		}
	}()
	// Registered after the recover handler so it runs first, also on panic.
	defer closeBody(html)

	doc, err := goquery.NewDocumentFromReader(html.Body)
	if err != nil {
		log.Println(err)
		return
	}

	var fresh []data.FetchData
	nowDate := time.Now().Format("2006-01-02 15:04:05")
	doc.Find(source.ListQuery).Each(func(i int, selection *goquery.Selection) {
		item := data.FetchData{CreatedTime: nowDate}
		source.QueryHandler(i, selection, &item)
		if f.recordIfNew(conn, item) {
			fresh = append(fresh, item)
		}
	})
	if len(fresh) > 0 {
		f.newFetchItem <- dataChan{conn: conn, item: fresh}
	}
}

// sendFetchData writes every queued batch to its client as a "newData"
// message until newFetchItem is closed. Batches for clients that are no longer
// connected are dropped.
func (f *fetchHandler) sendFetchData() {
	for batch := range f.newFetchItem {
		conn, ok := getMap(&f.connMap, batch.conn)
		if !ok || conn == nil {
			continue
		}
		err := conn.WriteJSON(message{
			Status:  true,
			Action:  "newData",
			Message: "",
			Data:    batch.item,
		})
		if err != nil {
			log.Println(err)
		}
	}
}

// cronFetch polls all sources for conn at the connection's configured interval
// until c is closed or receives a value. It registers a reload channel for
// conn; a value on that channel makes it re-read the interval and restart the
// ticker. On return the per-connection state is released.
func (f *fetchHandler) cronFetch(conn string, c chan int) {
	// Buffered so receiveMsg can signal without waiting for a running fetch.
	// Register before reading the interval: an update made in between is then
	// either seen by cronStep below or delivered as a wake-up.
	reload := make(chan int, 1)
	setMap(&f.reloadCron, conn, reload)
	defer f.forget(conn)

	ticker := time.NewTicker(f.cronStep(conn))
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			f.handle(conn)
		case <-reload:
			// The value is only a wake-up; the authoritative interval is the
			// one stored in cronTime.
			ticker.Reset(f.cronStep(conn))
		case <-c:
			return
		}
	}
}

// isDecodeError reports whether err means a message was received but is not
// valid JSON for the target type, i.e. the connection itself is still usable.
func isDecodeError(err error) bool {
	var syntaxErr *json.SyntaxError
	var typeErr *json.UnmarshalTypeError
	return errors.As(err, &syntaxErr) || errors.As(err, &typeErr)
}

// readLoop forwards every JSON message read from conn to rMsgChan. Malformed
// messages are logged and skipped; any other read error ends the loop. On
// return the connection is unregistered and closed, and stop is closed to
// terminate the matching cronFetch goroutine.
func (f *fetchHandler) readLoop(conn *websocket.Conn, remote string, stop chan int) {
	defer func() {
		f.connMap.Lock()
		// Only remove our own registration, never that of a newer connection.
		if (*f.connMap.mapX)[remote] == conn {
			delete(*f.connMap.mapX, remote)
		}
		f.connMap.Unlock()
		close(stop)
		// The peer is usually gone already, so a close error carries no
		// useful information.
		_ = conn.Close()
	}()
	for {
		// A fresh setting per message: the previous one may still be in use by
		// receiveMsg, and a null Data would otherwise clear the typed target.
		msg := connChan{
			conn: remote,
			msg:  message{Data: &setting{}},
		}
		if err := conn.ReadJSON(&msg.msg); err != nil {
			if isDecodeError(err) {
				log.Println(err)
				continue
			}
			if !websocket.IsCloseError(err, websocket.CloseGoingAway, websocket.CloseNormalClosure) {
				log.Println(err)
			}
			return
		}
		f.rMsgChan <- msg
	}
}

// openBrowser opens target in the desktop's default browser on Linux, Windows
// and macOS; it does nothing on other systems.
func openBrowser(target string) {
	var cmd *exec.Cmd
	switch runtime.GOOS {
	case "linux":
		cmd = exec.Command(`xdg-open`, target)
	case "windows":
		cmd = exec.Command(`cmd`, `/c`, `start`, target)
	case "darwin":
		cmd = exec.Command(`open`, target)
	default:
		return
	}
	if err := cmd.Start(); err != nil {
		log.Println(err)
		return
	}
	// Reap the launcher process; its exit status is irrelevant.
	_ = cmd.Wait()
}

// main wires the embedded static assets, the index page and the websocket
// endpoint into a gin router, starts the background goroutines and serves on
// listenAddr.
func main() {
	h := newFetchHandler("https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&rsv_dl=ns_pc&word=")
	router := gin.Default()
	static := dist{
		FS:   st,
		path: "dist",
	}
	router.StaticFS("/js", http.FS(static))
	router.StaticFS("/css", http.FS(static))

	upgrader := websocket.Upgrader{
		CheckOrigin: func(r *http.Request) bool {
			return true
		},
	}

	go h.sendFetchData()
	go h.receiveMsg()

	router.GET("/", func(c *gin.Context) {
		file, err := static.Open("index.html")
		if err != nil {
			c.String(http.StatusNotFound, "%s", err)
			return
		}
		defer file.Close()
		page, err := ioutil.ReadAll(file)
		if err != nil {
			c.String(http.StatusInternalServerError, "%s", err)
			return
		}
		c.Data(http.StatusOK, "text/html", page)
	})

	router.GET("ws", func(c *gin.Context) {
		conn, err := upgrader.Upgrade(c.Writer, c.Request, nil)
		if err != nil {
			// Upgrade has already replied to the client with an HTTP error,
			// so no second response may be written here.
			log.Println(err)
			return
		}
		remote := conn.RemoteAddr().String()
		setMap(&h.connMap, remote, conn)

		stop := make(chan int)
		go h.cronFetch(remote, stop)
		go h.readLoop(conn, remote, stop)
	})

	go func() {
		// Give the server a moment to start listening.
		time.Sleep(2 * time.Second)
		openBrowser(localURL)
	}()

	if err := router.Run(listenAddr); err != nil {
		log.Fatal(err)
	}
}