Almost finished (快完工)

parent a969e9cf9e
commit ab97f5c624
main.go (115 lines changed)

@@ -2,6 +2,7 @@ package main
 
 import (
     "embed"
+    "encoding/json"
     "errors"
     "fmt"
     "github.com/PuerkitoBio/goquery"
@@ -17,7 +18,9 @@ import (
     "net/url"
     "os/exec"
     "path/filepath"
+    "reflect"
     "runtime"
+    "runtime/debug"
     "strings"
     "sync"
     "time"
@@ -141,12 +144,14 @@ func (f *fetchHandler) handle(conn string) {
     }
     for _, source := range newsource.GetSource() {
         r := f.fetch2(source, key)
+        if r != nil {
         if strings.ToUpper(source.Type) == "HTML" {
             f.parsesDom(r, conn, source)
         } else {
             f.parseAjax(r, source, conn)
         }
+        }
     }
 }
 
 func (f *fetchHandler) receiveMsg() {
@@ -164,6 +169,11 @@ func (f *fetchHandler) receiveMsg() {
 }
 
 func (f *fetchHandler) fetch2(source newsource.Source, key string) *http.Response {
+    defer func() {
+        if r := recover(); r != nil {
+            log.Printf("err:%s. stack:%s", r, debug.Stack())
+        }
+    }()
     jar, _ := cookiejar.New(nil)
     client := http.Client{
         Transport: nil,
@@ -173,23 +183,41 @@ func (f *fetchHandler) fetch2(source newsource.Source, key string) *http.Response {
     }
     searchUrl := source.SearchUrl
     source.Method = strings.ToUpper(source.Method)
-    if source.Method == "GET" {
+    if source.Method == "GET" && source.KeywordField != "" {
         if !strings.Contains(searchUrl, "?") {
             searchUrl += "?" + source.KeywordField + "=" + url.QueryEscape(key)
         } else {
             searchUrl += "&" + source.KeywordField + "=" + url.QueryEscape(key)
         }
+    } else if source.Method == "GET" && source.KeywordField == "" {
+        if strings.Contains(searchUrl, "${keyword}") {
+            searchUrl = strings.Replace(searchUrl, "${keyword}", url.QueryEscape(key), -1)
+        } else {
+            searchUrl += url.QueryEscape(key)
+        }
     }
     var req *http.Request
     if source.Method == "POST" {
-        body := source.KeywordField + "=" + key
+        body := ""
         if nil != source.ExternParam {
+            if source.IsJson {
+                t := source.ExternParam
+                t[source.KeywordField] = key
+                bytes, err := json.Marshal(t)
+                if err != nil {
+                    log.Printf("build post json param err:[%s]", err)
+                    return nil
+                }
+                body = string(bytes)
+            } else {
+                body = source.KeywordField + "=" + key
                 body += "&"
                 for s, s2 := range source.ExternParam {
                     body += s + "=" + s2 + "&"
                 }
                 body = strings.TrimRight(body, "&")
+            }
         }
         req, _ = http.NewRequest(source.Method, searchUrl, strings.NewReader(body))
         req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
     } else {
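Note on the hunk above: GET sources now have two URL-building paths (append KeywordField=key to the query string, or substitute a ${keyword} placeholder embedded in SearchUrl), and POST sources can send ExternParam plus the keyword either form-encoded or as a JSON object when IsJson is set. Below is a minimal standalone sketch of that request-building logic, assuming a trimmed-down Source with only the fields this hunk touches; the helper name and the copied-map handling are illustrative, not part of the commit.

package main

import (
    "encoding/json"
    "fmt"
    "net/url"
    "strings"
)

// Trimmed-down view of newsource.Source: only the fields the fetch2 hunk uses.
type source struct {
    SearchUrl    string
    Method       string
    KeywordField string
    IsJson       bool
    ExternParam  map[string]string
}

// buildRequest mirrors the URL/body construction added to fetch2 (simplified:
// the real code mutates source.ExternParam and builds an *http.Request).
func buildRequest(s source, key string) (searchUrl, body string, err error) {
    searchUrl = s.SearchUrl
    switch strings.ToUpper(s.Method) {
    case "GET":
        if s.KeywordField != "" {
            sep := "?"
            if strings.Contains(searchUrl, "?") {
                sep = "&"
            }
            searchUrl += sep + s.KeywordField + "=" + url.QueryEscape(key)
        } else if strings.Contains(searchUrl, "${keyword}") {
            // Placeholder style, e.g. the ifeng search URL.
            searchUrl = strings.Replace(searchUrl, "${keyword}", url.QueryEscape(key), -1)
        } else {
            searchUrl += url.QueryEscape(key)
        }
    case "POST":
        if s.IsJson {
            // JSON body: extern params plus the keyword field.
            m := map[string]string{s.KeywordField: key}
            for k, v := range s.ExternParam {
                m[k] = v
            }
            b, e := json.Marshal(m)
            if e != nil {
                return "", "", e
            }
            body = string(b)
        } else {
            // Form-encoded body.
            body = s.KeywordField + "=" + key
            for k, v := range s.ExternParam {
                body += "&" + k + "=" + v
            }
        }
    }
    return searchUrl, body, nil
}

func main() {
    u, b, _ := buildRequest(source{
        SearchUrl:    "http://search.people.cn/search-platform/front/search",
        Method:       "post",
        KeywordField: "key",
        IsJson:       true,
        ExternParam:  map[string]string{"page": "1", "limit": "10"},
    }, "纪检")
    fmt.Println(u, b)
}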
@@ -220,46 +248,8 @@ func (f *fetchHandler) fetch2(source newsource.Source, key string) *http.Response {
     response, err := client.Do(req)
 
     if err != nil {
-        panic(err)
-    }
-    return response
-}
-
-func (f *fetchHandler) fetch(url string) *http.Response {
-    defer func() {
-        if r := recover(); r != nil {
-            log.Println(r)
-        }
-    }()
-    client := http.Client{
-        Transport:     nil,
-        CheckRedirect: nil,
-        Jar:           nil,
-        Timeout:       10 * time.Second,
-    }
-    req, _ := http.NewRequest("GET", url, nil)
-
-    req.Header.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
-    req.Header.Add("accept-language", "zh-CN,zh;q=0.9")
-    req.Header.Add("cache-control", "no-cache")
-    req.Header.Add("connection", "keep-alive")
-    req.Header.Add("cookie", "BIDUPSID=844E3DCAA2EEBF5C872DC99B967B6B7B; PSTM=1655872163; BAIDUID=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; BD_UPN=123353; ORIGIN=2; ISSW=1; ISSW=1; BAIDUID_BFESS=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; ZFY=jWFAySgO:AoQfb6emY9vnmEdptVao:Anj0FFkp028wFws:C; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=3; COOKIE_SESSION=42_0_2_2_3_0_1_0_2_0_0_0_18_0_51_0_1655888428_0_1655888377%7C3%230_0_1655888377%7C1; BAIDU_WISE_UID=wapp_1655902298617_702; ZD_ENTRY=google; channel=baidusearch; baikeVisitId=b3b23509-9330-4d33-82ae-b8eb37895917; BA_HECTOR=8k2g2g218ga40181ak1hbgg1n14; BDRCVFR[C0p6oIjvx-c]=mbxnW11j9Dfmh7GuZR8mvqV; BDSVRTM=1011; H_PS_PSSID=36550_36459_36673_36455_36453_36692_36165_36695_36697_36569_36075_36467_36316_36651")
-    req.Header.Add("referer", "http://news.baidu.com/")
-    req.Header.Add("sec-fetch-dest", "document")
-    req.Header.Add("sec-fetch-mode", "navigate")
-    req.Header.Add("sec-fetch-site", "cross-site")
-    req.Header.Add("sec-fetch-user", "?1")
-    req.Header.Add("upgrade-insecure-requests", "1")
-    req.Header.Add("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
-    req.Header.Add("#sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"")
-    req.Header.Add("sec-ch-ua-mobile", "?0")
-    req.Header.Add("sec-ch-ua-platform", "\"Linux\"")
-    req.Header.Add("postman-token", "81407fbc-2b96-54a7-0193-f640156714ab")
-
-    response, err := client.Do(req)
-
-    if err != nil {
-        panic(err)
+        log.Printf("request %s err: %s", req.URL, err)
+        return nil
     }
     return response
 }
@@ -267,15 +257,41 @@ func (f *fetchHandler) fetch(url string) *http.Response {
 func (f *fetchHandler) parseAjax(response *http.Response, source newsource.Source, conn string) {
     defer func() {
         if r := recover(); r != nil {
-            log.Println(r)
+            log.Printf("parse ajax response err[%s]. stack:[%s]", r, debug.Stack())
         }
     }()
+    nowDate := time.Now().Format("2006-01-02 15:04:06")
     var newFetch []data.FetchData
-    source.AjaxDealFun(&newFetch, response)
-    if len(newFetch) > 0 {
 
-        for i, fetchData := range newFetch {
+    if source.AjaxSimpleDeal != nil && source.AjaxDealFun == nil {
+        bytes, err := ioutil.ReadAll(response.Body)
+        if err != nil {
+            log.Printf("read response body err:[%s]", err)
+            return
+        }
+        if source.AjaxSimpleDeal != nil && source.Target != nil {
+            dst := reflect.New(source.Target).Elem()
+            err = json.Unmarshal(bytes, dst.Addr().Interface())
+            if err != nil {
+                log.Printf("jsondecode err:[%s]", err)
+                return
+            }
+            source.AjaxSimpleDeal(dst.Interface(), &newFetch)
+        }
+    } else if source.AjaxDealFun != nil && source.AjaxSimpleDeal == nil {
+        source.AjaxDealFun(&newFetch, response)
+    }
+
+    if len(newFetch) > 0 {
+        for i := 0; i < len(newFetch); i++ {
+            fetchData := newFetch[i]
             k := conn + "_" + fetchData.Url + "_" + fetchData.Title
+            if newFetch[i].CreatedTime == "" {
+                newFetch[i].CreatedTime = nowDate
+            }
+            if newFetch[i].Source == "" {
+                newFetch[i].Source = source.Name
+            }
             if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok {
                 f.hadFetchData = append(f.hadFetchData, fetchData)
                 setMap(&f.hadFetchedMap, k, 1)
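The parseAjax rewrite above adds a second, declarative handler path: instead of reading the response body itself through AjaxDealFun, a source can register its response shape as a reflect.Type (Target) and receive the decoded value through AjaxSimpleDeal. Below is a minimal sketch of that reflect.New plus json.Unmarshal round trip in isolation; demoResponse is a made-up stand-in for real types such as BjNewsResponse.

package main

import (
    "encoding/json"
    "fmt"
    "reflect"
)

// Hypothetical response shape, standing in for e.g. BjNewsResponse.
type demoResponse struct {
    Data struct {
        Items []string `json:"items"`
    } `json:"data"`
}

func main() {
    // What a Source would carry after this commit.
    target := reflect.TypeOf(demoResponse{})
    simpleDeal := func(v interface{}) {
        r := v.(demoResponse) // the callback type-asserts back to its own type
        fmt.Println(r.Data.Items)
    }

    // What parseAjax does with it: build a new value of the registered type,
    // unmarshal the body into it, then hand the concrete value to the callback.
    body := []byte(`{"data":{"items":["a","b"]}}`)
    dst := reflect.New(target).Elem()
    if err := json.Unmarshal(body, dst.Addr().Interface()); err != nil {
        fmt.Println("jsondecode err:", err)
        return
    }
    simpleDeal(dst.Interface())
}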
@@ -297,7 +313,7 @@ func (f *fetchHandler) parseAjax(response *http.Response, source newsource.Source, conn string) {
 func (f *fetchHandler) parsesDom(html *http.Response, conn string, source newsource.Source) {
     defer func() {
         if r := recover(); r != nil {
-            log.Println(r)
+            log.Printf("parse html err:[%s]. stack:[%s]", r, debug.Stack())
         }
     }()
     doc, err := goquery.NewDocumentFromReader(html.Body)
@@ -309,6 +325,7 @@ func (f *fetchHandler) parsesDom(html *http.Response, conn string, source newsource.Source) {
     doc.Find(source.ListQuery).Each(func(i int, selection *goquery.Selection) {
         fetchData := data.FetchData{
             CreatedTime: nowDate,
+            Source:      source.Name,
         }
         source.QueryHandler(i, selection, &fetchData)
         k := conn + "_" + fetchData.Url + "_" + fetchData.Title
@@ -341,7 +358,7 @@ func (f *fetchHandler) sendFetchData() {
             Data: dataFetch.item,
         })
         if err != nil {
-            log.Println(err)
+            log.Printf("send new fetch data err:[%s]", err)
         }
     }
 }
@@ -434,7 +451,7 @@ func main() {
             cc <- 1
             return
         }
-        log.Println(err)
+        log.Printf("websocket read client msg err:[%s]", err)
     } else {
         h.rMsgChan <- msg
     }
newsource/bjnews.go (new file, 43 lines)

@@ -0,0 +1,43 @@
+package newsource
+
+type BjNewsResponse struct {
+    Data struct {
+        Data []BjNewsItem `json:"data"`
+    } `json:"data"`
+}
+
+type BjNewsDetailURL struct {
+    MURL  string `json:"m_url"`
+    PcURL string `json:"pc_url"`
+}
+
+type BjNewsItem struct {
+    Source    BjNewsSource    `json:"_source"`
+    Highlight BjNewsHighlight `json:"highlight"`
+}
+
+type BjNewsSource struct {
+    SpecialType string          `json:"special_type"`
+    PublishTime string          `json:"publish_time"`
+    Title       string          `json:"title"`
+    Type        int             `json:"type"`
+    Keyword     []interface{}   `json:"keyword"`
+    UUID        string          `json:"uuid"`
+    CoverList   string          `json:"cover_list"`
+    HasVideo    int             `json:"has_video"`
+    ImgList     []string        `json:"img_list"`
+    Cover       string          `json:"cover"`
+    BigCover    string          `json:"big_cover"`
+    CommentNum  int             `json:"comment_num"`
+    ZanNum      int             `json:"zan_num"`
+    ShareNum    string          `json:"share_num"`
+    LiveStatus  string          `json:"live_status"`
+    XjhID       string          `json:"xjh_id"`
+    DetailURL   BjNewsDetailURL `json:"detail_url"`
+}
+
+type BjNewsHighlight struct {
+    Title   string `json:"title"`
+    Desc    string `json:"desc"`
+    Content string `json:"content"`
+}
@@ -1,10 +1,5 @@
 package newsource
 
-import (
-    "regexp"
-    "strings"
-)
-
 type CCDIPAGERInfo struct {
     BM string `json:"BM"`
     BC string `json:"BC"`
@@ -27,9 +22,3 @@ type CCDIPAGERData struct {
     Info     []CCDIPAGERInfo `json:"info"`
     NextPage int             `json:"nextPage"`
 }
-
-func stripTags(content string) string {
-    content = strings.Replace(content, " ", "", -1)
-    re := regexp.MustCompile(`<(.|\n)*?>`)
-    return re.ReplaceAllString(content, "")
-}
newsource/ifeng.go (new file, 14 lines)

@@ -0,0 +1,14 @@
+package newsource
+
+type IFENGResponse struct {
+    Data struct {
+        Items []IFENGItem `json:"items"`
+    } `json:"data"`
+}
+
+type IFENGItem struct {
+    ID     string `json:"id"`
+    Source string `json:"source"`
+    Title  string `json:"title"`
+    URL    string `json:"url"`
+}
@@ -8,6 +8,7 @@ import (
     "io/ioutil"
     "log"
     "net/http"
+    "reflect"
     "regexp"
     "strconv"
     "strings"
@@ -28,6 +29,15 @@ type Source struct {
     HeaderFun   func(r *http.Request)
     ExternParam map[string]string
     AjaxDealFun func(*[]data.FetchData, *http.Response)
+    Target         reflect.Type
+    UnmarshalerFun func([]byte) interface{}
+    AjaxSimpleDeal func(interface{}, *[]data.FetchData)
+    IsJson         bool
+}
+
+func StripTags(content string) string {
+    re := regexp.MustCompile(`<(.|\n)*?>`)
+    return re.ReplaceAllString(content, "")
 }
 
 func GetSource() []Source {
@@ -36,7 +46,7 @@ func GetSource() []Source {
     nowDate := ti.Format("2006-01-02 15:04:05")
     return []Source{
         {
-            Name:      "中央纪委监察部",
+            Name:      "中央纪委国家监委网站",
             SearchUrl: "https://www.ccdi.gov.cn/was5/web/search",
             Method:    "post",
             ListQuery: ".center_box0 li",
@@ -80,6 +90,7 @@ func GetSource() []Source {
                 t := selection.Find(".news-title-font_1xS-F").First()
                 data.Title = t.Text()
                 data.Desc = selection.Find(".c-row .c-color-text").First().Text()
+                data.Source = selection.Find("span[aria-label*=\"新闻来源\"]").First().Text()
                 data.Date = selection.Find("span[class=\"c-color-gray2 c-font-normal c-gap-right-xsmall\"]").First().Text()
                 n := compile.FindAllStringSubmatch(data.Date, -1)
                 if nil != n {
@@ -151,7 +162,7 @@ func GetSource() []Source {
                 if "" == v.YT {
                     continue
                 }
-                desc := stripTags(v.IRCONTENT)
+                desc := StripTags(v.IRCONTENT)
                 l := utf8.RuneCountInString(desc)
                 if l > 30 {
                     l = 30
@@ -159,7 +170,7 @@ func GetSource() []Source {
                 desc = string([]rune(desc)[:30])
                 d := data.FetchData{
                     Url:         fmt.Sprintf("https://jjjcb.ccdi.gov.cn/epaper/index.html?guid=%s", v.ZBGUID),
-                    Title:       v.YT,
+                    Title:       v.DOCTITLE,
                     Desc:        desc,
                     Date:        v.DOCPUBTIME,
                     CreatedTime: nowDate,
@@ -169,5 +180,230 @@ func GetSource() []Source {
                 }
             },
         },
+        {
+            Name:         "中新网搜索",
+            SearchUrl:    "https://sou.chinanews.com.cn/search.do",
+            KeywordField: "q",
+            Method:       "get",
+            Type:         "html",
+            ListQuery:    "#news_list table",
+            QueryHandler: func(i int, selection *goquery.Selection, fetchData *data.FetchData) {
+                t := selection.Find(".news_item a").First()
+                fetchData.Title = t.Text()
+                fetchData.Url, _ = t.Attr("href")
+                fetchData.Desc = selection.Find(".news_content").First().Text()
+                tt := selection.Find(".news_other").First().Text()
+                fet := strings.Split(tt, "html")
+                fetchData.Date = fet[len(fet)-1]
+            },
+        },
+        {
+            Name:         "新浪新闻搜索",
+            SearchUrl:    "https://search.sina.com.cn/?range=title&c=news&time=&ie=utf-8",
+            Method:       "get",
+            Type:         "html",
+            KeywordField: "q",
+            ListQuery:    "#wrap .box-result",
+            Header: map[string]string{
+                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
+                "authority":  "search.sina.com.cn",
+                "referer":    "https://news.sina.com.cn/rol",
+            },
+            QueryHandler: func(i int, selection *goquery.Selection, fetchData *data.FetchData) {
+                t := selection.Find("h2>a").First()
+                fetchData.Title = t.Text()
+                fetchData.Url, _ = t.Attr("href")
+                fetchData.Desc = selection.Find(".r-info .content").Text()
+                s := selection.Find("h2 >.fgray_time").First().Text()
+                ll := strings.Fields(s)
+                if len(ll) > 2 {
+                    fetchData.Date = ll[1] + " " + ll[2]
+                    fetchData.Source = ll[0]
+                } else {
+                    fetchData.Date = ll[1]
+                    fetchData.Source = ll[0]
+                }
+
+            },
+        },
+        {
+            Name:      "联合早报",
+            SearchUrl: "https://www.zaobao.com/search?pageNo=1&pageSize=10",
+            Method:    "get",
+            Type:      "ajax",
+            Header: map[string]string{
+                "authority":  "www.zaobao.com",
+                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
+            },
+            KeywordField: "keywords",
+            AjaxDealFun: func(i *[]data.FetchData, response *http.Response) {
+                bytes, err := ioutil.ReadAll(response.Body)
+                if err != nil {
+                    log.Println(err)
+                    return
+                }
+                var r ZaoBaoResponse
+                err = json.Unmarshal(bytes, &r)
+                if err != nil {
+                    log.Println(err)
+                    return
+                }
+                for _, datum := range r.Result.Data {
+                    t := time.UnixMilli(datum.PublicationDate)
+                    v := data.FetchData{
+                        Url:   "https://www.zaobao.com" + datum.URL,
+                        Title: datum.Title,
+                        Desc:  datum.ContentPreview,
+                        Date:  t.Format("2006-01-02 15:04:05"),
+                    }
+                    *i = append(*i, v)
+                }
+            },
+        },
+        {
+            Name:         "新京报",
+            SearchUrl:    "https://s.bjnews.com.cn/bjnews/getlist?from=bw&page=1&orderby=0",
+            KeywordField: "bwsk",
+            Type:         "ajax",
+            Method:       "get",
+            Header: map[string]string{
+                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
+            },
+            Target: reflect.TypeOf(BjNewsResponse{}),
+            AjaxSimpleDeal: func(rr interface{}, i *[]data.FetchData) {
+                r := rr.(BjNewsResponse)
+                for _, v := range r.Data.Data {
+                    item := data.FetchData{
+                        Url:   v.Source.DetailURL.PcURL,
+                        Title: v.Source.Title,
+                        Desc:  v.Highlight.Desc,
+                        Date:  v.Source.PublishTime,
+                    }
+                    *i = append(*i, item)
+                }
+            },
+        },
+        {
+            Name:      "环球网",
+            SearchUrl: "https://www.baidu.com/s?wd=site:huanqiu.com%20",
+            Method:    "get",
+            ListQuery: "div[class=\"result c-container xpath-log new-pmd\"]",
+            QueryHandler: func(i int, selection *goquery.Selection, data *data.FetchData) {
+                data.Url, _ = selection.Attr("mu")
+                t := selection.Find("h3[class='c-title t t tts-title'] a").First()
+                data.Title = t.Text()
+                data.Desc = selection.Find(".content-right_8Zs40").First().Text()
+                data.Source = selection.Find(".source_1Vdff .c-color-gray").First().Text()
+                data.Date = selection.Find("span[class=\"c-color-gray2\"]").First().Text()
+                n := compile.FindAllStringSubmatch(data.Date, -1)
+                if nil != n {
+                    nn, _ := strconv.Atoi(n[0][0])
+                    if strings.Contains(data.Date, "小时") {
+                        data.Date = ti.Add(-time.Duration(nn) * time.Hour).Format("2006-01-02 15:04")
+                    }
+                    if strings.Contains(data.Date, "分钟") {
+                        data.Date = ti.Add(-time.Duration(nn) * time.Minute).Format("2006-01-02 15:04")
+                    }
+                    if strings.Contains(data.Date, "天") {
+                        data.Date = ti.Add(-time.Duration(nn) * time.Hour * 24).Format("2006-01-02 15:04")
+                    }
+                }
+                if strings.Contains(data.Date, "昨天") {
+                    data.Date = ti.Add(-time.Duration(24) * time.Hour).Format("2006-01-02 15:04")
+                }
+                if strings.Contains(data.Date, "前天") {
+                    data.Date = ti.Add(-time.Duration(48) * time.Hour).Format("2006-01-02 15:04")
+                }
+            },
+            Type: "html",
+            HeaderFun: func(req *http.Request) {
+                req.Header.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
+                req.Header.Add("accept-language", "zh-CN,zh;q=0.9")
+                req.Header.Add("cache-control", "no-cache")
+                req.Header.Add("connection", "keep-alive")
+                req.Header.Add("cookie", "BIDUPSID=844E3DCAA2EEBF5C872DC99B967B6B7B; PSTM=1655872163; BAIDUID=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; BD_UPN=123353; ORIGIN=2; ISSW=1; ISSW=1; BAIDUID_BFESS=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; ZFY=jWFAySgO:AoQfb6emY9vnmEdptVao:Anj0FFkp028wFws:C; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=3; COOKIE_SESSION=42_0_2_2_3_0_1_0_2_0_0_0_18_0_51_0_1655888428_0_1655888377%7C3%230_0_1655888377%7C1; BAIDU_WISE_UID=wapp_1655902298617_702; ZD_ENTRY=google; channel=baidusearch; baikeVisitId=b3b23509-9330-4d33-82ae-b8eb37895917; BA_HECTOR=8k2g2g218ga40181ak1hbgg1n14; BDRCVFR[C0p6oIjvx-c]=mbxnW11j9Dfmh7GuZR8mvqV; BDSVRTM=1011; H_PS_PSSID=36550_36459_36673_36455_36453_36692_36165_36695_36697_36569_36075_36467_36316_36651")
+                req.Header.Add("referer", "http://news.baidu.com/")
+                req.Header.Add("sec-fetch-dest", "document")
+                req.Header.Add("sec-fetch-mode", "navigate")
+                req.Header.Add("sec-fetch-site", "cross-site")
+                req.Header.Add("sec-fetch-user", "?1")
+                req.Header.Add("upgrade-insecure-requests", "1")
+                req.Header.Add("user-agent", "Mozilla/5.0 (X11; Windows x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
+                req.Header.Add("#sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"")
+                req.Header.Add("sec-ch-ua-mobile", "?0")
+                req.Header.Add("sec-ch-ua-platform", "\"Windows\"")
+                req.Header.Add("postman-token", "81407fbc-2b96-54a7-0193-f640156714ab")
+
+            },
+        },
+        {
+            Name:      "凤凰新闻",
+            SearchUrl: "https://shankapi.ifeng.com/season/getSoFengData/all/${keyword}/1/getSoFengDataCallback?callback=getSoFengDataCallback2",
+            Type:      "ajax",
+            Method:    "get",
+            AjaxDealFun: func(i *[]data.FetchData, response *http.Response) {
+                bytes, err := ioutil.ReadAll(response.Body)
+                if err != nil {
+                    log.Println(err)
+                    return
+                }
+                r := string(bytes)
+                r = strings.Replace(r, "getSoFengDataCallback(", "", 1)
+                r = strings.TrimRight(r, ")")
+                var res IFENGResponse
+                err = json.Unmarshal([]byte(r), &res)
+                if err != nil {
+                    log.Println(err)
+                    return
+                }
+                for _, v := range res.Data.Items {
+                    *i = append(*i, data.FetchData{
+                        Url:    "https:" + v.URL,
+                        Title:  StripTags(v.Title),
+                        Source: v.Source,
+                    })
+                }
+            },
+        },
+        {
+            Name:         "人民网",
+            SearchUrl:    "http://search.people.cn/search-platform/front/search",
+            Type:         "ajax",
+            Method:       "post",
+            IsJson:       true,
+            KeywordField: "key",
+            Target:       reflect.TypeOf(PeopleResponse{}),
+            ExternParam: map[string]string{
+                "page":       "1",
+                "limit":      "10",
+                "hasTitle":   "true",
+                "hasContent": "true",
+                "isFuzzy":    "false",
+                "type":       "0",
+                "sortType":   "0",
+                "startTime":  "0",
+                "endTime":    "0",
+            },
+            AjaxSimpleDeal: func(i interface{}, v *[]data.FetchData) {
+                r := i.(PeopleResponse)
+                for _, record := range r.Data.Records {
+                    tt := time.UnixMilli(int64(record.InputTime))
+                    *v = append(*v, data.FetchData{
+                        Url:    record.Url,
+                        Title:  StripTags(record.Title),
+                        Desc:   StripTags(record.Content),
+                        Date:   tt.Format("2006-01-02 15:04:05"),
+                        Source: record.OriginName,
+                    })
+                }
+            },
+            HeaderFun: func(req *http.Request) {
+                req.Header.Set("Content-Type", "application/json")
+                req.Header.Add("Cookie", "__jsluid_h=a1b7d0d8dad3604c9393bbcaf36ced1f; sso_c=0; sfr=1; __jsluid_h=fbf7d0abc29ec349c0c0c89c779c268c")
+                req.Header.Add("Origin", "http://search.people.cn")
+                req.Header.Add("Referer", "http://search.people.cn/s/?keyword=%E7%BA%AA%E6%A3%80&st=0")
+                req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
+            },
+        },
     }
 }
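The 环球网 entry above normalizes Baidu's relative timestamps ("N小时前", "N分钟前", "昨天", and so on) by subtracting the extracted count from the current time. Below is a small self-contained sketch of that conversion; the digit regex stands in for the package-level compile pattern, which is defined outside this diff, and the function name is only illustrative.

package main

import (
    "fmt"
    "regexp"
    "strconv"
    "strings"
    "time"
)

var numRe = regexp.MustCompile(`[0-9]+`)

// relativeToAbsolute turns strings like "3小时前" or "昨天" into a formatted
// timestamp relative to now, following the cases handled in the QueryHandler.
func relativeToAbsolute(s string, now time.Time) string {
    if strings.Contains(s, "昨天") {
        return now.Add(-24 * time.Hour).Format("2006-01-02 15:04")
    }
    if strings.Contains(s, "前天") {
        return now.Add(-48 * time.Hour).Format("2006-01-02 15:04")
    }
    if m := numRe.FindString(s); m != "" {
        n, _ := strconv.Atoi(m)
        d := time.Duration(n)
        switch {
        case strings.Contains(s, "分钟"):
            return now.Add(-d * time.Minute).Format("2006-01-02 15:04")
        case strings.Contains(s, "小时"):
            return now.Add(-d * time.Hour).Format("2006-01-02 15:04")
        case strings.Contains(s, "天"):
            return now.Add(-d * 24 * time.Hour).Format("2006-01-02 15:04")
        }
    }
    return s // already an absolute date, keep as-is
}

func main() {
    now := time.Now()
    fmt.Println(relativeToAbsolute("3小时前", now))
    fmt.Println(relativeToAbsolute("昨天 08:30", now))
}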
newsource/people.go (new file, 15 lines)

@@ -0,0 +1,15 @@
+package newsource
+
+type PeopleResponse struct {
+    Data struct {
+        Records []PeopleRecord
+    }
+}
+
+type PeopleRecord struct {
+    Title      string `json:"title"`
+    Url        string `json:"url"`
+    OriginName string `json:"originName"`
+    Content    string `json:"content"`
+    InputTime  int    `json:"inputTime"`
+}
newsource/zaobao.go (new file, 19 lines)

@@ -0,0 +1,19 @@
+package newsource
+
+type ZaoBaoResponse struct {
+    Result struct {
+        Data []ZaoBaoItem `json:"data"`
+    } `json:"result"`
+}
+
+type ZaoBaoItem struct {
+    ID              string      `json:"id"`
+    Title           string      `json:"title"`
+    Digest          interface{} `json:"digest"`
+    ContentPreview  string      `json:"contentPreview"`
+    MainNode        string      `json:"mainNode"`
+    PublicationDate int64       `json:"publicationDate"`
+    URL             string      `json:"url"`
+    Pictures        interface{} `json:"pictures"`
+    IsSticky        int         `json:"isSticky"`
+}
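Both the zaobao and people responses carry publication times as millisecond Unix epochs (ZaoBaoItem.PublicationDate, PeopleRecord.InputTime), which the handlers in GetSource convert with time.UnixMilli before formatting. A minimal sketch of that conversion, with a made-up example value:

package main

import (
    "fmt"
    "time"
)

func main() {
    // publicationDate / inputTime arrive as millisecond Unix epochs;
    // the handlers format them the same way as the other sources' dates.
    var publicationDate int64 = 1655888428000 // example value only
    t := time.UnixMilli(publicationDate)
    fmt.Println(t.Format("2006-01-02 15:04:05"))
}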
@@ -31,6 +31,10 @@
         </template>
       </el-table-column>
+      <el-table-column
+        prop="source"
+        label="来源">
+      </el-table-column>
       <el-table-column
         prop="desc"
         label="描述"