Almost done
parent a969e9cf9e
commit ab97f5c624

main.go (131 changed lines)
@@ -2,6 +2,7 @@ package main
import (
    "embed"
    "encoding/json"
    "errors"
    "fmt"
    "github.com/PuerkitoBio/goquery"
@@ -17,7 +18,9 @@ import (
    "net/url"
    "os/exec"
    "path/filepath"
    "reflect"
    "runtime"
    "runtime/debug"
    "strings"
    "sync"
    "time"
@@ -141,10 +144,12 @@ func (f *fetchHandler) handle(conn string) {
    }
    for _, source := range newsource.GetSource() {
        r := f.fetch2(source, key)
        if strings.ToUpper(source.Type) == "HTML" {
            f.parsesDom(r, conn, source)
        } else {
            f.parseAjax(r, source, conn)
        if r != nil {
            if strings.ToUpper(source.Type) == "HTML" {
                f.parsesDom(r, conn, source)
            } else {
                f.parseAjax(r, source, conn)
            }
        }
    }
}
@@ -164,6 +169,11 @@ func (f *fetchHandler) receiveMsg() {
}

func (f *fetchHandler) fetch2(source newsource.Source, key string) *http.Response {
    defer func() {
        if r := recover(); r != nil {
            log.Printf("err:%s. stack:%s", r, debug.Stack())
        }
    }()
    jar, _ := cookiejar.New(nil)
    client := http.Client{
        Transport: nil,
@@ -173,22 +183,40 @@ func (f *fetchHandler) fetch2(source newsource.Source, key string) *http.Respons
    }
    searchUrl := source.SearchUrl
    source.Method = strings.ToUpper(source.Method)
    if source.Method == "GET" {
    if source.Method == "GET" && source.KeywordField != "" {
        if !strings.Contains(searchUrl, "?") {
            searchUrl += "?" + source.KeywordField + "=" + url.QueryEscape(key)
        } else {
            searchUrl += "&" + source.KeywordField + "=" + url.QueryEscape(key)
        }
    } else if source.Method == "GET" && source.KeywordField == "" {
        if strings.Contains(searchUrl, "${keyword}") {
            searchUrl = strings.Replace(searchUrl, "${keyword}", url.QueryEscape(key), -1)
        } else {
            searchUrl += url.QueryEscape(key)
        }
    }
    var req *http.Request
    if source.Method == "POST" {
        body := source.KeywordField + "=" + key
        body := ""
        if nil != source.ExternParam {
            body += "&"
            for s, s2 := range source.ExternParam {
                body += s + "=" + s2 + "&"
            if source.IsJson {
                t := source.ExternParam
                t[source.KeywordField] = key
                bytes, err := json.Marshal(t)
                if err != nil {
                    log.Printf("build post json param err:[%s]", err)
                    return nil
                }
                body = string(bytes)
            } else {
                body = source.KeywordField + "=" + key
                body += "&"
                for s, s2 := range source.ExternParam {
                    body += s + "=" + s2 + "&"
                }
                body = strings.TrimRight(body, "&")
            }
            body = strings.TrimRight(body, "&")
        }
        req, _ = http.NewRequest(source.Method, searchUrl, strings.NewReader(body))
        req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
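Note: the following is an illustrative sketch, not code from this commit. It isolates the request-body logic the hunk above adds to fetch2: when IsJson is set, the keyword and ExternParam are JSON-encoded together; otherwise a form-encoded k=v&... string is built. The buildPostBody helper and SourceParams struct are invented names used only to keep the snippet self-contained.

package main

import (
    "encoding/json"
    "fmt"
    "strings"
)

// SourceParams loosely mirrors the Source fields fetch2 reads when
// assembling a POST body; the struct itself is illustrative only.
type SourceParams struct {
    KeywordField string
    ExternParam  map[string]string
    IsJson       bool
}

// buildPostBody returns a JSON body when IsJson is true, otherwise an
// application/x-www-form-urlencoded style k=v&... string.
func buildPostBody(p SourceParams, key string) (string, error) {
    if p.IsJson {
        // Merge the keyword into the extra parameters and marshal the map.
        params := map[string]string{}
        for k, v := range p.ExternParam {
            params[k] = v
        }
        params[p.KeywordField] = key
        b, err := json.Marshal(params)
        if err != nil {
            return "", err
        }
        return string(b), nil
    }
    // Form-encoded fallback: keyword pair first, then the extra parameters.
    body := p.KeywordField + "=" + key + "&"
    for k, v := range p.ExternParam {
        body += k + "=" + v + "&"
    }
    return strings.TrimRight(body, "&"), nil
}

func main() {
    body, _ := buildPostBody(SourceParams{
        KeywordField: "key",
        ExternParam:  map[string]string{"page": "1", "limit": "10"},
        IsJson:       true,
    }, "纪检")
    fmt.Println(body) // e.g. {"key":"纪检","limit":"10","page":"1"}
}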
@@ -220,46 +248,8 @@ func (f *fetchHandler) fetch2(source newsource.Source, key string) *http.Respons
    response, err := client.Do(req)

    if err != nil {
        panic(err)
    }
    return response
}

func (f *fetchHandler) fetch(url string) *http.Response {
    defer func() {
        if r := recover(); r != nil {
            log.Println(r)
        }
    }()
    client := http.Client{
        Transport: nil,
        CheckRedirect: nil,
        Jar: nil,
        Timeout: 10 * time.Second,
    }
    req, _ := http.NewRequest("GET", url, nil)

    req.Header.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
    req.Header.Add("accept-language", "zh-CN,zh;q=0.9")
    req.Header.Add("cache-control", "no-cache")
    req.Header.Add("connection", "keep-alive")
    req.Header.Add("cookie", "BIDUPSID=844E3DCAA2EEBF5C872DC99B967B6B7B; PSTM=1655872163; BAIDUID=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; BD_UPN=123353; ORIGIN=2; ISSW=1; ISSW=1; BAIDUID_BFESS=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; ZFY=jWFAySgO:AoQfb6emY9vnmEdptVao:Anj0FFkp028wFws:C; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=3; COOKIE_SESSION=42_0_2_2_3_0_1_0_2_0_0_0_18_0_51_0_1655888428_0_1655888377%7C3%230_0_1655888377%7C1; BAIDU_WISE_UID=wapp_1655902298617_702; ZD_ENTRY=google; channel=baidusearch; baikeVisitId=b3b23509-9330-4d33-82ae-b8eb37895917; BA_HECTOR=8k2g2g218ga40181ak1hbgg1n14; BDRCVFR[C0p6oIjvx-c]=mbxnW11j9Dfmh7GuZR8mvqV; BDSVRTM=1011; H_PS_PSSID=36550_36459_36673_36455_36453_36692_36165_36695_36697_36569_36075_36467_36316_36651")
    req.Header.Add("referer", "http://news.baidu.com/")
    req.Header.Add("sec-fetch-dest", "document")
    req.Header.Add("sec-fetch-mode", "navigate")
    req.Header.Add("sec-fetch-site", "cross-site")
    req.Header.Add("sec-fetch-user", "?1")
    req.Header.Add("upgrade-insecure-requests", "1")
    req.Header.Add("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
    req.Header.Add("#sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"")
    req.Header.Add("sec-ch-ua-mobile", "?0")
    req.Header.Add("sec-ch-ua-platform", "\"Linux\"")
    req.Header.Add("postman-token", "81407fbc-2b96-54a7-0193-f640156714ab")

    response, err := client.Do(req)

    if err != nil {
        panic(err)
        log.Printf("request %s err: %s", req.URL, err)
        return nil
    }
    return response
}
@@ -267,15 +257,41 @@ func (f *fetchHandler) fetch(url string) *http.Response {
func (f *fetchHandler) parseAjax(response *http.Response, source newsource.Source, conn string) {
    defer func() {
        if r := recover(); r != nil {
            log.Println(r)
            log.Printf("parse ajax response err[%s]. stack:[%s]", r, debug.Stack())
        }
    }()
    nowDate := time.Now().Format("2006-01-02 15:04:06")
    var newFetch []data.FetchData
    source.AjaxDealFun(&newFetch, response)
    if len(newFetch) > 0 {

        for i, fetchData := range newFetch {
    if source.AjaxSimpleDeal != nil && source.AjaxDealFun == nil {
        bytes, err := ioutil.ReadAll(response.Body)
        if err != nil {
            log.Printf("read response body err:[%s]", err)
            return
        }
        if source.AjaxSimpleDeal != nil && source.Target != nil {
            dst := reflect.New(source.Target).Elem()
            err = json.Unmarshal(bytes, dst.Addr().Interface())
            if err != nil {
                log.Printf("jsondecode err:[%s]", err)
                return
            }
            source.AjaxSimpleDeal(dst.Interface(), &newFetch)
        }
    } else if source.AjaxDealFun != nil && source.AjaxSimpleDeal == nil {
        source.AjaxDealFun(&newFetch, response)
    }

    if len(newFetch) > 0 {
        for i := 0; i < len(newFetch); i++ {
            fetchData := newFetch[i]
            k := conn + "_" + fetchData.Url + "_" + fetchData.Title
            if newFetch[i].CreatedTime == "" {
                newFetch[i].CreatedTime = nowDate
            }
            if newFetch[i].Source == "" {
                newFetch[i].Source = source.Name
            }
            if _, ok := (*f.hadFetchedMap.mapX)[k]; !ok {
                f.hadFetchData = append(f.hadFetchData, fetchData)
                setMap(&f.hadFetchedMap, k, 1)
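Note: an illustrative standalone sketch, not code from this commit, of the reflection pattern parseAjax now uses when a source sets Target and AjaxSimpleDeal: allocate a value of the registered reflect.Type, unmarshal the response body into it, then hand the concrete value to the callback. decodeInto and Payload are invented names for the sketch.

package main

import (
    "encoding/json"
    "fmt"
    "reflect"
)

// decodeInto allocates a new value of type target, unmarshals raw JSON into
// it, and returns the concrete value as an interface{} -- the same trick
// parseAjax applies to Source.Target before calling AjaxSimpleDeal.
func decodeInto(raw []byte, target reflect.Type) (interface{}, error) {
    dst := reflect.New(target).Elem() // addressable zero value of the target type
    if err := json.Unmarshal(raw, dst.Addr().Interface()); err != nil {
        return nil, err
    }
    return dst.Interface(), nil
}

// Payload is a stand-in for response types such as BjNewsResponse or
// PeopleResponse registered via Source.Target.
type Payload struct {
    Title string `json:"title"`
}

func main() {
    v, err := decodeInto([]byte(`{"title":"hello"}`), reflect.TypeOf(Payload{}))
    if err != nil {
        panic(err)
    }
    p := v.(Payload) // type assertion, as done inside each AjaxSimpleDeal
    fmt.Println(p.Title)
}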
@@ -297,7 +313,7 @@ func (f *fetchHandler) parseAjax(response *http.Response, source newsource.Sourc
func (f *fetchHandler) parsesDom(html *http.Response, conn string, source newsource.Source) {
    defer func() {
        if r := recover(); r != nil {
            log.Println(r)
            log.Printf("parse html err:[%s]. stack:[%s]", r, debug.Stack())
        }
    }()
    doc, err := goquery.NewDocumentFromReader(html.Body)
@@ -309,6 +325,7 @@ func (f *fetchHandler) parsesDom(html *http.Response, conn string, source newsou
    doc.Find(source.ListQuery).Each(func(i int, selection *goquery.Selection) {
        fetchData := data.FetchData{
            CreatedTime: nowDate,
            Source: source.Name,
        }
        source.QueryHandler(i, selection, &fetchData)
        k := conn + "_" + fetchData.Url + "_" + fetchData.Title
@@ -341,7 +358,7 @@ func (f *fetchHandler) sendFetchData() {
            Data: dataFetch.item,
        })
        if err != nil {
            log.Println(err)
            log.Printf("send new fetch data err:[%s]", err)
        }
    }
}
@@ -434,7 +451,7 @@ func main() {
                cc <- 1
                return
            }
            log.Println(err)
            log.Printf("websocket read client msg err:[%s]", err)
        } else {
            h.rMsgChan <- msg
        }
newsource/bjnews.go (new file, 43 lines)
@@ -0,0 +1,43 @@
package newsource

type BjNewsResponse struct {
    Data struct {
        Data []BjNewsItem `json:"data"`
    } `json:"data"`
}

type BjNewsDetailURL struct {
    MURL string `json:"m_url"`
    PcURL string `json:"pc_url"`
}

type BjNewsItem struct {
    Source BjNewsSource `json:"_source"`
    Highlight BjNewsHighlight `json:"highlight"`
}

type BjNewsSource struct {
    SpecialType string `json:"special_type"`
    PublishTime string `json:"publish_time"`
    Title string `json:"title"`
    Type int `json:"type"`
    Keyword []interface{} `json:"keyword"`
    UUID string `json:"uuid"`
    CoverList string `json:"cover_list"`
    HasVideo int `json:"has_video"`
    ImgList []string `json:"img_list"`
    Cover string `json:"cover"`
    BigCover string `json:"big_cover"`
    CommentNum int `json:"comment_num"`
    ZanNum int `json:"zan_num"`
    ShareNum string `json:"share_num"`
    LiveStatus string `json:"live_status"`
    XjhID string `json:"xjh_id"`
    DetailURL BjNewsDetailURL `json:"detail_url"`
}

type BjNewsHighlight struct {
    Title string `json:"title"`
    Desc string `json:"desc"`
    Content string `json:"content"`
}
@@ -1,10 +1,5 @@
package newsource

import (
    "regexp"
    "strings"
)

type CCDIPAGERInfo struct {
    BM string `json:"BM"`
    BC string `json:"BC"`
@@ -27,9 +22,3 @@ type CCDIPAGERData struct {
    Info []CCDIPAGERInfo `json:"info"`
    NextPage int `json:"nextPage"`
}

func stripTags(content string) string {
    content = strings.Replace(content, " ", "", -1)
    re := regexp.MustCompile(`<(.|\n)*?>`)
    return re.ReplaceAllString(content, "")
}
newsource/ifeng.go (new file, 14 lines)
@@ -0,0 +1,14 @@
package newsource

type IFENGResponse struct {
    Data struct {
        Items []IFENGItem `json:"items"`
    } `json:"data"`
}

type IFENGItem struct {
    ID string `json:"id"`
    Source string `json:"source"`
    Title string `json:"title"`
    URL string `json:"url"`
}
@@ -8,6 +8,7 @@ import (
    "io/ioutil"
    "log"
    "net/http"
    "reflect"
    "regexp"
    "strconv"
    "strings"
@@ -16,18 +17,27 @@ import (
)

type Source struct {
    Name string
    SearchUrl string
    Method string
    Type string
    KeywordField string
    ListQuery string
    QueryHandler func(i int, selection *goquery.Selection, fetchData *data.FetchData)
    AjaxHandler func(fetchData *data.FetchData)
    Header map[string]string
    HeaderFun func(r *http.Request)
    ExternParam map[string]string
    AjaxDealFun func(*[]data.FetchData, *http.Response)
    Name string
    SearchUrl string
    Method string
    Type string
    KeywordField string
    ListQuery string
    QueryHandler func(i int, selection *goquery.Selection, fetchData *data.FetchData)
    AjaxHandler func(fetchData *data.FetchData)
    Header map[string]string
    HeaderFun func(r *http.Request)
    ExternParam map[string]string
    AjaxDealFun func(*[]data.FetchData, *http.Response)
    Target reflect.Type
    UnmarshalerFun func([]byte) interface{}
    AjaxSimpleDeal func(interface{}, *[]data.FetchData)
    IsJson bool
}

func StripTags(content string) string {
    re := regexp.MustCompile(`<(.|\n)*?>`)
    return re.ReplaceAllString(content, "")
}

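Note: an illustrative sketch, not code from this commit, of how the fields added to Source above are meant to be wired together, in the same style as the entries added to GetSource later in this diff: Target names the concrete response type to decode, AjaxSimpleDeal consumes the decoded value, and IsJson switches the POST body to JSON. FetchData and ExampleResponse are stand-ins so the snippet compiles on its own.

package main

import (
    "net/http"
    "reflect"
)

// FetchData is a stub of the repository's data.FetchData, included only so
// this snippet is self-contained.
type FetchData struct {
    Url, Title, Desc, Date, Source string
}

// Source here is trimmed to the fields relevant to the typed-JSON path.
type Source struct {
    Name           string
    SearchUrl      string
    Method         string
    Type           string
    KeywordField   string
    HeaderFun      func(r *http.Request)
    Target         reflect.Type                    // concrete response type to unmarshal into
    AjaxSimpleDeal func(interface{}, *[]FetchData) // receives the decoded Target value
    IsJson         bool                            // send the POST body as JSON instead of form data
}

// ExampleResponse stands in for a typed API response such as PeopleResponse.
type ExampleResponse struct {
    Items []struct {
        Title string `json:"title"`
        Url   string `json:"url"`
    } `json:"items"`
}

var exampleSource = Source{
    Name:         "example",
    SearchUrl:    "https://example.com/search",
    Method:       "post",
    Type:         "ajax",
    IsJson:       true,
    KeywordField: "key",
    Target:       reflect.TypeOf(ExampleResponse{}),
    AjaxSimpleDeal: func(v interface{}, out *[]FetchData) {
        r := v.(ExampleResponse)
        for _, it := range r.Items {
            *out = append(*out, FetchData{Title: it.Title, Url: it.Url})
        }
    },
}

func main() { _ = exampleSource }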
func GetSource() []Source {
@@ -36,7 +46,7 @@ func GetSource() []Source {
    nowDate := ti.Format("2006-01-02 15:04:05")
    return []Source{
        {
            Name: "中央纪委监察部",
            Name: "中央纪委国家监委网站",
            SearchUrl: "https://www.ccdi.gov.cn/was5/web/search",
            Method: "post",
            ListQuery: ".center_box0 li",
@@ -80,6 +90,7 @@ func GetSource() []Source {
                t := selection.Find(".news-title-font_1xS-F").First()
                data.Title = t.Text()
                data.Desc = selection.Find(".c-row .c-color-text").First().Text()
                data.Source = selection.Find("span[aria-label*=\"新闻来源\"]").First().Text()
                data.Date = selection.Find("span[class=\"c-color-gray2 c-font-normal c-gap-right-xsmall\"]").First().Text()
                n := compile.FindAllStringSubmatch(data.Date, -1)
                if nil != n {
@@ -151,7 +162,7 @@ func GetSource() []Source {
                    if "" == v.YT {
                        continue
                    }
                    desc := stripTags(v.IRCONTENT)
                    desc := StripTags(v.IRCONTENT)
                    l := utf8.RuneCountInString(desc)
                    if l > 30 {
                        l = 30
@@ -159,7 +170,7 @@ func GetSource() []Source {
                    desc = string([]rune(desc)[:30])
                    d := data.FetchData{
                        Url: fmt.Sprintf("https://jjjcb.ccdi.gov.cn/epaper/index.html?guid=%s", v.ZBGUID),
                        Title: v.YT,
                        Title: v.DOCTITLE,
                        Desc: desc,
                        Date: v.DOCPUBTIME,
                        CreatedTime: nowDate,
@@ -169,5 +180,230 @@ func GetSource() []Source {
                }
            },
        },
        {
            Name: "中新网搜索",
            SearchUrl: "https://sou.chinanews.com.cn/search.do",
            KeywordField: "q",
            Method: "get",
            Type: "html",
            ListQuery: "#news_list table",
            QueryHandler: func(i int, selection *goquery.Selection, fetchData *data.FetchData) {
                t := selection.Find(".news_item a").First()
                fetchData.Title = t.Text()
                fetchData.Url, _ = t.Attr("href")
                fetchData.Desc = selection.Find(".news_content").First().Text()
                tt := selection.Find(".news_other").First().Text()
                fet := strings.Split(tt, "html")
                fetchData.Date = fet[len(fet)-1]
            },
        },
        {
            Name: "新浪新闻搜索",
            SearchUrl: "https://search.sina.com.cn/?range=title&c=news&time=&ie=utf-8",
            Method: "get",
            Type: "html",
            KeywordField: "q",
            ListQuery: "#wrap .box-result",
            Header: map[string]string{
                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
                "authority": "search.sina.com.cn",
                "referer": "https://news.sina.com.cn/rol",
            },
            QueryHandler: func(i int, selection *goquery.Selection, fetchData *data.FetchData) {
                t := selection.Find("h2>a").First()
                fetchData.Title = t.Text()
                fetchData.Url, _ = t.Attr("href")
                fetchData.Desc = selection.Find(".r-info .content").Text()
                s := selection.Find("h2 >.fgray_time").First().Text()
                ll := strings.Fields(s)
                if len(ll) > 2 {
                    fetchData.Date = ll[1] + " " + ll[2]
                    fetchData.Source = ll[0]
                } else {
                    fetchData.Date = ll[1]
                    fetchData.Source = ll[0]
                }

            },
        },
        {
            Name: "联合早报",
            SearchUrl: "https://www.zaobao.com/search?pageNo=1&pageSize=10",
            Method: "get",
            Type: "ajax",
            Header: map[string]string{
                "authority": "www.zaobao.com",
                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
            },
            KeywordField: "keywords",
            AjaxDealFun: func(i *[]data.FetchData, response *http.Response) {
                bytes, err := ioutil.ReadAll(response.Body)
                if err != nil {
                    log.Println(err)
                    return
                }
                var r ZaoBaoResponse
                err = json.Unmarshal(bytes, &r)
                if err != nil {
                    log.Println(err)
                    return
                }
                for _, datum := range r.Result.Data {
                    t := time.UnixMilli(datum.PublicationDate)
                    v := data.FetchData{
                        Url: "https://www.zaobao.com" + datum.URL,
                        Title: datum.Title,
                        Desc: datum.ContentPreview,
                        Date: t.Format("2006-01-02 15:04:05"),
                    }
                    *i = append(*i, v)
                }
            },
        },
        {
            Name: "新京报",
            SearchUrl: "https://s.bjnews.com.cn/bjnews/getlist?from=bw&page=1&orderby=0",
            KeywordField: "bwsk",
            Type: "ajax",
            Method: "get",
            Header: map[string]string{
                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
            },
            Target: reflect.TypeOf(BjNewsResponse{}),
            AjaxSimpleDeal: func(rr interface{}, i *[]data.FetchData) {
                r := rr.(BjNewsResponse)
                for _, v := range r.Data.Data {
                    item := data.FetchData{
                        Url: v.Source.DetailURL.PcURL,
                        Title: v.Source.Title,
                        Desc: v.Highlight.Desc,
                        Date: v.Source.PublishTime,
                    }
                    *i = append(*i, item)
                }
            },
        },
        {
            Name: "环球网",
            SearchUrl: "https://www.baidu.com/s?wd=site:huanqiu.com%20",
            Method: "get",
            ListQuery: "div[class=\"result c-container xpath-log new-pmd\"]",
            QueryHandler: func(i int, selection *goquery.Selection, data *data.FetchData) {
                data.Url, _ = selection.Attr("mu")
                t := selection.Find("h3[class='c-title t t tts-title'] a").First()
                data.Title = t.Text()
                data.Desc = selection.Find(".content-right_8Zs40").First().Text()
                data.Source = selection.Find(".source_1Vdff .c-color-gray").First().Text()
                data.Date = selection.Find("span[class=\"c-color-gray2\"]").First().Text()
                n := compile.FindAllStringSubmatch(data.Date, -1)
                if nil != n {
                    nn, _ := strconv.Atoi(n[0][0])
                    if strings.Contains(data.Date, "小时") {
                        data.Date = ti.Add(-time.Duration(nn) * time.Hour).Format("2006-01-02 15:04")
                    }
                    if strings.Contains(data.Date, "分钟") {
                        data.Date = ti.Add(-time.Duration(nn) * time.Minute).Format("2006-01-02 15:04")
                    }
                    if strings.Contains(data.Date, "天") {
                        data.Date = ti.Add(-time.Duration(nn) * time.Hour * 24).Format("2006-01-02 15:04")
                    }
                }
                if strings.Contains(data.Date, "昨天") {
                    data.Date = ti.Add(-time.Duration(24) * time.Hour).Format("2006-01-02 15:04")
                }
                if strings.Contains(data.Date, "前天") {
                    data.Date = ti.Add(-time.Duration(48) * time.Hour).Format("2006-01-02 15:04")
                }
            },
            Type: "html",
            HeaderFun: func(req *http.Request) {
                req.Header.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                req.Header.Add("accept-language", "zh-CN,zh;q=0.9")
                req.Header.Add("cache-control", "no-cache")
                req.Header.Add("connection", "keep-alive")
                req.Header.Add("cookie", "BIDUPSID=844E3DCAA2EEBF5C872DC99B967B6B7B; PSTM=1655872163; BAIDUID=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; BD_UPN=123353; ORIGIN=2; ISSW=1; ISSW=1; BAIDUID_BFESS=844E3DCAA2EEBF5CB3E1D79750162204:FG=1; ZFY=jWFAySgO:AoQfb6emY9vnmEdptVao:Anj0FFkp028wFws:C; BD_HOME=1; delPer=0; BD_CK_SAM=1; PSINO=3; COOKIE_SESSION=42_0_2_2_3_0_1_0_2_0_0_0_18_0_51_0_1655888428_0_1655888377%7C3%230_0_1655888377%7C1; BAIDU_WISE_UID=wapp_1655902298617_702; ZD_ENTRY=google; channel=baidusearch; baikeVisitId=b3b23509-9330-4d33-82ae-b8eb37895917; BA_HECTOR=8k2g2g218ga40181ak1hbgg1n14; BDRCVFR[C0p6oIjvx-c]=mbxnW11j9Dfmh7GuZR8mvqV; BDSVRTM=1011; H_PS_PSSID=36550_36459_36673_36455_36453_36692_36165_36695_36697_36569_36075_36467_36316_36651")
                req.Header.Add("referer", "http://news.baidu.com/")
                req.Header.Add("sec-fetch-dest", "document")
                req.Header.Add("sec-fetch-mode", "navigate")
                req.Header.Add("sec-fetch-site", "cross-site")
                req.Header.Add("sec-fetch-user", "?1")
                req.Header.Add("upgrade-insecure-requests", "1")
                req.Header.Add("user-agent", "Mozilla/5.0 (X11; Windows x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
                req.Header.Add("#sec-ch-ua", "\".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"")
                req.Header.Add("sec-ch-ua-mobile", "?0")
                req.Header.Add("sec-ch-ua-platform", "\"Windows\"")
                req.Header.Add("postman-token", "81407fbc-2b96-54a7-0193-f640156714ab")

            },
        },
        {
            Name: "凤凰新闻",
            SearchUrl: "https://shankapi.ifeng.com/season/getSoFengData/all/${keyword}/1/getSoFengDataCallback?callback=getSoFengDataCallback2",
            Type: "ajax",
            Method: "get",
            AjaxDealFun: func(i *[]data.FetchData, response *http.Response) {
                bytes, err := ioutil.ReadAll(response.Body)
                if err != nil {
                    log.Println(err)
                    return
                }
                r := string(bytes)
                r = strings.Replace(r, "getSoFengDataCallback(", "", 1)
                r = strings.TrimRight(r, ")")
                var res IFENGResponse
                err = json.Unmarshal([]byte(r), &res)
                if err != nil {
                    log.Println(err)
                    return
                }
                for _, v := range res.Data.Items {
                    *i = append(*i, data.FetchData{
                        Url: "https:" + v.URL,
                        Title: StripTags(v.Title),
                        Source: v.Source,
                    })
                }
            },
        },
        {
            Name: "人民网",
            SearchUrl: "http://search.people.cn/search-platform/front/search",
            Type: "ajax",
            Method: "post",
            IsJson: true,
            KeywordField: "key",
            Target: reflect.TypeOf(PeopleResponse{}),
            ExternParam: map[string]string{
                "page": "1",
                "limit": "10",
                "hasTitle": "true",
                "hasContent": "true",
                "isFuzzy": "false",
                "type": "0",
                "sortType": "0",
                "startTime": "0",
                "endTime": "0",
            },
            AjaxSimpleDeal: func(i interface{}, v *[]data.FetchData) {
                r := i.(PeopleResponse)
                for _, record := range r.Data.Records {
                    tt := time.UnixMilli(int64(record.InputTime))
                    *v = append(*v, data.FetchData{
                        Url: record.Url,
                        Title: StripTags(record.Title),
                        Desc: StripTags(record.Content),
                        Date: tt.Format("2006-01-02 15:04:05"),
                        Source: record.OriginName,
                    })
                }
            },
            HeaderFun: func(req *http.Request) {
                req.Header.Set("Content-Type", "application/json")
                req.Header.Add("Cookie", "__jsluid_h=a1b7d0d8dad3604c9393bbcaf36ced1f; sso_c=0; sfr=1; __jsluid_h=fbf7d0abc29ec349c0c0c89c779c268c")
                req.Header.Add("Origin", "http://search.people.cn")
                req.Header.Add("Referer", "http://search.people.cn/s/?keyword=%E7%BA%AA%E6%A3%80&st=0")
                req.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36")
            },
        },
    }
}
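Note: an illustrative sketch, not code from this commit, of the JSONP handling used by the 凤凰新闻 entry above, whose endpoint returns getSoFengDataCallback(...) rather than bare JSON: the AjaxDealFun strips the callback wrapper and then unmarshals the remainder. stripJSONP and the inlined payload type are invented for the sketch.

package main

import (
    "encoding/json"
    "fmt"
    "strings"
)

// stripJSONP removes a `callback(...)` wrapper so the remainder is plain JSON,
// mirroring the getSoFengDataCallback handling in the 凤凰新闻 source.
func stripJSONP(body, callback string) string {
    s := strings.Replace(body, callback+"(", "", 1)
    return strings.TrimRight(s, ")")
}

func main() {
    raw := `getSoFengDataCallback({"data":{"items":[{"title":"t","url":"//news.ifeng.com/x"}]}})`
    var res struct {
        Data struct {
            Items []struct {
                Title string `json:"title"`
                URL   string `json:"url"`
            } `json:"items"`
        } `json:"data"`
    }
    if err := json.Unmarshal([]byte(stripJSONP(raw, "getSoFengDataCallback")), &res); err != nil {
        panic(err)
    }
    // Scheme-relative URLs are prefixed with "https:" just as in the handler.
    fmt.Println(res.Data.Items[0].Title, "https:"+res.Data.Items[0].URL)
}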
newsource/people.go (new file, 15 lines)
@@ -0,0 +1,15 @@
package newsource

type PeopleResponse struct {
    Data struct {
        Records []PeopleRecord
    }
}

type PeopleRecord struct {
    Title string `json:"title"`
    Url string `json:"url"`
    OriginName string `json:"originName"`
    Content string `json:"content"`
    InputTime int `json:"inputTime"`
}
newsource/zaobao.go (new file, 19 lines)
@@ -0,0 +1,19 @@
package newsource

type ZaoBaoResponse struct {
    Result struct {
        Data []ZaoBaoItem `json:"data"`
    } `json:"result"`
}

type ZaoBaoItem struct {
    ID string `json:"id"`
    Title string `json:"title"`
    Digest interface{} `json:"digest"`
    ContentPreview string `json:"contentPreview"`
    MainNode string `json:"mainNode"`
    PublicationDate int64 `json:"publicationDate"`
    URL string `json:"url"`
    Pictures interface{} `json:"pictures"`
    IsSticky int `json:"isSticky"`
}
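Note: an illustrative sketch, not code from this commit, of the timestamp handling shared by the 联合早报 and 人民网 handlers: ZaoBaoItem.PublicationDate and PeopleRecord.InputTime hold epoch milliseconds, which the handlers convert with time.UnixMilli before formatting with the reference layout. The literal value below is made up.

package main

import (
    "fmt"
    "time"
)

func main() {
    // Epoch-millisecond value as returned in fields like publicationDate
    // or inputTime (the number itself is illustrative).
    var publicationDate int64 = 1655872163000

    t := time.UnixMilli(publicationDate) // Go 1.17+
    fmt.Println(t.Format("2006-01-02 15:04:05"))
}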
@@ -31,6 +31,10 @@

    </template>
    </el-table-column>
    <el-table-column
        prop="source"
        label="来源">
    </el-table-column>
    <el-table-column
        prop="desc"
        label="描述"