1
This commit is contained in:
73
platformapi/jdapi/go_query_test.go
Normal file
73
platformapi/jdapi/go_query_test.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package jdapi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"git.rosy.net.cn/baseapi/utils"
|
||||
"git.rosy.net.cn/jx-callback/globals"
|
||||
"golang.org/x/net/html"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGoQuery(t *testing.T) {
|
||||
urls := []string{"https://www.jxc4.com/?info=eyJjb2RlIjoiLTEiLCJkZXNjIjoi562+5ZCN5pe26Ze05oiz5Y+C5pWw6LaF5pe2IGxldmVsOjAsIGNvZGU6ODUzMDAyIiwiZGF0YSI6IiJ9#/ordermanager"} // 要爬取的网页链接
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for _, url := range urls {
|
||||
wg.Add(1)
|
||||
go func(u string) {
|
||||
defer wg.Done()
|
||||
data, err := fetchPage(u)
|
||||
if err != nil {
|
||||
fmt.Println("Error fetching page:", err)
|
||||
return
|
||||
}
|
||||
// 解析页面数据
|
||||
links := parseLinks(data)
|
||||
fmt.Println("Links on", u, ":", links)
|
||||
}(url)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// fetchPage sends a GET request for url with the "token" header attached and
// returns the response body as a string.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a non-2xx status, or when reading the body fails. The
// returned string is empty whenever the error is non-nil.
func fetchPage(url string) (string, error) {
	// NOTE(review): this credential is hard-coded in the source; consider
	// loading it from configuration or the environment instead.
	const token = "TOKEN.V2.2452A93EEB9111EC9B06525400E86DC0.20240527-150008.localpass.C0F204211BF611EF8C78525400E86DC0.[18981810340]"

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", fmt.Errorf("building request for %q: %w", url, err)
	}
	// The token has to travel on the outgoing request; a header set on a
	// locally constructed http.Response never reaches the server.
	req.Header.Add("token", token)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Treat anything outside 2xx as a failure so that an error page is not
	// handed back to the caller as if it were the requested content.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("fetching %q: unexpected status %s", url, resp.Status)
	}

	// Read the page content.
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(data), nil
}
|
||||
|
||||
func parseLinks(data string) []string {
|
||||
// 使用golang.org/x/net/html包解析HTML页面,提取链接
|
||||
links := make([]string, 0)
|
||||
tokenizer := html.NewTokenizer(strings.NewReader(data))
|
||||
for {
|
||||
tokenType := tokenizer.Next()
|
||||
globals.SugarLogger.Debugf("=======tokenType := %s", utils.Format4Output(tokenType, false))
|
||||
if tokenType == html.ErrorToken {
|
||||
break
|
||||
}
|
||||
token := tokenizer.Token()
|
||||
if tokenType == html.StartTagToken && token.Data == "a" {
|
||||
for _, attr := range token.Attr {
|
||||
if attr.Key == "href" {
|
||||
links = append(links, attr.Val)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return links
|
||||
}
|
||||
Reference in New Issue
Block a user