package jdapi

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"
	"sync"
	"testing"
	"time"

	"gitrosy.jxc4.com/baseapi/utils"
	"gitrosy.jxc4.com/jx-callback/globals"
	"golang.org/x/net/html"
)

// TestGoQuery fetches each configured page concurrently, extracts the href
// values of its <a> tags and prints them. Fetch errors are printed rather
// than failing the test, so the test is exploratory and needs network access.
func TestGoQuery(t *testing.T) {
	// URLs of the pages to crawl.
	urls := []string{"https://www.jxc4.com/?info=eyJjb2RlIjoiLTEiLCJkZXNjIjoi562+5ZCN5pe26Ze05oiz5Y+C5pWw6LaF5pe2IGxldmVsOjAsIGNvZGU6ODUzMDAyIiwiZGF0YSI6IiJ9#/ordermanager"}

	var wg sync.WaitGroup
	for _, url := range urls {
		wg.Add(1)
		// The URL is passed as an argument so each goroutine works on its own copy.
		go func(u string) {
			defer wg.Done()

			data, err := fetchPage(u)
			if err != nil {
				fmt.Println("Error fetching page:", err)
				return
			}

			// Parse the page content.
			links := parseLinks(data)
			fmt.Println("Links on", u, ":", links)
		}(url)
	}
	wg.Wait()
}

// fetchPage issues a GET request for url with the "token" header attached and
// returns the response body as a string.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a non-2xx status, or when the body cannot be read.
func fetchPage(url string) (string, error) {
	// Upper bound for the whole exchange so a stalled server cannot hang the caller.
	const timeout = 30 * time.Second

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	// The header must be set on the outgoing request; setting it on a
	// detached http.Response (as was done previously) never reaches the server.
	// NOTE(review): hard-coded credential — consider loading it from
	// configuration or the environment instead of committing it.
	req.Header.Add("token", "TOKEN.V2.2452A93EEB9111EC9B06525400E86DC0.20240527-150008.localpass.C0F204211BF611EF8C78525400E86DC0.[18981810340]")

	client := &http.Client{Timeout: timeout}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Reject error pages instead of handing them to the link parser.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("fetching %s: unexpected status %s", url, resp.Status)
	}

	// Read the page content.
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(data), nil
}

// parseLinks tokenizes data as HTML with golang.org/x/net/html and returns the
// value of every href attribute found on an <a> start tag, in document order.
// The result is an empty, non-nil slice when no link is found. Tokenizing
// stops at the first error token, which includes the normal end of input.
func parseLinks(data string) []string {
	links := make([]string, 0)
	tokenizer := html.NewTokenizer(strings.NewReader(data))

	for {
		tokenType := tokenizer.Next()
		globals.SugarLogger.Debugf("=======tokenType := %s", utils.Format4Output(tokenType, false))
		if tokenType == html.ErrorToken {
			break
		}

		token := tokenizer.Token()
		if tokenType != html.StartTagToken || token.Data != "a" {
			continue
		}
		for _, attr := range token.Attr {
			if attr.Key == "href" {
				links = append(links, attr.Val)
			}
		}
	}
	return links
}