generated from kedaya_haitao/template
51 lines
1.2 KiB
Go
51 lines
1.2 KiB
Go
package proxy
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.timerzz.com/timerzz/proxy-detector/pkg/getter"
|
|
healthcheck "gitea.timerzz.com/timerzz/proxy-detector/pkg/health-check"
|
|
"gitea.timerzz.com/timerzz/proxy-detector/pkg/proxy/structs"
|
|
"gitea.timerzz.com/timerzz/proxy-detector/pkg/worker"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
func CrawlProxies(ctx context.Context, getters []getter.Getter) {
|
|
var proxies = structs.NewProxies(make([]structs.Proxy, 0))
|
|
log.Infof("共%d个抓取源", len(getters))
|
|
var wg sync.WaitGroup
|
|
for _, gtr := range getters {
|
|
wg.Add(1)
|
|
err := worker.Pool.Submit(func() {
|
|
defer wg.Done()
|
|
if ps := gtr.Get(); len(ps) > 0 {
|
|
proxies.Add(ps)
|
|
}
|
|
})
|
|
if err != nil {
|
|
log.Errorln("添加并发任务失败: ", err)
|
|
}
|
|
}
|
|
wg.Wait()
|
|
log.Infof("Crawled %d proxies", proxies.Len())
|
|
proxyList := healthcheck.CleanBadProxies(ctx, proxies.Get())
|
|
log.Infof("Health checked %d proxies", len(proxyList))
|
|
structs.ProxiesList.Add(proxyList)
|
|
return
|
|
}
|
|
|
|
func CronCrawl(ctx context.Context, getters []getter.Getter, interval uint64) {
|
|
ticker := time.NewTicker(time.Duration(interval) * time.Minute)
|
|
defer ticker.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
CrawlProxies(ctx, getters)
|
|
}
|
|
}
|
|
}
|