package proxy

import (
	"context"
	"sync"
	"time"

	"gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/getter"
	healthcheck "gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/health-check"
	"gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/proxy/structs"
	"gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/worker"
	log "github.com/sirupsen/logrus"
)

// CrawlProxies submits one task per getter to worker.Pool, collects the
// proxies the getters return, passes the collected set through
// healthcheck.CleanBadProxies, and adds the result to structs.ProxiesList.
//
// It blocks until every successfully submitted task has finished. A getter
// whose task cannot be submitted is logged and skipped.
func CrawlProxies(ctx context.Context, getters []getter.Getter) {
	var proxies structs.Proxies
	log.Infof("共%d个抓取源", len(getters))

	var wg sync.WaitGroup
	for _, gtr := range getters {
		// Per-iteration copy: before Go 1.22 the loop variable is shared by
		// all iterations, and the pool may run the closure after the loop
		// has already advanced.
		gtr := gtr

		wg.Add(1)
		err := worker.Pool.Submit(func() {
			defer wg.Done()
			// NOTE(review): proxies.Add is called from several pool tasks;
			// presumably structs.Proxies synchronizes internally — confirm.
			if ps := gtr.Get(); len(ps) > 0 {
				proxies.Add(ps)
			}
		})
		if err != nil {
			// Assumes a failed Submit never runs the task, so its deferred
			// Done will not fire; release the counter here, otherwise
			// wg.Wait below would block forever.
			wg.Done()
			log.Errorln("添加并发任务失败: ", err)
		}
	}
	wg.Wait()

	log.Infof("Crawled %d proxies", proxies.Len())
	proxyList := healthcheck.CleanBadProxies(ctx, proxies.Get())
	// Log the number of proxies, not the list itself (%d expects an integer).
	log.Infof("Health checked %d proxies", len(proxyList))
	structs.ProxiesList.Add(proxyList)
}

// CronCrawl calls CrawlProxies once every interval minutes until ctx is
// cancelled. The first crawl happens after the first tick, not immediately.
//
// If interval does not yield a positive duration (zero, or large enough to
// overflow time.Duration into a negative value), an error is logged and the
// function returns without crawling.
func CronCrawl(ctx context.Context, getters []getter.Getter, interval uint64) {
	period := time.Duration(interval) * time.Minute
	if period <= 0 {
		// time.NewTicker panics on a non-positive duration.
		log.Errorf("invalid crawl interval: %d minutes", interval)
		return
	}

	ticker := time.NewTicker(period)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			CrawlProxies(ctx, getters)
		}
	}
}