proxy-detector/pkg/proxy/crawl.go

40 lines
1018 B
Go
Raw Normal View History

package proxy
import (
"context"
"time"
"gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/getter"
healthcheck "gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/health-check"
"gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/proxy/structs"
log "github.com/sirupsen/logrus"
)
// CrawlProxies performs a single crawl pass over all configured sources.
//
// It calls Get on every entry of getters, concatenates the non-empty results,
// hands the combined slice to healthcheck.CleanBadProxies together with ctx,
// and stores whatever that call returns in structs.ProxiesList. Progress is
// logged at info level after each stage (the first message is in Chinese and
// reports the number of crawl sources).
//
// NOTE(review): CleanBadProxies presumably drops proxies that fail a health
// check and honours ctx cancellation — confirm against its implementation.
func CrawlProxies(ctx context.Context, getters []getter.Getter) {
	log.Infof("共%d个抓取源", len(getters))

	// Gather the raw results of every source into one slice.
	var collected []structs.Proxy
	for i := range getters {
		batch := getters[i].Get()
		if len(batch) == 0 {
			// Nothing returned by this source; move on to the next one.
			continue
		}
		collected = append(collected, batch...)
	}
	log.Infof("Crawled %d proxies", len(collected))

	// Keep only what the health check returns, then publish it.
	collected = healthcheck.CleanBadProxies(ctx, collected)
	log.Infof("Health checked %d proxies", len(collected))
	structs.ProxiesList.Add(collected)
}
// CronCrawl runs CrawlProxies(ctx, getters) once every interval minutes until
// ctx is cancelled, then returns. The first crawl happens after one full
// interval has elapsed, not immediately on entry.
//
// An interval of 0 disables periodic crawling: time.NewTicker panics on a
// non-positive duration, so in that case the function only blocks until ctx
// is done. Intervals too large to be expressed as a time.Duration are clamped
// to the largest representable whole number of minutes instead of silently
// overflowing.
func CronCrawl(ctx context.Context, getters []getter.Getter, interval uint64) {
	// Largest number of whole minutes that fits in a time.Duration
	// (an int64 count of nanoseconds).
	const maxMinutes = uint64((1<<63 - 1) / time.Minute)

	if interval == 0 {
		// No valid period to tick on; keep the "blocks until cancelled"
		// contract without ever triggering a crawl.
		<-ctx.Done()
		return
	}
	if interval > maxMinutes {
		interval = maxMinutes
	}

	ticker := time.NewTicker(time.Duration(interval) * time.Minute)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			CrawlProxies(ctx, getters)
		}
	}
}