generated from kedaya_haitao/template
40 lines
1018 B
Go
40 lines
1018 B
Go
|
package proxy
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"time"
|
||
|
|
||
|
"gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/getter"
|
||
|
healthcheck "gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/health-check"
|
||
|
"gitea.timerzz.com/kedaya_haitao/proxy-detector/pkg/proxy/structs"
|
||
|
log "github.com/sirupsen/logrus"
|
||
|
)
|
||
|
|
||
|
func CrawlProxies(ctx context.Context, getters []getter.Getter) {
|
||
|
var proxies []structs.Proxy
|
||
|
log.Infof("共%d个抓取源", len(getters))
|
||
|
for _, getter := range getters {
|
||
|
if ps := getter.Get(); len(ps) > 0 {
|
||
|
proxies = append(proxies, ps...)
|
||
|
}
|
||
|
}
|
||
|
log.Infof("Crawled %d proxies", len(proxies))
|
||
|
proxies = healthcheck.CleanBadProxies(ctx, proxies)
|
||
|
log.Infof("Health checked %d proxies", len(proxies))
|
||
|
structs.ProxiesList.Add(proxies)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func CronCrawl(ctx context.Context, getters []getter.Getter, interval uint64) {
|
||
|
ticker := time.NewTicker(time.Duration(interval) * time.Minute)
|
||
|
defer ticker.Stop()
|
||
|
for {
|
||
|
select {
|
||
|
case <-ctx.Done():
|
||
|
return
|
||
|
case <-ticker.C:
|
||
|
CrawlProxies(ctx, getters)
|
||
|
}
|
||
|
}
|
||
|
}
|