update 更新抓取
All checks were successful
Build image / build (push) Successful in 2m21s

This commit is contained in:
timerzz 2025-03-28 17:46:13 +08:00
parent dd5b127681
commit e42a17f6c9
3 changed files with 71 additions and 20 deletions

2
go.mod
View File

@ -5,7 +5,7 @@ go 1.23.0
toolchain go1.23.7
require (
gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250327021208-7303e49bd09a
gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250328075027-44514fbf39c8
gitea.timerzz.com/kedaya_haitao/pusher v0.0.0-20241129135359-c16e02a7eab0
github.com/cloudwego/kitex v0.11.3
github.com/gofiber/fiber/v3 v3.0.0-beta.4

4
go.sum
View File

@ -1,6 +1,6 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250327021208-7303e49bd09a h1:AIi74nG73laj16WMoXAbMDyNCZoHt9vdWtCeEp/wybo=
gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250327021208-7303e49bd09a/go.mod h1:cfkwyDHbOjucM8xLLg8yIkZKz33kdVqvBZYrfNjM8oc=
gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250328075027-44514fbf39c8 h1:jMMLOFUWIeuwA5IyS3gAXON0NAN0ykacg825XissGTE=
gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250328075027-44514fbf39c8/go.mod h1:cfkwyDHbOjucM8xLLg8yIkZKz33kdVqvBZYrfNjM8oc=
gitea.timerzz.com/kedaya_haitao/pusher v0.0.0-20241129135359-c16e02a7eab0 h1:WMNOErbI6At865VWI3sN74RMQaZ8ZhwsNSB9A4vg/6Q=
gitea.timerzz.com/kedaya_haitao/pusher v0.0.0-20241129135359-c16e02a7eab0/go.mod h1:nRdxwOP3hhkUdH3PjHq3gt8SA+YEfR/d7Ig9DuQQZQY=
gitea.timerzz.com/timerzz/proxy-detector v0.0.0-20250324053453-1608e1bf3949 h1:iRO07bfKmGLwLOyl65Hq2OhHbFt9EWRVqwZZ1h1Vc9w=

View File

@ -178,7 +178,6 @@ func (c *Controller) productsToArticles(products []coach_client.Product) (articl
for _, product := range products {
// 一个color是一个sku
for _, color := range product.Colors {
// 如果没找到,说明没有标准商品,创建一个
article := v2.Article{
Name: product.Name,
EnglishName: product.Name,
@ -225,36 +224,88 @@ func (c *Controller) cronCrawl() {
c.Crawl()
}
// Crawl runs one complete crawl cycle for this provider.
//
// The provider is flagged as pulling, the full bag listing is crawled, the
// articles that were not updated recently are refreshed, and finally the pull
// time is stamped and the provider is flagged as normal again. The start and
// the end of the cycle are logged together with the current time.
//
// Refreshing long-unupdated articles (the former TODO on this function) is
// delegated to crawlUpdateBags.
func (c *Controller) Crawl() {
	logrus.Infof("%s 开始抓取信息", time.Now())

	// Flag the provider as pulling for the duration of the crawl.
	c.setProviderStatus(v2.ProviderStatus_Pulling)

	// Phases run strictly in order: the full listing first, then the refresh.
	for _, phase := range []func(){c.crawlAllBags, c.crawlUpdateBags} {
		phase()
	}

	// Crawl finished: stamp the pull time and restore the normal status.
	finishedAt := time.Now()
	c.provider.PullAt = finishedAt
	c.setProviderStatus(v2.ProviderStatus_Normal)

	logrus.Infof("%s 抓取信息结束", time.Now())
}
// msgs collects crawl failure messages: crawlAllBags appends one entry per
// failed page request, and the entries are later joined with newlines into
// the provider's error status.
// NOTE(review): in this view the declaration sits at file scope right after
// Crawl; it looks like a line carried over from a diff rendering — confirm
// where (and whether) it exists in the real file.
var msgs = make([]string, 0)
// crawlAllBags crawls the paginated "all bags" listing: each page is fetched
// with c.client.ViewAllBags, its products are converted by productsToArticles
// and the result is stored with saveProducts.
//
// NOTE(review): this view appears to be a rendered diff with removed and added
// lines interleaved (duplicated log calls, a break followed by another
// statement) — confirm against the real file before relying on the details.
func (c *Controller) crawlAllBags() {
// totalPage starts at -1 ("not known yet") so the loop keeps running until a
// successful response supplies the real page count.
for page, totalPage := 1, -1; page <= totalPage || totalPage == -1; page++ {
resp, err := c.client.ViewAllBags(c.ctx, page)
logrus.Infof("开始处理第%d页数据", page)
logrus.WithField("part", "抓所有包").Infof("开始处理第%d页数据", page)
if err != nil {
// Record the failure in msgs, log it, and move on to the next page.
msg := fmt.Sprintf("访问coach第%d页失败: %v", page, err)
msgs = append(msgs, msg)
logrus.Error(msg)
logrus.WithField("part", "抓所有包").Errorf("访问coach第%d页失败: %v", page, err)
// NOTE(review): while totalPage is still -1, requests that keep failing never
// terminate this loop (page just keeps increasing) — consider a retry limit.
continue
}
totalPage = resp.PageData.TotalPages
c.saveProducts(c.productsToArticles(resp.PageData.Products))
logrus.Infof("第%d页数据保存完成", page)
// NOTE(review): as written, this break leaves the loop after the first
// successfully saved page and makes the log call below unreachable.
break
logrus.WithField("part", "抓所有包").Infof("第%d页数据保存完成", page)
}
}
// NOTE(review): the statements below sit outside any function body in this
// view; they look like the previous tail of Crawl left over from a diff
// rendering — confirm before keeping them.
// Pull finished: record the pull time and update the provider status.
c.provider.PullAt = time.Now()
// Report an error status carrying every collected message (one per line),
// otherwise mark the provider as normal.
if len(msgs) > 0 {
c.setProviderStatus(v2.ProviderStatus_Error, strings.Join(msgs, "\n"))
} else {
c.setProviderStatus(v2.ProviderStatus_Normal)
}
logrus.Infof("%s 抓取信息结束", time.Now())
// crawlUpdateBags refreshes coach articles that this provider has not updated
// recently.
//
// It selects articles with brand 'coach' whose provider_articles row for
// c.providerId is missing or was last updated more than 24 hours ago (rows
// with exclude = true are left out), walks them in batches of 10, requests the
// current product details for each batch, appends a freshly built
// ProviderArticle to every article of the batch and saves the batch:
//   - a detail entry with an empty Id produces a ProviderArticle with
//     Exclude set to true;
//   - any other entry records link, image, stock information and the cost
//     computed by utils.CalculateProviderPrice.
//
// A failed detail request only skips its own batch. A failure of the batched
// query itself is logged instead of being silently dropped.
func (c *Controller) crawlUpdateBags() {
	logrus.Info("更新之前没更新的商品信息")

	const batchSize = 10
	var (
		results = make([]v2.Article, 0, batchSize)
		total   int
	)
	// Anything not touched since this instant counts as stale.
	staleBefore := time.Now().Add(-24 * time.Hour)

	err := c.storage.DB().
		Joins("LEFT JOIN provider_articles ON provider_articles.article_id=articles.id AND provider_articles.provider_id = ?", c.providerId).
		Where("(provider_articles.exclude = false or provider_articles.exclude is null) and (articles.brand = 'coach') and (provider_articles.updated_at is null or provider_articles.updated_at < ?)", staleBefore).
		FindInBatches(&results, batchSize, func(tx *gorm.DB, batch int) error {
			total += len(results)
			pids := lo.Map(results, func(item v2.Article, _ int) string {
				return item.Pid
			})
			list, err := c.client.RequestProductDetailList(c.ctx, pids...)
			if err != nil {
				// Best effort: returning nil keeps the remaining batches going.
				logrus.Errorf("请求商品信息失败: %v", err)
				return nil
			}
			// Details are paired with the requested articles by position.
			for idx, item := range list {
				if idx >= len(results) {
					// More details than requested articles: there is nothing
					// to pair the surplus with, and indexing would panic.
					logrus.Warnf("商品信息返回数量超过请求数量(%d),忽略多余部分", len(results))
					break
				}
				article := &results[idx]
				var pArticle v2.ProviderArticle
				if item.Id == "" {
					// No usable detail came back for this product: exclude it
					// for this provider.
					pArticle = v2.ProviderArticle{
						Pid:        article.Pid,
						Brand:      article.Brand,
						ProviderId: c.providerId,
						SkuID:      article.Pid,
						Exclude:    true,
					}
				} else {
					pArticle = v2.ProviderArticle{
						Pid:        article.Pid,
						Brand:      article.Brand,
						Link:       item.Url,
						Image:      article.Image,
						ProviderId: c.providerId,
						SkuID:      article.Pid,
						Ats:        item.Inventory.Ats,
						Available:  item.Inventory.Orderable,
					}
					pArticle.Cost = utils.CalculateProviderPrice(
						append(c.provider.CalculateProcess, pArticle.CalculateProcess...),
						map[string]float64{
							"originalPrice": item.Prices.CurrentPrice,
							"freight":       c.provider.Config.Freight,
							"exchangeRate":  c.provider.Config.ExchangeRate,
						})
					pArticle.HistoryPrice = append(pArticle.HistoryPrice, pArticle.Cost)
				}
				article.Providers = append(article.Providers, pArticle)
			}
			c.saveProducts(results)
			return nil
		}).Error
	if err != nil {
		logrus.Errorf("查询待更新商品失败: %v", err)
	}

	logrus.Infof("共%d条数据更新", total)
}
// 对coach返回的数据进行处理保存