diff --git a/go.mod b/go.mod index 4f74317..ce1e0c4 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.23.0 toolchain go1.23.7 require ( - gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250327021208-7303e49bd09a + gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250328075027-44514fbf39c8 gitea.timerzz.com/kedaya_haitao/pusher v0.0.0-20241129135359-c16e02a7eab0 github.com/cloudwego/kitex v0.11.3 github.com/gofiber/fiber/v3 v3.0.0-beta.4 diff --git a/go.sum b/go.sum index c420f9b..4333ff4 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250327021208-7303e49bd09a h1:AIi74nG73laj16WMoXAbMDyNCZoHt9vdWtCeEp/wybo= -gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250327021208-7303e49bd09a/go.mod h1:cfkwyDHbOjucM8xLLg8yIkZKz33kdVqvBZYrfNjM8oc= +gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250328075027-44514fbf39c8 h1:jMMLOFUWIeuwA5IyS3gAXON0NAN0ykacg825XissGTE= +gitea.timerzz.com/kedaya_haitao/common v0.0.0-20250328075027-44514fbf39c8/go.mod h1:cfkwyDHbOjucM8xLLg8yIkZKz33kdVqvBZYrfNjM8oc= gitea.timerzz.com/kedaya_haitao/pusher v0.0.0-20241129135359-c16e02a7eab0 h1:WMNOErbI6At865VWI3sN74RMQaZ8ZhwsNSB9A4vg/6Q= gitea.timerzz.com/kedaya_haitao/pusher v0.0.0-20241129135359-c16e02a7eab0/go.mod h1:nRdxwOP3hhkUdH3PjHq3gt8SA+YEfR/d7Ig9DuQQZQY= gitea.timerzz.com/timerzz/proxy-detector v0.0.0-20250324053453-1608e1bf3949 h1:iRO07bfKmGLwLOyl65Hq2OhHbFt9EWRVqwZZ1h1Vc9w= diff --git a/spider/controller.go b/spider/controller.go index 3b6502b..607056c 100644 --- a/spider/controller.go +++ b/spider/controller.go @@ -178,7 +178,6 @@ func (c *Controller) productsToArticles(products []coach_client.Product) (articl for _, product := range products { // 一个color是一个sku for _, color := range product.Colors { - // 如果没找到,说明没有标准商品,创建一个 article := v2.Article{ Name: product.Name, EnglishName: product.Name, @@ -225,36 +224,88 @@ func (c *Controller) cronCrawl() { c.Crawl() } -// TODO长时间没更新的,需要更新 func (c *Controller) Crawl() { logrus.Infof("%s 开始抓取信息", time.Now()) // 开始拉取,修改状态 c.setProviderStatus(v2.ProviderStatus_Pulling) + c.crawlAllBags() + c.crawlUpdateBags() + // 拉取结束,修改状态 + c.provider.PullAt = time.Now() + c.setProviderStatus(v2.ProviderStatus_Normal) + logrus.Infof("%s 抓取信息结束", time.Now()) +} - var msgs = make([]string, 0) +// 抓取所有包 +func (c *Controller) crawlAllBags() { for page, totalPage := 1, -1; page <= totalPage || totalPage == -1; page++ { resp, err := c.client.ViewAllBags(c.ctx, page) - logrus.Infof("开始处理第%d页数据", page) + logrus.WithField("part", "抓所有包").Infof("开始处理第%d页数据", page) if err != nil { - msg := fmt.Sprintf("访问coach第%d页失败: %v", page, err) - msgs = append(msgs, msg) - logrus.Error(msg) + logrus.WithField("part", "抓所有包").Errorf("访问coach第%d页失败: %v", page, err) continue } totalPage = resp.PageData.TotalPages c.saveProducts(c.productsToArticles(resp.PageData.Products)) - logrus.Infof("第%d页数据保存完成", page) - break + logrus.WithField("part", "抓所有包").Infof("第%d页数据保存完成", page) } +} - // 拉取结束,修改状态 - c.provider.PullAt = time.Now() - if len(msgs) > 0 { - c.setProviderStatus(v2.ProviderStatus_Error, strings.Join(msgs, "\n")) - } else { - c.setProviderStatus(v2.ProviderStatus_Normal) - } - logrus.Infof("%s 抓取信息结束", time.Now()) +// 抓取之前没更新的 +func (c *Controller) crawlUpdateBags() { + logrus.Info("更新之前没更新的商品信息") + var results = make([]v2.Article, 0, 10) + var total = 0 + c.storage.DB(). + Joins("LEFT JOIN provider_articles ON provider_articles.article_id=articles.id AND provider_articles.provider_id = ?", c.providerId). + Where("(provider_articles.exclude = false or provider_articles.exclude is null) and (articles.brand = 'coach') and (provider_articles.updated_at is null or provider_articles.updated_at < ?)", time.Now().Add(-time.Hour*24)). + FindInBatches(&results, 10, func(tx *gorm.DB, batch int) error { + total += len(results) + pids := lo.Map(results, func(item v2.Article, index int) string { + return item.Pid + }) + list, err := c.client.RequestProductDetailList(c.ctx, pids...) + if err != nil { + logrus.Errorf("请求商品信息失败: %v", err) + return nil + } + for idx, item := range list { + article := &results[idx] + var pArticle v2.ProviderArticle + if item.Id == "" { + pArticle = v2.ProviderArticle{ + Pid: article.Pid, + Brand: article.Brand, + ProviderId: c.providerId, + SkuID: article.Pid, + Exclude: true, + } + } else { + pArticle = v2.ProviderArticle{ + Pid: article.Pid, + Brand: article.Brand, + Link: item.Url, + Image: article.Image, + ProviderId: c.providerId, + SkuID: article.Pid, + Ats: item.Inventory.Ats, + Available: item.Inventory.Orderable, + } + pArticle.Cost = utils.CalculateProviderPrice( + append(c.provider.CalculateProcess, pArticle.CalculateProcess...), + map[string]float64{ + "originalPrice": item.Prices.CurrentPrice, + "freight": c.provider.Config.Freight, + "exchangeRate": c.provider.Config.ExchangeRate, + }) + pArticle.HistoryPrice = append(pArticle.HistoryPrice, pArticle.Cost) + } + article.Providers = append(article.Providers, pArticle) + } + c.saveProducts(results) + return nil + }) + logrus.Infof("共%d条数据更新", total) } // 对coach返回的数据进行处理保存