cn-coach-spider/spider/controller.go

133 lines
3.6 KiB
Go
Raw Normal View History

2024-06-14 16:36:33 +08:00
package spider
import (
"context"
"fmt"
2024-07-26 16:47:00 +08:00
"time"
2024-06-16 11:08:28 +08:00
"gitea.timerzz.com/kedaya_haitao/cn-coach-spider/pkg/options"
2024-06-14 16:36:33 +08:00
"gitea.timerzz.com/kedaya_haitao/common/model/product"
coach_client "gitea.timerzz.com/kedaya_haitao/common/pkg/coach-client"
"github.com/golang/glog"
"github.com/samber/lo"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
type Controller struct {
2024-06-16 11:08:28 +08:00
ctx context.Context
client *coach_client.CN
db *gorm.DB
interval time.Duration
website productv1.Website
linkPrefix string
pid string
calculates []productv1.CalculateProcess
2024-06-14 16:36:33 +08:00
}
2024-06-16 11:08:28 +08:00
func NewController(client *coach_client.CN, db *gorm.DB, interval time.Duration, shopType string) *Controller {
2024-06-14 16:36:33 +08:00
ctl := &Controller{
2024-06-16 11:08:28 +08:00
client: client,
db: db,
interval: interval,
website: productv1.WebSite_CN_Coach,
linkPrefix: "https://www.coach.com.cn/products/",
pid: CNCoachPid,
}
if shopType == options.ShopType_outlet {
ctl.website = productv1.WebSite_CN_Coach_Outlet
ctl.linkPrefix = "https://www.coach.com.cn/outlet/products/"
ctl.pid = CNCoachOutletPid
2024-06-14 16:36:33 +08:00
}
ctl.AutoMigrate()
2024-06-16 11:08:28 +08:00
ctl.LoadCalculateProcess()
2024-06-14 16:36:33 +08:00
return ctl
}
// AutoMigrate runs gorm's auto-migration for the Product and HistoryPrice
// models and panics if the migration reports an error.
func (c *Controller) AutoMigrate() {
	err := c.db.AutoMigrate(
		&productv1.Product{},
		&productv1.HistoryPrice{},
	)
	if err != nil {
		panic(err)
	}
}
2024-06-16 11:08:28 +08:00
2024-06-14 16:36:33 +08:00
// Run stores ctx on the controller, crawls once immediately and then crawls
// again on every tick of c.interval until ctx is done. Crawl errors are logged
// and do not stop the loop.
//
// Note: time.NewTicker panics when the interval is not positive.
func (c *Controller) Run(ctx context.Context) {
	c.ctx = ctx

	// The ticker is created before the first crawl, so the first tick is
	// measured from the start of Run rather than from the end of that crawl.
	tick := time.NewTicker(c.interval)
	defer tick.Stop()

	// crawlAndLog performs one crawl and logs its outcome.
	crawlAndLog := func() {
		err := c.Crawl()
		if err != nil {
			glog.Error(err.Error())
			return
		}
		glog.Info("抓取信息成功")
	}

	crawlAndLog()
	for {
		select {
		case <-tick.C:
			crawlAndLog()
		case <-ctx.Done():
			return
		}
	}
}
func (c *Controller) Crawl() error {
glog.Info("开始抓取信息")
2024-06-14 17:45:51 +08:00
for page, totalPage := 1, 1; page <= totalPage; page++ {
2024-06-14 16:36:33 +08:00
resp, err := c.client.ListItems(c.ctx, page, 50)
if err != nil {
return fmt.Errorf("访问coach第%d页失败: %w", page, err)
}
totalPage = resp.TotalPages
if err = c.saveRespData(resp.Items); err != nil {
return fmt.Errorf("保存第%d页数据失败: %w", page, err)
}
glog.Infof("第%d页数据保存成功", page)
}
return nil
}
func (c *Controller) saveRespData(list []coach_client.CNItem) error {
var products = make([]productv1.Product, 0, len(list))
for _, item := range list {
2024-06-16 11:08:28 +08:00
var savedProduct productv1.Product
c.db.Model(&savedProduct).Where("pid = ?", item.Code).Select("dw_price").Scan(&savedProduct)
2024-07-26 16:47:00 +08:00
var img string
if len(item.Images) > 0 && len(item.Images[0].Imgs) > 0 {
img = item.Images[0].Imgs[0].Img
} else {
glog.Warningf("%s img不存在 %v", item.Code, item)
}
2024-06-16 11:08:28 +08:00
p := productv1.Product{
UpdatedAt: time.Now(),
Name: item.Title,
Pid: item.Code,
Link: fmt.Sprintf("%s%s", c.linkPrefix, item.Code),
2024-07-26 16:47:00 +08:00
Image: img,
2024-06-16 11:08:28 +08:00
Orderable: item.Stock > 0,
DiscPercent: 100 - int(item.DiscountRateMin*100),
OriginalPrice: item.SkuMaxPrice,
Website: c.website,
DWPrice: savedProduct.DWPrice,
}
var calculate []productv1.CalculateProcess
c.db.Model(&productv1.CalculateProcess{}).Find(&calculate, "pid = ? AND website = ?", p.Pid, c.website)
p.CalCNY(append(calculate, c.calculates...))
products = append(products, p)
2024-06-14 16:36:33 +08:00
}
// 去重
products = lo.UniqBy(products, func(p productv1.Product) string {
return p.Pid
})
return c.db.Clauses(clause.OnConflict{
Columns: []clause.Column{{Name: "pid"}},
2024-06-16 11:08:28 +08:00
DoUpdates: clause.AssignmentColumns([]string{"name", "link", "orderable", "original_price", "rate", "price_status", "disc_percent", "updated_at"}),
2024-06-14 16:36:33 +08:00
}).Create(products).Error
}