2024-06-14 16:36:33 +08:00
|
|
|
package spider
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"gitea.timerzz.com/kedaya_haitao/common/model/product"
|
|
|
|
coach_client "gitea.timerzz.com/kedaya_haitao/common/pkg/coach-client"
|
|
|
|
"github.com/golang/glog"
|
|
|
|
"github.com/samber/lo"
|
|
|
|
"gorm.io/gorm"
|
|
|
|
"gorm.io/gorm/clause"
|
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
|
|
|
type Controller struct {
|
|
|
|
ctx context.Context
|
|
|
|
client *coach_client.CN
|
|
|
|
db *gorm.DB
|
|
|
|
interval time.Duration
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewController(client *coach_client.CN, db *gorm.DB, interval time.Duration) *Controller {
|
|
|
|
ctl := &Controller{
|
|
|
|
client: client,
|
|
|
|
db: db,
|
|
|
|
interval: interval,
|
|
|
|
}
|
|
|
|
ctl.AutoMigrate()
|
|
|
|
return ctl
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Controller) AutoMigrate() {
|
|
|
|
if err := c.db.AutoMigrate(&productv1.Product{}, &productv1.HistoryPrice{}); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
func (c *Controller) Run(ctx context.Context) {
|
|
|
|
c.ctx = ctx
|
|
|
|
ticker := time.NewTicker(c.interval)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
if err := c.Crawl(); err != nil {
|
|
|
|
glog.Error(err.Error())
|
|
|
|
} else {
|
|
|
|
glog.Info("抓取信息成功")
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case <-ticker.C:
|
|
|
|
if err := c.Crawl(); err != nil {
|
|
|
|
glog.Error(err.Error())
|
|
|
|
} else {
|
|
|
|
glog.Info("抓取信息成功")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Controller) Crawl() error {
|
|
|
|
glog.Info("开始抓取信息")
|
2024-06-14 17:45:51 +08:00
|
|
|
for page, totalPage := 1, 1; page <= totalPage; page++ {
|
2024-06-14 16:36:33 +08:00
|
|
|
resp, err := c.client.ListItems(c.ctx, page, 50)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("访问coach第%d页失败: %w", page, err)
|
|
|
|
}
|
|
|
|
totalPage = resp.TotalPages
|
|
|
|
|
|
|
|
if err = c.saveRespData(resp.Items); err != nil {
|
|
|
|
return fmt.Errorf("保存第%d页数据失败: %w", page, err)
|
|
|
|
}
|
|
|
|
glog.Infof("第%d页数据保存成功", page)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *Controller) saveRespData(list []coach_client.CNItem) error {
|
|
|
|
var products = make([]productv1.Product, 0, len(list))
|
|
|
|
for _, item := range list {
|
|
|
|
products = append(products, productv1.Product{
|
|
|
|
UpdatedAt: time.Now(),
|
|
|
|
Name: item.Title,
|
|
|
|
Pid: item.Code,
|
|
|
|
Link: fmt.Sprintf("https://www.coachoutlet.cn/products/%s", item.Code),
|
|
|
|
Image: item.Images[0].Imgs[0].Img,
|
|
|
|
Orderable: item.Stock > 0,
|
|
|
|
DiscPercent: 100 - int(item.DiscountRateMin*100),
|
|
|
|
CNYPrice: item.SkuMaxPrice,
|
|
|
|
Website: productv1.WebSite_CN_Coach,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
// 去重
|
|
|
|
products = lo.UniqBy(products, func(p productv1.Product) string {
|
|
|
|
return p.Pid
|
|
|
|
})
|
|
|
|
return c.db.Clauses(clause.OnConflict{
|
|
|
|
Columns: []clause.Column{{Name: "pid"}},
|
|
|
|
DoUpdates: clause.AssignmentColumns([]string{"name", "link", "orderable", "cny_price", "rate", "price_status", "disc_percent", "updated_at"}),
|
|
|
|
}).Create(products).Error
|
|
|
|
}
|