需求是这样的:需要查询数据库里面的数据,然后生成对应格式的xml,打算用协程实现,我的大致思路是这样的:查询出总数据量,然后根据配置的每个xml的数据个数,计算出需要生成多少个xml,每个xml就启动一个协程去执行生成xml,代码已经写完,但是经常跑一会就卡主不执行了。。不知道为啥,有大神指导一下,或者提供一个更加好的思路么,数据量不多,26万条左右。。如下是协程代码:
func getSiteMaps(db *sql.DB, offset int64, size int64, c chan Sitemap) {
fmt.Println("offset in channel:", offset)
sql := "select a.id as outerID,a.name,a.sale_price as price, a.category_id,a.market_price as value,b.name as brand from item as a left join item_brand as b on a.brand_id = b.id where a.status = 1 order by a.id asc limit " + strconv.FormatInt(offset, 10) + "," + strconv.FormatInt(size, 10)
var r Urlset //初始化xml
var urls []Url //初始化urls
//查询具体的数据
if rows, rowsErr := db.Query(sql); rowsErr != nil {
panic(rowsErr)
} else {
for rows.Next() {
var (
outerID, name, brand, category_id string
price, value float64
)
rows.Scan(&outerID, &name, &price, &category_id, &value, &brand)
if outerID == "" {
continue
}
//loc获取
_loc := getLocUrl(outerID)
//通过categoryId获取分类信息
_categoryInfo := getCategoryInfoByParentId(category_id, db)
// fmt.Println(_categoryInfo)
// os.Exit(1)
if _categoryInfo["categoryId"] == "" {
continue
}
//获取商品图片
_imgs := getImages(outerID, db)
if len(_imgs) <= 0 {
continue
}
_firstImg := ""
var _moreImages []Images
for i, v := range _imgs {
if i == 0 {
_firstImg = v
} else {
_moreImages = append(_moreImages, Images{InnerText: v, Index: i})
}
}
moreImages := Mimages{Img: _moreImages}
// moreImages := _moreImages
subAttribute := []SubAttribute{SubAttribute{Key: "ext_down_load", Value: "https://m.mia.com/detail-a-" + outerID + ".html"}, SubAttribute{Key: "ext_put_url", Value: "https://m.mia.com/detail-a-" + outerID + ".html"}}
//Data数据
_choice := Attribute{Attribute: subAttribute}
itemData := Data{
OuterID: outerID,
Name: name,
Price: price,
Value: value,
PriceUnit: _PRICE_UNIT,
Availability: _AVAILABILITY,
Image: _firstImg,
MoreImages: moreImages,
Brand: brand,
Loc: _loc,
PcLoc: getPcLoc(outerID),
SellerSiteUrl: _SELLER_SITE_URL,
ShopName: _SHOP_NAME,
SearchWiseUrl: getSearchWiseUrl(outerID),
Category: _categoryInfo["categoryName"],
CategoryUrl: getCategoryUrl(category_id),
CategoryPcUrl: getCategoryPcUrl(category_id),
SubCategory: _categoryInfo["subCategoryName"],
SubCategoryUrl: getCategoryUrl(_categoryInfo["subCategoryId"]),
SubcategoryPcUrl: getCategoryPcUrl(_categoryInfo["subCategoryId"]),
Choice: _choice,
SellerName: _SELLER_NAME,
Logo: _LOGO}
// fmt.Println(itemData)
// os.Exit(1)
//url数据
itemUrl := Url{Loc: _loc, Data: itemData}
//urls数据
urls = append(urls, itemUrl)
}
//生成xml数据
r.Url = urls
}
//生成xml文件
if len(urls) > 1 {
//取数据,
fileName := getXmlName(offset, size)
generateXml(fileName, r, func() {
xmlUrl := _URL_PREFIX + fileName
itemSiteMap := Sitemap{Loc: xmlUrl, Lastmod: getCurrentTime()}
// sitemap = append(sitemap, itemSiteMap)
//需要停顿几秒,待写文件完毕。主要是防止最后一次写文件没写完,程序就执行完退出
time.Sleep(5)
fmt.Println("done...", offset)
c <- itemSiteMap
})
}
}
如下是main里面的内容:
func main() {
db, dbErr := sql.Open("mysql", _DB)
if dbErr != nil {
fmt.Println("db connection err")
os.Exit(1)
} else {
fmt.Println("db ok")
}
sql2 := "select count(*) as total from item as a left join item_brand as b on a.brand_id = b.id where a.status = 1"
var total float64
if rowCountErr := db.QueryRow(sql2).Scan(&total); rowCountErr != nil {
panic(rowCountErr)
} else {
fmt.Println("total:", total)
//根据条数,获取协程数量
pages := int(math.Ceil(total / _NUM_PER_XML))
ch := make(chan Sitemap, pages)
for i := 0; i <= pages; i++ {
var offset int64
if i > 0 {
offset = int64(_NUM_PER_XML*i + 1)
}
go getSiteMaps(db, offset, _NUM_PER_XML, ch)
}
for v1 := range ch {
fmt.Println("val:", v1)
}
}
}
新手,求指导?
这样写的话,相当于在同一刻进行getSiteMaps动作,按你说有26w笔数据吧,那就是同一刻进行26w次的getSiteMaps操作,你确定你不是在做"压力测试"?这都不死机的话,除非你是天河二号。
解决你的问题从两个方面入手,一个是开启多核心支持,一个是调整代码
首先你要在main体里面加上这句
runtime.GOMAXPROCS(runtime.NumCPU())
,这样才算用上了多核心,否则程序运行时只会用1刻核心,哪怕你有128核,也只用1核。比较复杂的是代码方面,建议你实现个工人池的代码,让工人来做getSiteMaps,并根据cpu数量来决定工人数量,还要做到一个工人在一个时间只做1件事情。