请大家帮忙看看代码,我想写一个从数据库导出数据到 excel 的工具,目前虽然实现了但是执行效率很慢?

概述:小弟的程序可以通过输入数据库类型、数据库连接字符串、执行 sql 或者含有 sql 的文件等,将数据库中的 sql 查询出来并写入 excel
性能:目前导出 25W 条数据,每行数据 5 个字段,数据库类型 oracle,驱动 goracle,excel 生成库使用 excelize,用时 1 分 33 秒;同环境使用 python3.6,数据库驱动 cx_Oracle,excel 生成库使用 pyexcelerate,同样的查询语句,用时 51s (是的…没干过 python/(ㄒoㄒ)/~~)
程序描述:为了提升执行效率,小弟使用了 goroutine,一个 goroutine 专门执行 sql 并将结果生成[]interface{}并装入通道,另外一个 goroutine 不断的从通道中取出[]interface{}并写入 excel
个人感觉可能存在的问题点:
1、golang 的数据库查询方式只能一条一条生成,同时,机制用到了反射,而不像 python 可以通过 fetchmany 一次性获取大量数据,不知道此处是否会有性能差距
2、当字段类型是 date 类型时,当字段为空时,如果不做 isZero 判断,输出到 excel 的日期零值很异常(值为-5XXXXX,显示为##########)。所以每取出一条[]interface{},都需要挨个判断类型是不是日期,如果是日期的话,是不是零值,此处可能会影响效率。而 python 没有这个问题

请各位 golang 大大给提点优化意见吧,谢谢大家
代码如下:

package main

import (
    _ "gopkg.in/goracle.v2"
    _ "github.com/asifjalil/cli"
    "github.com/jmoiron/sqlx"
    "flag"
    "fmt"
    "github.com/axgle/mahonia"
    "strings"
    "os"
    "strconv"
    "io/ioutil"
    "time"
    "github.com/360EntSecGroup-Skylar/excelize"
    "runtime"
)

// DataGetter runs query on db and streams the result set to its consumer.
//
// The column names are sent exactly once on columnChan, which is then closed.
// Each result row is sent on rowChan as the []interface{} returned by
// SliceScan, and rowChan is closed after the last row. db is closed when the
// function returns.
//
// The function has no error return, so every database error panics.
func DataGetter(db *sqlx.DB, query string, rowChan chan<- []interface{}, columnChan chan<- []string) {
	defer db.Close()

	rows, err := db.Queryx(query)
	if err != nil {
		panic(err)
	}
	defer rows.Close()

	// Check the error before publishing the columns, so the consumer never
	// starts working from a column list that could not be read.
	columns, err := rows.Columns()
	if err != nil {
		panic(fmt.Sprintf("failed to read columns: %s", err.Error()))
	}
	columnChan <- columns
	close(columnChan)

	for rows.Next() {
		r, err := rows.SliceScan()
		if err != nil {
			panic(fmt.Sprintf("db row query failed: %s", err.Error()))
		}
		rowChan <- r
	}
	// Next returns false both at end of data and on failure; Err tells the two
	// apart, so a broken fetch is not mistaken for a complete export.
	if err := rows.Err(); err != nil {
		panic(fmt.Sprintf("db row iteration failed: %s", err.Error()))
	}
	close(rowChan)
}

// ExcelWriter receives the column names from columnChan and then every row
// from rowChan, writes them into a workbook and saves it as fileName+".xlsx".
//
// Row 1 of each sheet holds the column names and data starts at row 2. Once a
// sheet has reached row 99999, the next row starts a new sheet named
// sheetHead followed by a running number (sheetHead+"1", sheetHead+"2", ...).
// time.Time values that are the zero time are written as an empty string,
// because a zero date otherwise shows up as a large negative number in Excel.
//
// The function returns when rowChan is closed. It panics if the workbook
// cannot be saved, because it has no error return.
func ExcelWriter(sheetHead string, fileName string, rowChan <-chan []interface{}, columnChan <-chan []string) {
	// Data rows 2..maxRow-1 go on one sheet before rolling over.
	const maxRow = 100000

	columns := <-columnChan

	// The column letters are the same for every row, so compute them once
	// instead of once per cell.
	colNames := make([]string, len(columns))
	for i := range columns {
		colNames[i] = excelize.ToAlphaString(i)
	}

	baseName := sheetHead // kept so overflow sheet names do not accumulate suffixes
	sheetcnt := 1
	cnt := 2

	excel := excelize.NewFile()
	excel.NewSheet(sheetHead)
	// SetSheetRow is documented to take a pointer to a slice.
	excel.SetSheetRow(sheetHead, "A1", &columns)

	// Ranging over the channel stops exactly when it is closed and drained,
	// so no nil row from the closed channel is ever processed.
	for r := range rowChan {
		// Roll over lazily, only when there really is another row to write,
		// so the export never ends with an empty trailing sheet.
		if cnt >= maxRow {
			sheetHead = baseName + strconv.Itoa(sheetcnt)
			excel.NewSheet(sheetHead)
			excel.SetSheetRow(sheetHead, "A1", &columns)
			cnt = 2
			sheetcnt++
		}

		rowNo := strconv.Itoa(cnt)
		for a := 0; a < len(colNames) && a < len(r); a++ {
			value := r[a]
			if t, ok := value.(time.Time); ok && t.IsZero() {
				value = ""
			}
			excel.SetCellValue(sheetHead, colNames[a]+rowNo, value)
		}
		cnt++
	}

	if err := excel.SaveAs(fileName + ".xlsx"); err != nil {
		panic("failed to save " + fileName + ".xlsx: " + err.Error())
	}
}



// getConn opens a database handle for the given connection string and
// database type.
//
//   - "oracle":   dbconn is "user:password@tnsname"; the first ":" is replaced
//     by "/" and the result is handed to the goracle driver.
//   - "db2":      dbconn is "user:password@ip:port/dbname"; it is converted to
//     a DB2 CLI connection string for the cli driver.
//   - "postgres": dbconn is prefixed with "postgres://".
//
// Any other dbtype, or a malformed db2 connection string, prints a message and
// exits the process with status -1. sqlx.MustOpen panics if the handle cannot
// be opened.
func getConn(dbconn string, dbtype string) (db *sqlx.DB) {
	switch dbtype {
	case "oracle":
		return sqlx.MustOpen("goracle", strings.Replace(dbconn, ":", "/", 1))
	case "db2":
		connString, err := db2ConnString(dbconn)
		if err != nil {
			fmt.Println(err)
			os.Exit(-1)
		}
		return sqlx.MustOpen("cli", connString)
	case "postgres":
		// NOTE(review): no postgres driver appears among the file's visible
		// imports; confirm one is registered before relying on this branch.
		return sqlx.MustOpen("postgres", "postgres://"+dbconn)
	default:
		fmt.Println("dbtype not matched!")
		os.Exit(-1)
		return nil
	}
}

// db2ConnString converts "user:password@ip:port/dbname" into a DB2 CLI
// connection string, or returns an error describing the missing part.
func db2ConnString(dbconn string) (string, error) {
	// Split at the last "@" so a password containing "@" still parses.
	at := strings.LastIndex(dbconn, "@")
	if at < 0 {
		return "", fmt.Errorf("db2 connect string %q: missing '@', want user:password@ip:port/dbname", dbconn)
	}
	userPart, dbPart := dbconn[:at], dbconn[at+1:]

	cred := strings.SplitN(userPart, ":", 2)
	if len(cred) != 2 {
		return "", fmt.Errorf("db2 connect string %q: missing ':' between user and password", dbconn)
	}
	username, password := cred[0], cred[1]

	hostAndDB := strings.SplitN(dbPart, "/", 2)
	if len(hostAndDB) != 2 {
		return "", fmt.Errorf("db2 connect string %q: missing '/dbname'", dbconn)
	}
	dbname := hostAndDB[1]

	hostPort := strings.SplitN(hostAndDB[0], ":", 2)
	if len(hostPort) != 2 {
		return "", fmt.Errorf("db2 connect string %q: missing ':port'", dbconn)
	}
	dbip, dbport := hostPort[0], hostPort[1]

	// The format has six verbs. CurrentSchema is set to the user name —
	// presumably the intent of the original argument order; confirm.
	return fmt.Sprintf("Driver={IBM DB2 ODBC Driver};Hostname=%s;Port=%s;Protocol=TCPIP;Database=%s;CurrentSchema=%s;UID=%s;PWD=%s;",
		dbip, dbport, dbname, username, username, password), nil
}

// main parses the command-line flags, loads the SQL text (from -q, or from the
// file given with -l, which takes precedence when both are set) and exports
// the query result. Only the xlsx export is implemented; any other -f value
// prints the usage text.
func main() {
	// Parse command-line arguments.
	dbconn := flag.String("d", "", `Database connect string,use "user:password@ip:port/dbname" for db2 or "user:password@tnsname" for oracle`)
	dbtype := flag.String("t", "", "Database type:oracle db2 mysql mssql")
	filetype := flag.String("f", "xlsx", "exported file type:xlsx or txt default:xlsx")
	// xlsx require options
	sheetname := flag.String("h", "Sheet1", "sheet name: default Sheet1")
	// txt require options
	//charset := flag.String("c","utf-8","charset for exported text file:gbk utf-8 and so on")
	//separator := flag.String("s","/","separator: default:/")
	// sql options
	query := flag.String("q", "", "sql in one line")
	sqlfile := flag.String("l", "", "sqlfile")
	filename := flag.String("n", time.Now().Format("20060102150405"), "filename")
	flag.Parse()

	if *dbconn == "" || *dbtype == "" || *filetype == "" {
		flag.Usage()
		return
	}
	if *query == "" && *sqlfile == "" {
		flag.Usage()
		return
	}
	if *sqlfile != "" {
		sqlbyte, err := ioutil.ReadFile(*sqlfile)
		if err != nil {
			panic("read sqlfile failed! " + err.Error())
		}
		enc := mahonia.NewEncoder("utf-8")
		*query = enc.ConvertString(string(sqlbyte))
	}
	runtime.GOMAXPROCS(2)

	switch *filetype {
	case "xlsx":
		rowChan := make(chan []interface{}, 50000)
		columnsChan := make(chan []string)
		db := getConn(*dbconn, *dbtype)
		// The reader goroutine feeds the channels; the writer runs on the
		// main goroutine and returns once rowChan is closed.
		go DataGetter(db, *query, rowChan, columnsChan)
		ExcelWriter(*sheetname, *filename, rowChan, columnsChan)
	//case "txt":
	//    db := getConn(*dbconn,*dbtype)
	//    TextFileExporter(db,*charset,*separator,*filename,*query)
	default:
		// Show the usage text only for an unsupported file type, not after a
		// successful export.
		flag.Usage()
	}
}
阅读 3.9k
1 个回答

python的fetchmany必定也是一条条取出来的,只是帮你组合返回了数组。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题