Python使用第三方库xlrd获取Excel表格中的字段并生成SQL文件

之前写过一篇使用Python脚本生成Excel文件的文章，时隔很久这种终于有空写该库的兄弟库xlrd，用来读取Excel文件。
最近被调到电商项目，由于种种原因（设计不完善、SQL语句书写不规范，甲方太奇葩...槽点太多，就不在这里吐槽了）需要经常进行刷数据操作（批量修改错误数据），一般就是写一写SQL文件来直接操作，但是在进行了反复的ctrl+c和ctrl+v的操作之后，终于决定写一个脚本，直接将Excel文件中需要insert和update读取出来，并生成SQL文件。

项目GitHub地址：https://github.com/sunny0826/...

生成update语句：

#!/usr/bin/env python
#encoding: utf-8
#Author: guoxixi
import xlrd
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

def open_excel(file='test.xls'):
    try:
        data = xlrd.open_workbook(file)  # 打开excel文件
        return data
    except Exception, e:
        print str(e)


def excel_table_bycol(file='', where=[0], colindex=[0], table_name='Sheet1'):
    data = open_excel(file)
    table = data.sheet_by_name(table_name)  # 获取excel里面的某一页
    nrows = table.nrows  # 获取行数
    t_name = table.row_values(0)[0].encode('utf8') #表名
    colnames = table.row_values(1)  # 获取第一行的值，作为key来使用
    list = []
    # （2，nrows）表示取第二行以后的行，第一行为表名，第二行为表头
    for rownum in range(2, nrows):
        row = table.row_values(rownum)
        if row:
            whe = {}
            for n in where:
                whe[str(colnames[n]).encode("utf-8")] = str(row[n]).encode("utf-8")  #输入的筛选字段
            app = {}
            for i in colindex:
                app[str(colnames[i]).encode("utf-8")] = str(row[i]).encode("utf-8")  # 将数据填入一个字典中，同时对数据进行utf-8转码，因为有些数据是unicode编码的
            list.append({'where':whe,'app':app})  # 将字典加入列表中去
    return list,t_name


def main(file,where,colindex):
    # colindex为需要更新的列，where为筛选的列
    tables = excel_table_bycol(file,where,colindex, table_name=u'Sheet1')
    with open('./sql_result/update#'+tables[1]+'.sql', 'w') as f:    # 创建sql文件，并开启写模式
        for info in tables[0]:
            sql_line = "UPDATE "+tables[1]+" SET"
            apps = info.get('app')
            for key,value in apps.items():
                if sql_line.endswith('SET'):
                    sql_line += " "+key+"='"+value+"' "
                else:
                    sql_line += ", " + key + "='" + value + "' "
            sql_line += " WHERE"
            where = info.get('where')
            for key, value in where.items():
                if sql_line.endswith('WHERE'):
                    sql_line += " "+key+"='"+value+"' "
                else:
                    sql_line += "AND " + key + "='" + value + "' "
            sql_line+="\n"
            f.write(sql_line)  # 往文件里写入sql语句

if __name__ == "__main__":
    file_name = 'test.xls'  # 导入xls文件名
    where = [0,1,2]         # 条件字段
    colindex = [3, 4]       # 需要插入的列
    main(file_name,where,colindex)

在Excel文件中，第一行行需要update的表名，第二行为字段名，每一列为该字段的对应值，这里可以进行where过滤，只需要修改main函数下where数组内的数字即可，需要update的字段同理，执行后就会生成名字为update#表名.sql的SQL文件。

生成insert语句：

#!/usr/bin/env python
#encoding: utf-8
#Author: guoxixi
import xlrd
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

def open_excel(file='test.xls'):
    try:
        data = xlrd.open_workbook(file)  # 打开excel文件
        return data
    except Exception, e:
        print str(e)


def excel_table_bycol(file='', colindex=[0], table_name='Sheet1'):
    data = open_excel(file)
    table = data.sheet_by_name(table_name)  # 获取excel里面的某一页
    nrows = table.nrows  # 获取行数
    t_name = table.row_values(0)[0].encode('utf8') #表名
    colnames = table.row_values(1)  # 获取第一行的值，作为key来使用
    list = []
    # （2，nrows）表示取第二行以后的行，第一行为表名，第二行为表头
    list.append(t_name)
    list.append(colnames)
    for rownum in range(2, nrows):
        row = table.row_values(rownum)
        if row:
            app = []
            for i in colindex:
                app.append(str(row[i]).encode("utf-8") )
            list.append(app)  # 将字典加入列表中去
    return list


def main(file_name,colindex):
    # colindex为需要插入的列
    tables = excel_table_bycol(file_name,colindex, table_name=u'Sheet1')
    t_name = tables.pop(0)
    key_list = ','.join(tables.pop(0)).encode('utf8')   #list转为str
    sql_line = "INSERT INTO "+t_name+"（"+key_list+"）VALUE"
    line = ''
    for info in tables:
        content = ','.join(info)
        if line != '':
            line =line + ',(' + content + ')'
        else:
            line = '('+content+')'
    sql_line = sql_line + line + ';'
    with open('./sql_result/insert#' + t_name + '.sql', 'w') as f:  # 创建sql文件，并开启写模式
        f.write(sql_line)  # 往文件里写入sql语句

if __name__ == "__main__":
    file_name = 'test.xls'          #导入xls文件名
    colindex = [0, 1, 2, 3, 4]      #需要插入的列
    main(file_name,colindex)

生成insert语句SQL文件的Excel格式与update的相同，但是传入参数方面，因为不需要过滤条件，所以只需要往数组中写入需要插入字段的列数就好。

希望对大家有所帮助^_^

Python使用第三方库xlrd获取Excel表格中的字段并生成SQL文件

郭西西

引用和评论

Kubecm：管理你的 kubeconfig

Anaconda安装教程以及Anaconda和pip配置国内镜像

科学计算编程涉及到的技术栈简介

使用 chardet 判断文件编码需要注意的坑——过大的文件会导致高耗时

Python3 格式化时间（qbit）

manus 的替代品有哪些？使用LLM大模型技术做手机/网页/浏览器自动化操作技术汇总

怎么判断自己下载的 trae 是国际版还是国内版？