你好,
业务需要,开发了一个分区的函数,实现分区表的自动分区(range分区),
现在问题是数据表有7000张,每到月中执行一次,扩展15天的分区,现在测试的情况是函数效率很低,定位到问题在greenplum中,把函数/存储过程当成了一整个事务,必须全部执行完毕才提交吗?
有没有办法做到没循环执行一个sql(alter table add partition,就提交一次)
附详细函数代码,不胜感激
-- Function: dwdb.proc_add_partition()
-- DROP FUNCTION dwdb.proc_add_partition();
CREATE OR REPLACE FUNCTION dwdb.proc_add_partition()
RETURNS void AS
$BODY$ DECLARE V_SQL1 TEXT;
V_SQL2 TEXT;
V_TABLENAME TEXT;
v_CNT NUMERIC;
V_TABLEDATE NUMERIC;
START_TIME_NUMERIC NUMERIC;
START_END_NUMERIC NUMERIC;
END_TIME_NUMERIC NUMERIC;
START_TIME_DATE TIMESTAMP WITHOUT TIME ZONE;
END_TIME_DATE TIMESTAMP WITHOUT TIME ZONE;
C_CUR1 CURSOR FOR
SELECT SCHEMANAME || '.' || TABLENAME,
MAX(TO_CHAR((SUBSTR(REPLACE(PARTITIONNAME, 'p', ''), 1, 4) || '-' ||
SUBSTR(REPLACE(PARTITIONNAME, 'p', ''), 5, 2) || '-' ||
SUBSTR(REPLACE(PARTITIONNAME, 'p', ''), 7, 2))
::TIMESTAMP + INTERVAL '1 DAY',
'YYYYMMDD') ::NUMERIC)
FROM PG_PARTITIONS
WHERE SCHEMANAME IN ('dwdb')
AND PARTITIONNAME LIKE '%p%'
AND PARTITIONNAME NOT LIKE '%-%'
AND LENGTH(PARTITIONNAME) = 9
--AND TABLENAME = 'dw_f_re_st_volte_mgcf_h'
GROUP BY SCHEMANAME || '.' || TABLENAME;
BEGIN
OPEN C_CUR1;
LOOP
FETCH C_CUR1
INTO V_TABLENAME, V_TABLEDATE;
IF NOT FOUND THEN
EXIT;
END IF;
START_END_NUMERIC := TO_CHAR((SUBSTR(V_TABLEDATE, 1, 4) || '-' ||
SUBSTR(V_TABLEDATE, 5, 2) || '-' ||
SUBSTR(V_TABLEDATE, 7, 2))
::TIMESTAMP + INTERVAL '1 DAY',
'YYYYMMDD') ::NUMERIC;
START_TIME_NUMERIC := V_TABLEDATE;
START_TIME_DATE := (SUBSTR(START_TIME_NUMERIC, 1, 4) || '-' ||
SUBSTR(START_TIME_NUMERIC, 5, 2) || '-' ||
SUBSTR(START_TIME_NUMERIC, 7, 2)) ::TIMESTAMP;
END_TIME_DATE := START_TIME_DATE + INTERVAL '1 DAY';
END_TIME_NUMERIC := TO_CHAR((SUBSTR(V_TABLEDATE, 1, 4) || '-' ||
SUBSTR(V_TABLEDATE, 5, 2) || '-' ||
SUBSTR(V_TABLEDATE, 7, 2))
::TIMESTAMP + INTERVAL '1 DAY',
'YYYYMMDD') ::NUMERIC;
BEGIN
LOOP
EXIT WHEN START_TIME_NUMERIC >= START_END_NUMERIC;
V_SQL1 = '
ALTER TABLE ' || V_TABLENAME || ' ADD PARTITION P' || START_TIME_NUMERIC || ' START (' || START_TIME_NUMERIC || ') END (' || END_TIME_NUMERIC || ');
';
EXECUTE V_SQL1;
--INSERT INTO ST_PROCESS_LOG (REMARK) VALUES(V_SQL1);
START_TIME_NUMERIC := TO_CHAR((SUBSTR(START_TIME_NUMERIC, 1, 4) || '-' ||
SUBSTR(START_TIME_NUMERIC, 5, 2) || '-' ||
SUBSTR(START_TIME_NUMERIC, 7, 2))
::TIMESTAMP + INTERVAL '1 DAY',
'YYYYMMDD') ::NUMERIC;
START_TIME_DATE := START_TIME_DATE + INTERVAL '1 DAY';
END_TIME_NUMERIC := TO_CHAR((SUBSTR(START_TIME_NUMERIC, 1, 4) || '-' ||
SUBSTR(START_TIME_NUMERIC, 5, 2) || '-' ||
SUBSTR(START_TIME_NUMERIC, 7, 2))
::TIMESTAMP + INTERVAL '1 DAY',
'YYYYMMDD') ::NUMERIC;
END_TIME_DATE := END_TIME_DATE + INTERVAL '1 DAY';
END LOOP;
COMMIT;
EXCEPTION
WHEN OTHERS THEN
END;
END LOOP;
END;
$BODY$
LANGUAGE plpgsql VOLATILE;
ALTER FUNCTION dwdb.proc_add_partition()
OWNER TO gpadmin;
这个据我所知可能不行。
变通的办法是,这样的逻辑,用 python 实现也不复杂。如果实在想用 sql 做接口,那就用外部表去调用写好的 python 程序。