Python 如何快速创建空列表?

from datetime import datetime, timedelta, timezone
import time
import contextlib


@contextlib.contextmanager
def timer(msg: str = None, logger=None):
    """Log how long the body of the ``with`` block took to run.

    Args:
        msg: Label placed at the start of the emitted log line.
        logger: Any object exposing a ``debug(str)`` method. When omitted,
            loguru's global ``logger`` is imported and used.

    Yields:
        None. Control is handed to the body of the ``with`` statement.

    The elapsed time is logged even when the body raises; the exception
    then propagates to the caller unchanged.
    """
    if logger is None:
        # Imported lazily so loguru is only needed when no logger is supplied.
        from loguru import logger as loguru_logger
        logger = loguru_logger

    # perf_counter is monotonic, so the measurement cannot be skewed (or go
    # negative) if the wall clock is adjusted while the block is running.
    start = time.perf_counter()
    try:
        yield
    finally:
        logger.debug(f'{msg}, used {round(time.perf_counter() - start, 3)} s')


# Benchmark: time how long it takes to pre-allocate one list of empty lists
# per video id.
with timer('创建空的容器耗时'):
    # 376 stand-in video ids: '0' .. '375'.
    matched_metas = [str(i) for i in range(376)]
    # Number of inner empty lists created for every video id.
    dna_search_result_len = 15655

    # Maps video id -> list holding `dna_search_result_len` inner lists.
    groupbyed_dna_search_result: dict[str,
                                      list[list[dict]]] = {}

    # 376 * 15655 = 5,886,280 distinct empty list objects are built here.
    for video_uuid in matched_metas:
        groupbyed_dna_search_result[video_uuid] = [
            []
            for i in range(dna_search_result_len)
        ]

这个方式的耗时输出是:「创建空的容器耗时, used 2.108 s」

怎么控制到 0.1s 内?


完整代码如下:

def group_by_dna_search_result(
    dna_search_result: list[list[SampleFrameSearchResult]],
    filter_top_k: int = 5
) -> dict[str, list[list[SampleFrameSearchResult]]]:
    """Regroup per-frame search hits by the video they matched.

    Args:
        dna_search_result: One inner list per frame, holding that frame's
            hits. Each hit must expose a ``video_uuid`` attribute.
        filter_top_k: Forwarded as ``top_k`` to ``fist_item`` when each
            per-frame bucket is filtered.

    Returns:
        A dict keyed by ``video_uuid``. Each value is a list with exactly
        one entry per input frame, in input order; entry ``i`` is whatever
        ``fist_item`` returns for the hits of frame ``i`` that belong to
        that video (it is called with an empty list when there are none).
    """
    frame_count = len(dna_search_result)

    with timer('group_by > 1. 统计所有 video_uuid 耗时'):
        # Collect every distinct video id that appears in any frame.
        matched_metas: set[str] = {
            frame_match_result.video_uuid
            for frame_search_result in dna_search_result
            for frame_match_result in frame_search_result
        }
    logger.debug(f'一共有 {len(matched_metas)} 个样本被检出')
    logger.debug(f'一共有 {frame_count} 个 frame 被检出')

    with timer('group_by > 2.  创建空的容器'):
        # Every slot needs its own list object: `[[]] * frame_count` would
        # alias a single inner list across all frames.
        groupbyed_dna_search_result: dict[
            str, list[list[SampleFrameSearchResult]]
        ] = {
            video_uuid: [[] for _ in range(frame_count)]
            for video_uuid in matched_metas
        }

    with timer('group_by > 3.   分组'):
        # Drop each hit into the bucket for (its video, its frame index).
        for frame_index, frame_search_result in enumerate(dna_search_result):
            for frame_match_result in frame_search_result:
                groupbyed_dna_search_result[
                    frame_match_result.video_uuid
                ][frame_index].append(frame_match_result)

    with timer('group_by > 4.   过滤'):
        # Replace each bucket in place with its filtered form. Only values
        # of an existing list are reassigned, so iterating it is safe.
        for per_frame_matches in groupbyed_dna_search_result.values():
            for frame_index, frame_matches in enumerate(per_frame_matches):
                per_frame_matches[frame_index] = fist_item(
                    frame_matches, top_k=filter_top_k)

    # The original fell off the end and implicitly returned None despite the
    # declared dict return type.
    return groupbyed_dna_search_result

列表乘法会有 bug:`[[]] * 10` 得到的 10 个元素都是同一个内层列表的引用。

# `[[]] * 10` repeats a reference to ONE inner list ten times; it does not
# create ten separate lists.
a = [[]]*10

# Appending through index 0 mutates that single shared inner list.
a[0].append('haha')

# Every slot shows 'haha' because all ten slots refer to the same list.
print(a)

修改任何一个 item

其他的也都被修改了

[['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha']]

from copy import copy

# Three distinct inner lists.
a = [[],[],[]]

# Shallow copy: `b` is a new outer list, but its items are the very same
# inner list objects that `a` holds.
b=copy(a)

# Mutating an inner list through `a` is therefore visible through `b` too.
a[0].append('haha')

# Both lines print [['haha'], [], []].
print(a)
print(b)

浅拷贝不行(内层列表仍然是共享的)

深拷贝可以

但是速度更慢了

阅读 2.2k
1 个回答

可以用生成器稍微减少一些初始化时间,对比如下:

import contextlib
import time

# Timing helper
@contextlib.contextmanager
def timer(msg: str = None):
    """Print how long the body of the ``with`` block took to run.

    Args:
        msg: Label placed at the start of the printed line.

    Yields:
        None. Control is handed to the body of the ``with`` statement.

    The elapsed time is printed even when the body raises; the exception
    then propagates to the caller unchanged.
    """
    # perf_counter is monotonic, so the measurement cannot be skewed (or go
    # negative) if the wall clock is adjusted while the block is running.
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f'{msg}, used {round(time.perf_counter() - start, 3)} s')

# Define matched_metas and dna_search_result_len
# (376 stand-in video ids and the number of inner lists per id).
matched_metas = [str(i) for i in range(376)]
dna_search_result_len = 15655

# Baseline (the "original method"): an explicit loop over the ids with a
# list comprehension that builds a fresh [] for every slot.
with timer('创建空的容器耗时 - 原始方法'):
    groupbyed_dna_search_result_original = {}
    for video_uuid in matched_metas:
        groupbyed_dna_search_result_original[video_uuid] = [
            []
            for i in range(dna_search_result_len)
        ]

# 用生成器方法
def empty_list_generator(n):
    """Yield ``n`` brand-new empty lists, one at a time.

    A fresh ``[]`` is built for every item, so the yielded lists never
    alias one another.
    """
    yield from ([] for _ in range(n))

# Variant under test (the "generator method"): build each per-id list by
# draining empty_list_generator inside a dict comprehension.
with timer('创建空的容器耗时 - 生成器方法'):
    groupbyed_dna_search_result_generator = {video_uuid: list(empty_list_generator(dna_search_result_len)) for video_uuid in matched_metas}

# Check the result: both approaches must produce equal structures.
# NOTE(review): this is a bare expression, so its value is only shown in a
# REPL/notebook; presumably that is where the `True` in the post came from.
groupbyed_dna_search_result_generator == groupbyed_dna_search_result_original

输出:


创建空的容器耗时 - 原始方法, used 4.41 s
创建空的容器耗时 - 生成器方法, used 3.934 s

RESULT

True
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题