from datetime import datetime, timedelta, timezone
import time
import contextlib
@contextlib.contextmanager
def timer(msg: str = None, logger=None):
    """Log how long the wrapped ``with`` body took, at DEBUG level.

    Args:
        msg: Label placed at the start of the emitted log line.
        logger: Any object exposing ``debug(str)``. When ``None``, loguru's
            global logger is imported lazily and used instead.

    Yields:
        None. The context manager exists only for its timing side effect.
    """
    if logger is None:
        from loguru import logger as loguru_logger
        logger = loguru_logger
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    try:
        yield
    finally:
        # Report the elapsed time even when the body raises; the exception
        # still propagates to the caller after the log line is written.
        logger.debug(f'{msg}, used {round(time.perf_counter() - start, 3)} s')
# Benchmark: time the eager construction of one independent empty list per
# (video_uuid, frame index) pair -- 376 * 15655 inner lists in total.
with timer('创建空的容器耗时'):
    matched_metas = list(map(str, range(376)))
    dna_search_result_len = 15655
    groupbyed_dna_search_result: dict[str, list[list[dict]]] = {}
    for video_uuid in matched_metas:
        # A comprehension (not list multiplication) keeps every slot a
        # distinct list object.
        per_frame_slots = [[] for _ in range(dna_search_result_len)]
        groupbyed_dna_search_result[video_uuid] = per_frame_slots
用这种方式创建空容器,日志输出为「创建空的容器耗时, used 2.108 s」。
怎样才能把这一步的耗时控制在 0.1 s 以内?
完整代码如下:
def group_by_dna_search_result(
    dna_search_result: list[list[SampleFrameSearchResult]],
    filter_top_k: int = 5
) -> dict[str, list[list[SampleFrameSearchResult]]]:
    """Group per-frame search hits by the video they matched.

    Args:
        dna_search_result: One inner list per searched frame; every item
            must expose a ``video_uuid`` attribute.
        filter_top_k: Forwarded to ``fist_item`` as its ``top_k`` argument.

    Returns:
        A mapping from each ``video_uuid`` seen in the input to a list with
        one slot per input frame (same length and order as
        ``dna_search_result``). Each slot holds whatever ``fist_item``
        returns for the hits of that video in that frame.

    NOTE(review): step 2 eagerly allocates
    ``len(matched_metas) * len(dna_search_result)`` empty lists, which is the
    slow part when both are large. Avoiding it would need a sparse layout,
    which changes the returned shape -- confirm with callers before doing so.
    """
    frame_count = len(dna_search_result)

    with timer('group_by > 1. 统计所有 video_uuid 耗时'):
        matched_metas: set[str] = {
            frame_match_result.video_uuid
            for frame_search_result in dna_search_result
            for frame_match_result in frame_search_result
        }
    logger.debug(f'一共有 {len(matched_metas)} 个样本被检出')
    logger.debug(f'一共有 {frame_count} 个 frame 被检出')

    with timer('group_by > 2. 创建空的容器'):
        # Each slot must be its own list: ``[[]] * n`` would alias a single
        # list across all frames.
        groupbyed_dna_search_result: dict[
            str, list[list[SampleFrameSearchResult]]
        ] = {
            video_uuid: [[] for _ in range(frame_count)]
            for video_uuid in matched_metas
        }

    with timer('group_by > 3. 分组'):
        for frame_index, frame_search_result in enumerate(dna_search_result):
            for frame_match_result in frame_search_result:
                per_frame = groupbyed_dna_search_result[frame_match_result.video_uuid]
                per_frame[frame_index].append(frame_match_result)

    with timer('group_by > 4. 过滤'):
        # Rewrite each slot in place through the list already in hand rather
        # than re-resolving it through the dict on every iteration.
        for per_frame in groupbyed_dna_search_result.values():
            for frame_index, frame_matches in enumerate(per_frame):
                per_frame[frame_index] = fist_item(
                    frame_matches, top_k=filter_top_k)

    # The original fell off the end and returned None despite the annotation.
    return groupbyed_dna_search_result
列表乘法会有 bug
# Pitfall demo: list multiplication repeats a reference to the SAME inner
# list, so a mutation made through one slot shows up in every slot.
shared_cell = []
a = [shared_cell] * 10
a[0].append('haha')
print(a)
修改其中任何一个 item,
其他的 item 也都会被修改(因为所有元素引用的是同一个列表对象)
[['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha'], ['haha']]
from copy import copy
# Demo: a shallow copy duplicates only the outer list; the inner lists are
# still shared, so appending through ``a`` is visible through ``b`` as well.
a = [[] for _ in range(3)]
b = copy(a)
a[0].append('haha')
print(a)
print(b)
浅拷贝不行(内层列表仍然是共享的)
深拷贝可以
但是速度更慢了
请优化上面的代码,减少初始化时间:
输出: