飞桨x昇腾生态适配方案：13_API离线推理

ais_bench 提供的 Python API 可用于在昇腾硬件上对离线模型（.om 模型）进行推理，具体介绍可参考 API_GUIDE。
下面列举几个常用的 API 推理场景及其使用方法。

静态API推理

单个om推理

api_1.py

import cv2
import numpy as np
from ais_bench.infer.interface import InferSession
from ais_bench.infer.common.utils import logger_print


def infer_api_static(
    model_path='/home/w30067200/paddle_test/PaddleOCR/inference/om/inference.om',
    image_path='/home/w30067200/paddle_test/PaddleOCR/test/general_ocr_002.png',
    device_id=0,
):
    """Run one static-shape inference of a single .om model on one image.

    The image is resized to the spatial size reported by the model's first
    input, fed to the model, and the first output is printed and logged.

    Args:
        model_path: Path of the offline .om model to load.
        image_path: Path of the image passed to ``cv2.imread``.
        device_id: Device index handed to ``InferSession``.

    Returns:
        The list returned by ``session.infer`` (the original script only
        printed it; returning it is additive).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: %s" % image_path)

    # Create the session of the om model for inference.
    session = InferSession(device_id, model_path)
    try:
        # Build the input tensor from the model's own input description.
        shape0 = session.get_inputs()[0].shape
        print(shape0)
        height, width = shape0[2], shape0[3]
        resized_image = cv2.resize(image, (width, height))

        # cv2 yields an (H, W, C) array, while the input shape above is
        # indexed as (N, C, H, W); reorder so the pixel layout matches.
        # NOTE(review): assumes batch size 1 and a 3-channel input - confirm
        # against the printed shape0.
        image_array = np.ascontiguousarray(
            resized_image.astype(np.float32).transpose(2, 0, 1)[np.newaxis, ...]
        )

        feeds = [image_array]

        # Execute inference; inputs and outputs are both lists of ndarrays.
        outputs = session.infer(feeds, mode='static')
        print(outputs[0].shape)

        # Print the full tensor instead of numpy's truncated summary.
        np.set_printoptions(threshold=np.inf)
        print(outputs[0])

        logger_print("outputs: %s" % outputs)
        return outputs
    finally:
        # Free the model resource and device context even if inference fails.
        session.free_resource()


if __name__ == "__main__":
    infer_api_static()

多个om推理

需要注意的是：前一个 om 模型推理输出的 shape 必须先转换为下一个 om 模型输入所要求的 shape，才能作为其输入：
api_2.py

import cv2
import numpy as np
from ais_bench.infer.interface import InferSession
from ais_bench.infer.common.utils import logger_print


def infer_api_static(
    det_model_path='./det.om',
    rec_model_path='./rec.om',
    image_path='./general_ocr_002.png',
    device_id=0,
):
    """Chain two static-shape .om models: detection output feeds recognition.

    The image is resized to the first model's input size and inferred; the
    first output is then expanded to 3 channels and resized to the second
    model's input size before the second inference.

    Args:
        det_model_path: Path of the first (.om) model.
        rec_model_path: Path of the second (.om) model.
        image_path: Path of the image passed to ``cv2.imread``.
        device_id: Device index handed to both ``InferSession`` objects.

    Returns:
        The list returned by the second model's ``infer`` call (the original
        script discarded it; returning it is additive).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: %s" % image_path)

    # Create one session per om model.
    session0 = InferSession(device_id, det_model_path)
    session = None
    try:
        session = InferSession(device_id, rec_model_path)

        # Read both models' input shapes; they drive every resize below.
        shape0 = session0.get_inputs()[0].shape
        shape1 = session.get_inputs()[0].shape
        print("shape0:",shape0)
        print("shape1:",shape1)

        height, width = shape0[2], shape0[3]
        resized_image = cv2.resize(image, (width, height))
        # cv2 yields (H, W, C) while the input shape is indexed as
        # (N, C, H, W); reorder so the pixel layout matches.
        # NOTE(review): assumes batch size 1 and a 3-channel input - confirm
        # against the printed shape0.
        image_array = np.ascontiguousarray(
            resized_image.astype(np.float32).transpose(2, 0, 1)[np.newaxis, ...]
        )

        # Execute the first inference; inputs and outputs are ndarray lists.
        outputs = session0.infer([image_array], mode='static')
        print("outputs[0].shape:",outputs[0].shape)

        # Convert the first model's output shape (e.g. [1, 1, 640, 480]) into
        # the second model's input shape (e.g. [1, 3, 48, 320]).
        height, width = shape1[2], shape1[3]
        arr = np.repeat(outputs[0], 3, axis=1)      # [1, 3, H0, W0]
        arr = arr.transpose(0, 2, 3, 1)             # [1, H0, W0, 3] for cv2
        resized = cv2.resize(
            np.ascontiguousarray(arr[0]),
            (width, height),
            interpolation=cv2.INTER_LINEAR,
        )                                           # [H1, W1, 3]
        # Back to NCHW; made contiguous because transpose only returns a view.
        resized = np.ascontiguousarray(
            resized[np.newaxis, ...].transpose(0, 3, 1, 2)
        )                                           # [1, 3, H1, W1]

        feeds = [resized]
        print("feeds[0].shape",feeds[0].shape)

        # Execute the second inference.
        outputs1 = session.infer(feeds, mode='static')
        # Retained from the original script: disables numpy's print truncation.
        np.set_printoptions(threshold=np.inf)
        return outputs1
    finally:
        # Free the model resource and device context of BOTH sessions; the
        # original released only the first one.
        session0.free_resource()
        if session is not None:
            session.free_resource()


if __name__ == "__main__":
    infer_api_static()

执行结果如下所示：
01_静态API推理结果

动态API推理

多个om推理

import onnx
import numpy as np
from ais_bench.infer.interface import InferSession



def infer_api_dymshape(
    model_path0='./model_dest_linux_x86_64.om',
    model_path1='./mode_loop_input2_i_cond_linux_x86_64.om',
    device_id=1,
    batch=4,
):
    """Run two dynamic-shape .om models, looping the second one.

    The first model is inferred once on two constant tensors. Its first
    output is the tensor carried through the loop and its second output is
    the iteration count; the second model is then inferred that many times.

    Args:
        model_path0: Path of the model inferred once.
        model_path1: Path of the model inferred inside the loop.
        device_id: Device index handed to both ``InferSession`` objects.
        batch: Leading dimension of the generated input tensors.

    Returns:
        The tensor carried out of the loop (also printed). The original
        script returned nothing; returning it is additive.
    """
    # Load the models.
    session0 = InferSession(device_id, model_path0)
    session1 = None
    try:
        session1 = InferSession(device_id, model_path1)

        # input1 is all ones, input2 is all twos.
        input1 = np.ones((batch, 16, 32), dtype=np.float32)
        input2 = np.full((batch, 16, 32), 2, dtype=np.float32)

        # Named `feeds` so the builtin `input` is not shadowed.
        feeds = [input1, input2]
        # Infer model_dest_linux_x86_64.om.
        output0 = session0.infer(feeds, mode='dymshape')
        input3 = output0[0]
        # The count arrives as an ndarray; range() needs a plain int.
        cnts = int(np.asarray(output0[1]).reshape(-1)[0])

        for _ in range(cnts):
            # NOTE(review): feeds are passed as a name->tensor dict exactly as
            # in the original; confirm the installed ais_bench accepts a dict.
            inputs = {"x.13": input3, "input2": input2}
            # Infer mode_loop_input2_i_cond_linux_x86_64.om. infer() returns a
            # list, so keep only the tensor for the next iteration; the
            # original fed the whole list back in as "x.13".
            # NOTE(review): assumes the first output is the updated tensor,
            # mirroring output0[0] above - confirm against the model.
            input3 = session1.infer(inputs, mode='dymshape')[0]
        print("input3", input3)
        return input3
    finally:
        # Free the model resource and device context of both sessions.
        session0.free_resource()
        if session1 is not None:
            session1.free_resource()


if __name__ == "__main__":
    infer_api_dymshape()

执行结果：
02_动态API推理结果

飞桨x昇腾生态适配方案：13_API离线推理

静态API推理

单个om推理

多个om推理

动态API推理

多个om推理

讲道义的遥控器

引用和评论

飞桨x昇腾生态适配方案：00_整体方案介绍

Open WebUI：开源AI交互平台的全面解析

大模型中的Token究竟是什么？从原理到作用深度解析

一文掌握 MCP 上下文协议：从理论到实践

MySQL × 向量数据库：大模型时代的黄金组合实战指南

AdventureX 2025 正式启动：五天四夜，120小时极限创造！一起在杭州点燃青年创新之火！

大模型时代，后端程序员如何避免被AI卷死？