I came across an article, "Usage and caveats of nameko" (nameko 的使用及注意事项), which contains a passage about making remote calls:

The name following rpc is the value of the class variable name defined on the microservice, i.e. the service name, followed by the RPC method. Using call_async makes the call asynchronous, and calling result_async.result() waits for the async task's result. Note that running ClusterRpcProxy(config) creates a connection to the queue, which is fairly expensive; if you make many microservice calls, you should not create the connection repeatedly but complete all calls within the with block. Results of async calls can only be retrieved inside the block, i.e. by calling .result() to wait for them; once the block exits and the connection is closed, they can no longer be retrieved.

In short: don't instantiate ClusterRpcProxy frequently. Combined with some odd behavior I had previously seen in the RabbitMQ management console, this seemed worth investigating.

Below is a common way of calling nameko services.

api.py

from fastapi import FastAPI
from loguru import logger
from schemas import (
    UploadRequestBody
)
from rpc import (
    upload_service_rpc
)

app = FastAPI()

@app.get('/')
async def root():
    return {"message": "Hello World"}

@app.post('/upload/')
def upload(data: UploadRequestBody):
    logger.debug(data.json(ensure_ascii=False))

    success: bool = upload_service_rpc(data)  # make the RPC call here
    return {
        'status': success
    }

The upload_service_rpc function is a thin wrapper around nameko's ClusterRpcProxy.

rpc.py

from nameko.standalone.rpc import ClusterRpcProxy
import settings
from schemas import (
    UploadRequestBody,
)
from loguru import logger

config = {
    'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
                f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
                f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}

def upload_service_rpc(data: UploadRequestBody) -> bool:
    """ RPC interface exposed to FastAPI """
    with ClusterRpcProxy(config) as cluster_rpc:   # make the RPC request through ClusterRpcProxy
        success: bool = cluster_rpc.console_service.upload(
            data=data.json(ensure_ascii=False)
        )
        return success

The code above looks fine, but nameko's implementation creates a new queue in RabbitMQ every time ClusterRpcProxy is instantiated. If every RPC request instantiates ClusterRpcProxy as in the code above, a large amount of time is wasted creating queues.

The screenshot below is from the RabbitMQ management console: after a number of requests, many queues named rpc.reply-standalone_rpc_proxy_{routing_key} appear.

These rpc.reply-standalone_rpc_proxy_{routing_key} queues are deleted a few seconds after they run out of messages; they don't live forever.

Let's rework the code:

api.py

import settings
from loguru import logger
from fastapi import FastAPI
from schemas import (
    UploadRequestBody
)
from rpc import (
    init_rpc_proxy
)

app = FastAPI()


rpc_proxy = init_rpc_proxy()    # make rpc_proxy a global whose lifetime spans the whole program


@app.post('/upload/')
def upload(data: UploadRequestBody):
    logger.debug(data.json(ensure_ascii=False))

    success: bool = rpc_proxy.console_service.upload(  # call console_service's upload method over RPC
        data=data.json(ensure_ascii=False)
    )

    return {
        'status': success
    }


rpc.py

# coding=utf-8

from nameko.standalone.rpc import ClusterRpcProxy
import settings
from schemas import (
    UploadRequestBody,
)
from loguru import logger

config = {
    'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
                f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
                f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}


def init_rpc_proxy():
    return ClusterRpcProxy(config)  # init_rpc_proxy only returns the object; nothing is started yet

But when we run this new code, it raises an error:

AttributeError: 'ClusterRpcProxy' object has no attribute 'console_service'

Why? The reason lies in ClusterRpcProxy's __enter__ method: when we don't use the with context manager, the body of __enter__ never runs, and that is where the secret is. Let's look at what __enter__ does!

nameko/standalone/rpc.py

class StandaloneProxyBase(object):   # StandaloneProxyBase is the parent class of ClusterRpcProxy
    class ServiceContainer(object):
        """ Implements a minimum interface of the
        :class:`~containers.ServiceContainer` to be used by the subclasses
        and rpc imports in this module.
        """
        service_name = "standalone_rpc_proxy"

        def __init__(self, config):
            self.config = config
            self.shared_extensions = {}

    class Dummy(Entrypoint):
        method_name = "call"

    _proxy = None

    def __init__(
        self, config, context_data=None, timeout=None,
        reply_listener_cls=SingleThreadedReplyListener
    ):
        container = self.ServiceContainer(config)

        self._worker_ctx = WorkerContext(
            container, service=None, entrypoint=self.Dummy,
            data=context_data)
        self._reply_listener = reply_listener_cls(
            timeout=timeout).bind(container)

    def __enter__(self):
        return self.start()

    def __exit__(self, tpe, value, traceback):
        self.stop()

    def start(self):
        self._reply_listener.setup()
        return self._proxy

    def stop(self):
        self._reply_listener.stop()

class ClusterRpcProxy(StandaloneProxyBase):
    def __init__(self, *args, **kwargs):
        super(ClusterRpcProxy, self).__init__(*args, **kwargs)
        self._proxy = ClusterProxy(self._worker_ctx, self._reply_listener)

StandaloneProxyBase is the parent class of ClusterRpcProxy. Notice that __enter__ executes return self.start(), and start returns self._proxy rather than the usual self. That is exactly what caused the error above.
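The behavior is easy to reproduce without a broker. Below is a minimal stand-in (plain Python, not nameko's actual classes): a context manager whose __enter__ returns an inner proxy object instead of self, just like StandaloneProxyBase does.

```python
class _Proxy:
    """Stands in for nameko's ClusterProxy: the object that actually
    exposes service methods."""
    def upload(self):
        return True


class FakeClusterRpcProxy:
    """Mimics StandaloneProxyBase: __enter__ returns self._proxy, not self."""
    def __init__(self):
        self._proxy = _Proxy()

    def __enter__(self):
        return self.start()

    def __exit__(self, tpe, value, traceback):
        pass

    def start(self):
        return self._proxy


# Inside `with`, we receive the inner proxy, so the call works:
with FakeClusterRpcProxy() as proxy:
    assert proxy.upload() is True

# Using the outer object directly fails, mirroring the real error:
outer = FakeClusterRpcProxy()
try:
    outer.upload()
except AttributeError as e:
    print(e)  # 'FakeClusterRpcProxy' object has no attribute 'upload'

# Calling start() manually returns the proxy — which is the fix below:
proxy = outer.start()
assert proxy.upload() is True
```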

Now that we know the cause, the fix is quick!

api.py

import settings
from loguru import logger
from fastapi import FastAPI
from schemas import (
    UploadRequestBody
)
from rpc import (
    init_rpc_proxy
)

app = FastAPI()


_rpc_proxy = init_rpc_proxy()  # note the distinction between _rpc_proxy and rpc_proxy
rpc_proxy = _rpc_proxy.start()


@app.post('/upload/')
def upload(data: UploadRequestBody):
    logger.debug(data.json(ensure_ascii=False))

    success: bool = rpc_proxy.console_service.upload(  # make the RPC call through rpc_proxy
        data=data.json(ensure_ascii=False)
    )

    return {
        'status': success
    }

rpc.py

# coding=utf-8

from nameko.standalone.rpc import ClusterRpcProxy
import settings
from schemas import (
    UploadRequestBody,
)
from loguru import logger

config = {
    'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
                f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
                f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}


def init_rpc_proxy():
    return ClusterRpcProxy(config)

Now let's compare the speed before and after:

Test code:

import requests

data = {
    # contents redacted
}

for i in range(20):
    response = requests.post('http://localhost:63000/upload/', json=data)
    print(response.status_code, response.text)

Run the loop 20 times:

Before the change:

─➤  time python test_api.py
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
python test_api.py  0.14s user 0.05s system 1% cpu 14.696 total

After the change:

─➤  time python test_api.py
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
python test_api.py  0.14s user 0.05s system 2% cpu 7.271 total

Because we no longer create a queue for every RPC request, the speedup is substantial.

14 seconds versus 7 seconds: twice as fast!

Thread Safety

Note, however, that ClusterProxy is not safe for concurrent use. The source code's own docstring says it plainly: "A single-threaded RPC proxy to a cluster of services."

nameko/standalone/rpc.py

class ClusterProxy(object):
    """
    A single-threaded RPC proxy to a cluster of services. Individual services
    are accessed via attributes, which return service proxies. Method calls on
    the proxies are converted into RPC calls to the service, with responses
    returned directly.

    Enables services not hosted by nameko to make RPC requests to a nameko
    cluster. It is commonly used as a context manager but may also be manually
    started and stopped.

    This is similar to the service proxy, but may be uses a single reply queue
    for calls to all services, where a collection of service proxies would have
    one reply queue per proxy.

    *Usage*

    As a context manager::

        with ClusterRpcProxy(config) as proxy:
            proxy.service.method()
            proxy.other_service.method()

    The equivalent call, manually starting and stopping::

        proxy = ClusterRpcProxy(config)
        proxy = proxy.start()
        proxy.targetservice.method()
        proxy.other_service.method()
        proxy.stop()

    If you call ``start()`` you must eventually call ``stop()`` to close the
    connection to the broker.

    You may also supply ``context_data``, a dictionary of data to be
    serialised into the AMQP message headers, and specify custom worker
    context class to serialise them.

    When the name of the service is not legal in Python, you can also
    use a dict-like syntax::

        with ClusterRpcProxy(config) as proxy:
            proxy['service-name'].method()
            proxy['other-service'].method()

    """

So using a global singleton ClusterProxy concurrently from multiple threads or coroutines is not an option. Instead, borrowing the idea of a database connection pool, we can build a thread-safe connection pool of ClusterProxy objects.

Reference code:

rpc.py

# coding=utf-8

import settings
import threading
import queue
from nameko.standalone.rpc import (ClusterProxy, ClusterRpcProxy)

config = {
    'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
                f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
                f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}


def synchronized(func):

    func.__lock__ = threading.Lock()

    def lock_func(*args, **kwargs):
        with func.__lock__:
            return func(*args, **kwargs)
    return lock_func


class RpcProxyPool:
    queue = queue.Queue()  # shared, unbounded pool of started proxies

    @synchronized
    def get_connection(self) -> ClusterProxy:
        # lazily create a connection when the pool is empty
        if self.queue.empty():
            conn = self.create_connection()
            self.queue.put(conn)
        return self.queue.get()

    def init_rpc_proxy(self):
        return ClusterRpcProxy(config)

    @synchronized
    def create_connection(self) -> ClusterProxy:
        _rpc_proxy: ClusterRpcProxy = self.init_rpc_proxy()
        rpc_proxy: ClusterProxy = _rpc_proxy.start()

        return rpc_proxy

    @synchronized
    def put_connection(self, conn: ClusterProxy) -> bool:
        if isinstance(conn, ClusterProxy):
            self.queue.put(conn)
            return True
        return False
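As a quick sanity check of the synchronized decorator used above, here is a stdlib-only demo (the small sleep widens the read-modify-write race window that the lock closes):

```python
import threading
import time


def synchronized(func):
    # Same decorator as in rpc.py: one lock per decorated function.
    func.__lock__ = threading.Lock()

    def lock_func(*args, **kwargs):
        with func.__lock__:
            return func(*args, **kwargs)
    return lock_func


counter = 0


@synchronized
def increment():
    global counter
    value = counter          # read
    time.sleep(0.0001)       # widen the race window
    counter = value + 1      # write back


threads = [threading.Thread(target=increment) for _ in range(50)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(counter)  # 50 — without @synchronized this would usually come out lower
```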

api.py

from loguru import logger
from fastapi import FastAPI
from schemas import (
    AddStruct
)
from rpc import (
    RpcProxyPool
)

app = FastAPI()

pool = RpcProxyPool()


@app.get('/')
async def root():
    return {"message": "Hello World"}


@app.post('/upload/')
def upload(data: AddStruct):
    logger.debug(data.dict())

    rpc_proxy = pool.get_connection()

    c: int = rpc_proxy.add_service.add(
        data.a, data.b
    )
    pool.put_connection(rpc_proxy)

    return {
        'r': c
    }

That's RpcProxyPool: an unbounded connection pool.
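The pool's check-out/check-in behavior can also be exercised without a broker. Below is a stdlib-only sketch with the same shape as RpcProxyPool; DummyConn and Pool are stand-ins (not nameko APIs), and each dummy connection carries a lock purely to detect accidental concurrent use:

```python
import queue
import threading


class DummyConn:
    """Stand-in for a started ClusterProxy; counts how many were created."""
    created = 0

    def __init__(self):
        DummyConn.created += 1
        self.in_use = threading.Lock()  # detects concurrent use of one conn


class Pool:
    """Same shape as RpcProxyPool: lazily create, check out, check in."""
    def __init__(self):
        self.q = queue.Queue()
        self._lock = threading.Lock()

    def get_connection(self):
        with self._lock:
            if self.q.empty():
                self.q.put(DummyConn())
        return self.q.get()

    def put_connection(self, conn):
        self.q.put(conn)


pool = Pool()


def worker():
    conn = pool.get_connection()
    # queue.get() hands each item to exactly one getter, so this never fails:
    assert conn.in_use.acquire(blocking=False), "connection used concurrently"
    try:
        pass  # pretend to make an RPC call here
    finally:
        conn.in_use.release()
        pool.put_connection(conn)


threads = [threading.Thread(target=worker) for _ in range(20)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(DummyConn.created)  # connections created: at most 20, often far fewer
```

Because the pool is unbounded, a burst of concurrent requests can grow it up to one connection per in-flight request; connections are reused afterwards rather than closed.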

The flow is roughly as follows (apologies for the rough diagram):

(flowchart image)


universe_king