看到一篇文章nameko 的使用及注意事项 ,中有一段关于发起远程调用的描述
rpc后紧跟的是微服务定义时的类变量 name 的值即为微服务名称,接着紧跟rpc方法,使用 call_async 为异步调用,而调用 result_async.result() 时会等待异步任务返回结果。需要注意的是, 运行 ClusterRpcProxy(config) 时会创建与队列的连接,该操作比较耗时,如果有大量的微服务调用,不应该重复创建连接,应在语句块内完成所有调用。异步调用的结果只能在语句块内获取,即调用 .result() 等待结果。语句块之外连接断开就无法获取了。
大概意思就是说:不应该频繁实例化 ClusterRpcProxy,因为每次实例化都会重新建立与消息队列的连接,开销较大。结合我之前使用时在 RabbitMQ 后台看到的奇怪现象,感觉有必要来研究一下:
下面是常见的 nameko
调用方式
api.py
from fastapi import FastAPI
from loguru import logger  # used by upload() below; was missing from the snippet
from schemas import (
    UploadRequestBody,  # request model referenced by upload(); was missing
)
from rpc import (
    upload_service_rpc
)

app = FastAPI()


@app.get('/')
async def root():
    return {"message": "Hello World"}


@app.post('/upload/')
def upload(data: UploadRequestBody):
    logger.debug(data.json(ensure_ascii=False))
    success: bool = upload_service_rpc(data)  # issue the RPC call here
    return {
        'status': success
    }
upload_service_rpc
方法是对 nameko
提供的 ClusterRpcProxy
的一层函数包装
rpc.py
from nameko.standalone.rpc import ClusterRpcProxy

import settings
from schemas import (
    UploadRequestBody,
)
from loguru import logger

# AMQP connection settings for the RabbitMQ broker nameko talks to.
config = {
    'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
                f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
                f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}


def upload_service_rpc(data: UploadRequestBody) -> bool:
    """RPC interface exposed to the FastAPI layer.

    NOTE: each call builds a fresh ClusterRpcProxy, which opens a new
    broker connection and reply queue -- this is the costly pattern the
    article goes on to analyse.
    """
    # Issue the RPC request through ClusterRpcProxy.
    with ClusterRpcProxy(config) as cluster_rpc:
        success: bool = cluster_rpc.console_service.upload(
            data=data.json(ensure_ascii=False)
        )
    return success
上面的写法看起来很完美,但是 nameko
的实现是每次实例化 ClusterRpcProxy
都会在 RabbitMQ
中创建一个新的 queue
,如果我们每次 rpc
请求都像上面的代码一样频繁实例化 ClusterRpcProxy
会导致大量的时间耗费在创建 queue
上。
下图是 RabbitMQ
后台管理界面的截图,可以看到当发起多次请求的时候,会出现大量 rpc.reply-standalone_rpc_proxy_{routing_key}
格式的 queue
这些 rpc.reply-standalone_rpc_proxy_{routing_key}
队列会在没有消息之后几秒后被关闭,不会一直存在下去
接下来改造代码:
api.py
import settings
from loguru import logger
from fastapi import FastAPI
from schemas import (
    UploadRequestBody
)
from rpc import (
    init_rpc_proxy
)

app = FastAPI()
# Make rpc_proxy a module-level global whose lifetime spans the whole program.
# NOTE(review): this is the article's intentionally broken version -- the
# proxy is never start()-ed, so the attribute access below raises
# AttributeError (explained right after this snippet).
rpc_proxy = init_rpc_proxy()


@app.post('/upload/')
def upload(data: UploadRequestBody):
    logger.debug(data.json(ensure_ascii=False))
    # Invoke the `upload` method of `console_service` over RPC.
    success: bool = rpc_proxy.console_service.upload(
        data=data.json(ensure_ascii=False)
    )
    return {
        'status': success
    }
rpc.py
# coding=utf-8
from nameko.standalone.rpc import ClusterRpcProxy

import settings
from schemas import (
    UploadRequestBody,
)
from loguru import logger

# AMQP connection settings for the RabbitMQ broker nameko talks to.
config = {
    'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
                f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
                f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}


def init_rpc_proxy():
    # Only constructs the proxy object; no broker connection is opened
    # until start() / __enter__ is called on it.
    return ClusterRpcProxy(config)
但是当我们执行上面的新代码的时候,就报错
AttributeError: 'ClusterRpcProxy' object has no attribute 'console_service'
为什么呢?原因在于 ClusterRpcProxy 类的 __enter__
方法,当我们不使用 with
上下文管理器的时候,就不会执行 __enter__
方法中的内容,而秘密就在 __enter__
方法中,让我们来看看 __enter__
方法中有什么吧!
nameko/standalone/rpc.py
class StandaloneProxyBase(object):  # parent class of ClusterRpcProxy
    class ServiceContainer(object):
        """ Implements a minimum interface of the
        :class:`~containers.ServiceContainer` to be used by the subclasses
        and rpc imports in this module.
        """
        service_name = "standalone_rpc_proxy"

        def __init__(self, config):
            self.config = config
            self.shared_extensions = {}

    class Dummy(Entrypoint):
        method_name = "call"

    _proxy = None

    def __init__(
        self, config, context_data=None, timeout=None,
        reply_listener_cls=SingleThreadedReplyListener
    ):
        container = self.ServiceContainer(config)
        self._worker_ctx = WorkerContext(
            container, service=None, entrypoint=self.Dummy,
            data=context_data)
        self._reply_listener = reply_listener_cls(
            timeout=timeout).bind(container)

    def __enter__(self):
        # Entering the context manager starts the proxy and returns
        # self._proxy (NOT self) -- the key detail discussed in the article.
        return self.start()

    def __exit__(self, tpe, value, traceback):
        self.stop()

    def start(self):
        self._reply_listener.setup()
        return self._proxy  # the callable proxy, not the wrapper object

    def stop(self):
        self._reply_listener.stop()


class ClusterRpcProxy(StandaloneProxyBase):
    def __init__(self, *args, **kwargs):
        super(ClusterRpcProxy, self).__init__(*args, **kwargs)
        self._proxy = ClusterProxy(self._worker_ctx, self._reply_listener)
StandaloneProxyBase
是 ClusterRpcProxy 的父类,可以看到 __enter__
方法执行了 return self.start()
,start 方法返回 return self._proxy
, 而不是常见的 return self
,所以这就导致了我们前面的错误。
知道了问题的原因,改起来就很快了!
api.py
import settings
from loguru import logger
from fastapi import FastAPI
from schemas import (
UploadRequestBody
)
from rpc import (
init_rpc_proxy
)
app = FastAPI()
_rpc_proxy = init_rpc_proxy() # 区分两个 _rpc_proxy 和 rpc_proxy
rpc_proxy = _rpc_proxy.start()
@app.post('/upload/')
def upload(data: UploadRequestBody):
logger.debug(data.json(ensure_ascii=False))
# success: bool = upload_service_rpc2(data)
success: bool = rpc_proxy.console_service.upload( # 使用 rpc_proxy 调用 rpc 方法
data=data.json(ensure_ascii=False)
)
return {
'status': success
}
rpc.py
# coding=utf-8
from nameko.standalone.rpc import ClusterRpcProxy
import settings
from schemas import (
UploadRequestBody,
)
from loguru import logger
config = {
'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}
def init_rpc_proxy():
return ClusterRpcProxy(config)
好了,我们看看前后的速度差异:
测试代码:
import requests

data = {
    # this part of the payload was redacted in the article
}

# Fire 20 sequential requests so `time` measures the aggregate latency.
for i in range(20):
    response = requests.post('http://localhost:63000/upload/', json=data)
    print(response.status_code, response.text)
循环跑 20 次:
修改前:
─➤ time python test_api.py
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
python test_api.py 0.14s user 0.05s system 1% cpu 14.696 total
修改后:
─➤ time python test_api.py
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
200 {"status":true}
python test_api.py 0.14s user 0.05s system 2% cpu 7.271 total
因为避免了每次 RPC 请求都创建一个 queue
,所以速度实现了比较大的提升。
14
秒对比 7
秒,实现了速度的翻倍!
线程安全
但是需要注意的是 ClusterProxy 并不是并发安全的,通过源代码的注释可以看到,A single-threaded RPC proxy to a cluster of services.
nameko/standalone/rpc.py
class ClusterProxy(object):
    """
    A single-threaded RPC proxy to a cluster of services. Individual services
    are accessed via attributes, which return service proxies. Method calls on
    the proxies are converted into RPC calls to the service, with responses
    returned directly.

    Enables services not hosted by nameko to make RPC requests to a nameko
    cluster. It is commonly used as a context manager but may also be manually
    started and stopped.

    This is similar to the service proxy, but may be uses a single reply queue
    for calls to all services, where a collection of service proxies would have
    one reply queue per proxy.

    *Usage*

    As a context manager::

        with ClusterRpcProxy(config) as proxy:
            proxy.service.method()
            proxy.other_service.method()

    The equivalent call, manually starting and stopping::

        proxy = ClusterRpcProxy(config)
        proxy = proxy.start()
        proxy.targetservice.method()
        proxy.other_service.method()
        proxy.stop()

    If you call ``start()`` you must eventually call ``stop()`` to close the
    connection to the broker.

    You may also supply ``context_data``, a dictionary of data to be
    serialised into the AMQP message headers, and specify custom worker
    context class to serialise them.

    When the name of the service is not legal in Python, you can also
    use a dict-like syntax::

        with ClusterRpcProxy(config) as proxy:
            proxy['service-name'].method()
            proxy['other-service'].method()
    """
所以,要在多线程或者协程环境下并发使用全局单例 ClusterProxy 的话是不可行的,这个时候我们可以参考数据库的连接池,搞一个 ClusterProxy 的多线程并发安全连接池
参考代码如下:
rpc.py
# coding=utf-8
import threading
import queue

# ClusterProxy is only used for type hints; ClusterRpcProxy builds proxies.
# (The snippet originally imported ClusterRpcProxy twice; deduplicated.)
from nameko.standalone.rpc import (ClusterProxy, ClusterRpcProxy)

import settings

# AMQP connection settings for the RabbitMQ broker nameko talks to.
config = {
    'AMQP_URI': f'amqp://{settings.AMQP_URI.RABBIT_USER}:'
                f'{settings.AMQP_URI.RABBIT_PASSWORD}@{settings.AMQP_URI.RABBIT_HOST}:'
                f'{settings.AMQP_URI.RABBIT_PORT}/{settings.AMQP_URI.RABBIT_VHOST}'
}
def synchronized(func):
    """Decorator that gives *func* its own lock and serializes all calls.

    Each decorated function gets a dedicated ``threading.Lock`` stored on
    ``func.__lock__``, so concurrent calls to the same function are
    mutually exclusive (calls to different functions do not block each
    other).
    """
    from functools import wraps

    func.__lock__ = threading.Lock()

    @wraps(func)  # preserve the wrapped function's name and docstring
    def lock_func(*args, **kwargs):
        with func.__lock__:
            return func(*args, **kwargs)

    return lock_func
class RpcProxyPool:
    """Thread-safe, unbounded pool of started nameko cluster proxies.

    NOTE: ``queue`` is a class attribute, so every RpcProxyPool instance
    shares the same pool. The pool has no upper bound on size (the article
    calls it an "unbounded connection pool").
    """
    queue = queue.Queue()

    @synchronized
    def get_connection(self) -> ClusterProxy:
        # Lazily grow the pool: if it is empty, create one connection first
        # so the get() below never blocks.
        if self.queue.empty():
            conn = self.create_connection()
            self.queue.put(conn)
        return self.queue.get()

    def init_rpc_proxy(self):
        # Only constructs the proxy wrapper; the connection opens in start().
        return ClusterRpcProxy(config)

    @synchronized
    def create_connection(self) -> ClusterProxy:
        # start() opens the broker connection and returns the usable proxy
        # (remember: start() returns self._proxy, not the wrapper itself).
        _rpc_proxy: ClusterRpcProxy = self.init_rpc_proxy()
        rpc_proxy: ClusterProxy = _rpc_proxy.start()
        return rpc_proxy

    @synchronized
    def put_connection(self, conn: ClusterProxy) -> bool:
        # Only accept genuine proxies back into the pool.
        if isinstance(conn, ClusterProxy):
            self.queue.put(conn)
            return True
        return False
api.py
from loguru import logger
from fastapi import FastAPI
from schemas import (
    AddStruct
)
from rpc import (
    RpcProxyPool
)

app = FastAPI()
pool = RpcProxyPool()


@app.get('/')
async def root():
    return {"message": "Hello World"}


@app.post('/upload/')
def upload(data: AddStruct):
    logger.debug(data.dict())
    rpc_proxy = pool.get_connection()
    try:
        # RPC call to add_service.add via the pooled proxy.
        c: int = rpc_proxy.add_service.add(
            data.a, data.b
        )
    finally:
        # Always return the connection -- even if the RPC call raised --
        # otherwise the proxy (and its reply queue) would leak from the pool.
        pool.put_connection(rpc_proxy)
    return {
        'r': c
    }
这里实现了一个 RpcProxyPool,它是一个无界连接池(池中连接数量没有上限)。
流程图大致如下,虽然画的很烂
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。