前言
技术栈
Python 3.11.8 websockets 15.0.1 aliyun-python-sdk-core 2.16.0 nls 1.0.0
- 截至 2025.3.13,`nls.NlsSpeechTranscriber` 不支持异步调用
- 使用 `asyncio.run` 或 `loop.create_task` 将异步调用转化为同步调用
- 后文中,为保持字节流和字符串的一致性,定义了不同格式的结束符:
  - `b'$END$'`:client -> server
  - `'$END$'`:server -> client
材料准备
- 从 GitHub 下载 `nls` 目录和 `tests` 目录里面的 `test1.pcm` 文件
- 将 `nls` 目录放到 `site-packages` 目录下,相当于安装 `nls` 库
示例代码
# Server-side code (synchronous version)
# encoding: utf-8
# author: qbit
# date: 2025-03-13
# summary: WebSocket server that receives speech-recognition requests,
#          forwards the audio to Aliyun and returns the recognized text
import json
import time

import nls
from websockets.exceptions import ConnectionClosed
from websockets.sync.server import serve

URL = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1"
TOKEN = "your_token"  # see https://help.aliyun.com/document_detail/450255.html to obtain a token
APPKEY = "your_key"  # get the Appkey from the console: https://nls-portal.console.aliyun.com/applist

# States after which no further recognition result is expected.
FINAL_STATES = ("completed", "error", "closed")
# Upper bound (seconds) on the wait for a final state after sr.stop().
COMPLETION_TIMEOUT = 10.0


class FuncCallback:
    """Callbacks required by nls.NlsSpeechTranscriber.

    ``state`` records the most recent callback that fired and ``result``
    the most recent recognized text.  The callbacks receive the client
    websocket as ``args[0]`` (see ``callback_args`` in :func:`asr`).
    """

    def __init__(self):
        self.result = ""
        self.state = ""

    def _forward(self, state, message, args):
        """Record the recognized text and push it to the client."""
        self.result = json.loads(message)["payload"]["result"]
        self.state = state
        print(f"on_{state}: {self.result}")
        websocket = args[0]
        try:
            websocket.send(self.result)
        except ConnectionClosed:
            # The client left early; do not let the error escape into the
            # SDK thread that invoked this callback.
            print(f"on_{state}: client connection already closed")

    def on_sentence_begin(self, message, *args):
        print("on_sentence_begin")

    def on_sentence_end(self, message, *args):
        self._forward("sentence_end", message, args)

    def on_start(self, message, *args):
        print("on_start")

    def on_error(self, message, *args):
        print(f"on_error from aliyun {message}")
        self.state = "error"

    def on_close(self, *args):
        print("on_close")
        # A close without a preceding completed/error must still end the
        # wait in asr(), otherwise the handler would spin forever.
        if self.state not in FINAL_STATES:
            self.state = "closed"

    def on_result_chg(self, message, *args):
        self._forward("result_chg", message, args)

    def on_completed(self, message, *args):
        print("on_completed")
        self.state = "completed"


def asr(websocket):
    """Speech recognition service, Automatic Speech Recognition (ASR).

    Streams the binary frames received from ``websocket`` to Aliyun until
    the client sends ``b"$END$"``, relays recognition results back through
    the callbacks, and finally sends the text marker ``"$END$"``.
    """
    print(
        f"Client connected: {websocket.remote_address[0]}:{websocket.remote_address[1]}"
    )
    callback = FuncCallback()
    sr = nls.NlsSpeechTranscriber(
        url=URL,
        token=TOKEN,
        appkey=APPKEY,
        on_sentence_begin=callback.on_sentence_begin,
        on_sentence_end=callback.on_sentence_end,
        on_start=callback.on_start,
        on_result_changed=callback.on_result_chg,
        on_completed=callback.on_completed,
        on_error=callback.on_error,
        on_close=callback.on_close,
        callback_args=[websocket],
    )
    sr.start(
        aformat="pcm",  # supported formats: pcm, opu, opus
        enable_intermediate_result=True,
        enable_punctuation_prediction=True,
        enable_inverse_text_normalization=True,
    )
    stopped = False
    try:
        for message in websocket:
            if message == b"$END$":
                break
            if not isinstance(message, bytes):
                continue  # text frames carry no audio
            sr.send_audio(message)
        sr.stop()
        stopped = True
    finally:
        if not stopped:
            # The client connection failed mid-stream: release the
            # transcriber instead of leaving its request open.
            sr.shutdown()

    deadline = time.monotonic() + COMPLETION_TIMEOUT
    while callback.state not in FINAL_STATES and time.monotonic() < deadline:
        time.sleep(0.1)
    print(f"callback.state: {callback.state}")
    try:
        websocket.send("$END$")  # custom end-of-results marker
    except ConnectionClosed:
        print("client disconnected before the end marker could be sent")


def main():
    host = "127.0.0.1"
    port = 8765
    with serve(asr, host, port) as server:
        print(f"Server started {host}:{port}...")
        server.serve_forever()


if __name__ == "__main__":
    main()
client 端代码
# encoding: utf-8
# author: qbit
# date: 2025-03-13
# summary: WebSocket client that simulates a real-time speech request
import threading


def send(websocket, path="./audio/test1.pcm", chunk_size=6400):
    """Stream a PCM file to the server, then send the end marker.

    Args:
        websocket: connection object exposing ``send(data)``.
        path: audio file to stream.
        chunk_size: maximum number of bytes per binary frame.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    with open(path, "rb") as f:
        # Reading chunk by chunk keeps the final, shorter chunk; grouping
        # the bytes with zip() would silently discard it.
        while chunk := f.read(chunk_size):
            websocket.send(chunk)
    websocket.send(b"$END$")  # custom end-of-audio marker


def recv(websocket):
    """Print every message from the server until ``"$END$"`` arrives."""
    while True:
        message = websocket.recv()
        print(f"Received message: {message}")
        if message == "$END$":
            break


def main():
    """Connect to the local server and run sender and receiver in parallel."""
    # Imported here so send()/recv() stay importable (e.g. with a stand-in
    # connection object) when the websockets package is not installed.
    from websockets.sync.client import connect

    uri = "ws://localhost:8765"
    print(f"Connecting to {uri}...")
    with connect(uri) as websocket:
        t1 = threading.Thread(target=send, args=(websocket,))
        t2 = threading.Thread(target=recv, args=(websocket,))
        t1.start()
        t2.start()
        t1.join()
        t2.join()


if __name__ == "__main__":
    main()
运行与输出
- 先启动 server 代码,再启动 client 代码
server 输出
Server started 127.0.0.1:8765... Client connected: 127.0.0.1:53387 on_start on_sentence_begin on_result_chg: 因 on_result_chg: 一 on_result_chg: 12 on_result_chg: 123 on_result_chg: 1234 on_result_chg: 12345 on_result_chg: 123456 on_result_chg: 1234567 on_result_chg: 12345678 on_result_chg: 1 2 3 4 5 6 7 8 9 10 on_sentence_end: 1 2 3 4 5 6 7 8 9 10。 on_completed on_close
client 输出
Connecting to ws://localhost:8765... Received message: 因 Received message: 一 Received message: 12 Received message: 123 Received message: 1234 Received message: 12345 Received message: 123456 Received message: 1234567 Received message: 12345678 Received message: 1 2 3 4 5 6 7 8 9 10 Received message: 1 2 3 4 5 6 7 8 9 10。 Received message: $END$
async 异步版 Server 代码
- 本节的异步版本代码,可以与 fastapi.WebSocket 结合使用
# asyncio version with a blocking bridge (replaces the asyncio.run call)
# encoding: utf-8
# author: qbit
# date: 2025-03-13
# summary: WebSocket server that receives speech-recognition requests,
#          forwards the audio to Aliyun and returns the recognized text
import asyncio
import json

import nls
from websockets.asyncio.server import serve
from websockets.exceptions import ConnectionClosed

URL = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1"
TOKEN = "your_token"  # see https://help.aliyun.com/document_detail/450255.html to obtain a token
APPKEY = "your_key"  # get the Appkey from the console: https://nls-portal.console.aliyun.com/applist

# States after which no further recognition result is expected.
FINAL_STATES = ("completed", "error", "closed")
# Upper bound (seconds) on the wait for a final state after sr.stop().
COMPLETION_TIMEOUT = 10.0


class FuncCallback:
    """Callbacks required by nls.NlsSpeechTranscriber.

    The callbacks are synchronous and are not invoked from the server's
    event loop.  They receive the client websocket as ``args[0]`` and the
    server's event loop as ``args[1]`` (see ``callback_args`` in
    :func:`asr`).
    """

    def __init__(self):
        self.result = ""
        self.state = ""

    def _forward(self, state, message, args):
        """Record the recognized text and send it to the client, blocking."""
        self.result = json.loads(message)["payload"]["result"]
        self.state = state
        print(f"on_{state}: {self.result}")
        websocket, loop = args[0], args[1]
        # The connection belongs to the server's loop, so the coroutine
        # must run there.  asyncio.run() would execute it on a brand-new
        # loop in this thread, which asyncio objects do not support.
        future = asyncio.run_coroutine_threadsafe(
            websocket.send(self.result), loop
        )
        try:
            future.result()  # turn the asynchronous send into a synchronous call
        except ConnectionClosed:
            print(f"on_{state}: client connection already closed")

    def on_sentence_begin(self, message, *args):
        print("on_sentence_begin")

    def on_sentence_end(self, message, *args):
        self._forward("sentence_end", message, args)

    def on_start(self, message, *args):
        print("on_start")

    def on_error(self, message, *args):
        print(f"on_error from aliyun {message}")
        self.state = "error"

    def on_close(self, *args):
        print("on_close")
        # A close without a preceding completed/error must still end the
        # wait in asr(), otherwise the handler would spin forever.
        if self.state not in FINAL_STATES:
            self.state = "closed"

    def on_result_chg(self, message, *args):
        self._forward("result_chg", message, args)

    def on_completed(self, message, *args):
        print("on_completed")
        self.state = "completed"


async def asr(websocket):
    """Speech recognition service, Automatic Speech Recognition (ASR).

    Streams the binary frames received from ``websocket`` to Aliyun until
    the client sends ``b"$END$"``, relays recognition results back through
    the callbacks, and finally sends the text marker ``"$END$"``.
    """
    callback = FuncCallback()
    loop = asyncio.get_running_loop()
    sr = nls.NlsSpeechTranscriber(
        url=URL,
        token=TOKEN,
        appkey=APPKEY,
        on_sentence_begin=callback.on_sentence_begin,
        on_sentence_end=callback.on_sentence_end,
        on_start=callback.on_start,
        on_result_changed=callback.on_result_chg,
        on_completed=callback.on_completed,
        on_error=callback.on_error,
        on_close=callback.on_close,
        callback_args=[websocket, loop],
    )
    # The SDK calls are synchronous.  Running them in a worker thread keeps
    # the event loop free, which the callbacks rely on: they block until
    # the loop has executed their send.
    await asyncio.to_thread(
        sr.start,
        aformat="pcm",  # supported formats: pcm, opu, opus
        enable_intermediate_result=True,
        enable_punctuation_prediction=True,
        enable_inverse_text_normalization=True,
    )
    stopped = False
    try:
        async for message in websocket:
            if message == b"$END$":
                break
            if not isinstance(message, bytes):
                continue  # text frames carry no audio
            await asyncio.to_thread(sr.send_audio, message)
        await asyncio.to_thread(sr.stop)
        stopped = True
    finally:
        if not stopped:
            # The client connection failed or the handler was cancelled:
            # release the transcriber instead of leaving its request open.
            sr.shutdown()

    deadline = loop.time() + COMPLETION_TIMEOUT
    while callback.state not in FINAL_STATES and loop.time() < deadline:
        await asyncio.sleep(0.1)
    print(f"callback.state: {callback.state}")
    try:
        await websocket.send("$END$")  # custom end-of-results marker
    except ConnectionClosed:
        print("client disconnected before the end marker could be sent")


async def main():
    host = "127.0.0.1"
    port = 8765
    async with serve(asr, host, port) as server:
        print(f"Server started {host}:{port}...")
        await server.serve_forever()


if __name__ == "__main__":
    asyncio.run(main())
因为
asyncio.run
新开了事件循环有些重,花了不少时间弄出下面的loop.create_task
# Version that schedules each send on the server's event loop without
# blocking the SDK thread (thread-safe replacement for loop.create_task)
# encoding: utf-8
# author: qbit
# date: 2025-03-14
# summary: WebSocket server that receives speech-recognition requests,
#          forwards the audio to Aliyun and returns the recognized text
import asyncio
import json

import nls
from websockets.asyncio.server import serve
from websockets.exceptions import ConnectionClosed

URL = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1"
TOKEN = "your_token"  # see https://help.aliyun.com/document_detail/450255.html to obtain a token
APPKEY = "your_key"  # get the Appkey from the console: https://nls-portal.console.aliyun.com/applist

# States after which no further recognition result is expected.
FINAL_STATES = ("completed", "error", "closed")
# Upper bound (seconds) on the wait for a final state after sr.stop().
COMPLETION_TIMEOUT = 10.0


class FuncCallback:
    """Callbacks required by nls.NlsSpeechTranscriber.

    The callbacks are synchronous and are not invoked from the server's
    event loop.  They receive the client websocket as ``args[0]`` and the
    server's event loop as ``args[1]`` (see ``callback_args`` in
    :func:`asr`).  ``pending`` collects one future per scheduled send.
    """

    def __init__(self):
        self.result = ""
        self.state = ""
        self.pending = []

    def _forward(self, state, message, args):
        """Record the recognized text and schedule its delivery."""
        self.result = json.loads(message)["payload"]["result"]
        self.state = state
        print(f"on_{state}: {self.result}")
        websocket = args[0]
        loop: asyncio.AbstractEventLoop = args[1]
        # loop.create_task() is not thread-safe and does not wake a loop
        # that is idle, so a task created from this thread may not start
        # until something else wakes the loop.  run_coroutine_threadsafe()
        # is the documented way to submit a coroutine from another thread.
        self.pending.append(
            asyncio.run_coroutine_threadsafe(websocket.send(self.result), loop)
        )

    def on_sentence_begin(self, message, *args):
        print("on_sentence_begin")

    def on_sentence_end(self, message, *args):
        self._forward("sentence_end", message, args)

    def on_start(self, message, *args):
        print("on_start")

    def on_error(self, message, *args):
        print(f"on_error from aliyun {message}")
        self.state = "error"

    def on_close(self, *args):
        print("on_close")
        # A close without a preceding completed/error must still end the
        # wait in asr(), otherwise the handler would spin forever.
        if self.state not in FINAL_STATES:
            self.state = "closed"

    def on_result_chg(self, message, *args):
        self._forward("result_chg", message, args)

    def on_completed(self, message, *args):
        print("on_completed")
        self.state = "completed"


async def asr(websocket):
    """Speech recognition service, Automatic Speech Recognition (ASR).

    Streams the binary frames received from ``websocket`` to Aliyun until
    the client sends ``b"$END$"``, relays recognition results back through
    the callbacks, and finally sends the text marker ``"$END$"``.
    """
    callback = FuncCallback()
    loop = asyncio.get_running_loop()
    sr = nls.NlsSpeechTranscriber(
        url=URL,
        token=TOKEN,
        appkey=APPKEY,
        on_sentence_begin=callback.on_sentence_begin,
        on_sentence_end=callback.on_sentence_end,
        on_start=callback.on_start,
        on_result_changed=callback.on_result_chg,
        on_completed=callback.on_completed,
        on_error=callback.on_error,
        on_close=callback.on_close,
        callback_args=[websocket, loop],
    )
    # The SDK calls are synchronous; running them in a worker thread keeps
    # the event loop free to deliver the sends scheduled by the callbacks.
    await asyncio.to_thread(
        sr.start,
        aformat="pcm",  # supported formats: pcm, opu, opus
        enable_intermediate_result=True,
        enable_punctuation_prediction=True,
        enable_inverse_text_normalization=True,
    )
    stopped = False
    try:
        async for message in websocket:
            if message == b"$END$":
                break
            if not isinstance(message, bytes):
                continue  # text frames carry no audio
            await asyncio.to_thread(sr.send_audio, message)
        await asyncio.to_thread(sr.stop)
        stopped = True
    finally:
        if not stopped:
            # The client connection failed or the handler was cancelled:
            # release the transcriber instead of leaving its request open.
            sr.shutdown()

    deadline = loop.time() + COMPLETION_TIMEOUT
    while callback.state not in FINAL_STATES and loop.time() < deadline:
        await asyncio.sleep(0.1)
    print(f"callback.state: {callback.state}")

    # Wait for every scheduled send so that all results reach the client
    # before the end marker does.
    outcomes = await asyncio.gather(
        *(asyncio.wrap_future(future) for future in callback.pending),
        return_exceptions=True,
    )
    for outcome in outcomes:
        if isinstance(outcome, Exception):
            print(f"failed to deliver a result: {outcome!r}")
    try:
        await websocket.send("$END$")  # custom end-of-results marker
    except ConnectionClosed:
        print("client disconnected before the end marker could be sent")


async def main():
    host = "127.0.0.1"
    port = 8765
    async with serve(asr, host, port) as server:
        print(f"Server started {host}:{port}...")
        await server.serve_forever()


if __name__ == "__main__":
    asyncio.run(main())
相关阅读
- 阿里云实时语音识别官方文档 https://help.aliyun.com/zh/isi/developer-reference/sdk-for-py...
- websockets GitHub 仓库:https://github.com/python-websockets/websockets
本文出自 qbit sanp
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。