我们用 FastAPI 写一个接口,使用 FunASR 将语音识别为文字。
1. 安装依赖
首先,确保你已经安装了以下依赖:
pip install fastapi uvicorn funasr torch torchaudio
2. 编写fastapi接口
创建一个Python文件(例如main.py),并编写以下代码:
import base64
import os
import tempfile

from fastapi import FastAPI, File, HTTPException, UploadFile
from funasr import AutoModel
from pydantic import BaseModel

# Initialize the FastAPI application.
app = FastAPI()

# Load the FunASR model once at startup; loading per request would be
# prohibitively slow.
model = AutoModel(model="paraformer-zh")


class ASRResponse(BaseModel):
    # Recognized transcript of the uploaded audio.
    text: str


@app.post("/asr", response_model=ASRResponse)
async def asr(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to text with FunASR.

    The upload is written to a uniquely named temporary file (a fixed
    name such as "temp.wav" would be clobbered by concurrent requests),
    recognition is run on it, and the file is always removed afterwards.

    Raises:
        HTTPException: 422 if no speech was recognized, 500 on any
            other recognition failure.
    """
    # Keep the original extension so FunASR can detect the audio format;
    # default to .wav when the client supplied no usable filename.
    suffix = os.path.splitext(file.filename or "")[1] or ".wav"
    fd, file_path = tempfile.mkstemp(suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(await file.read())
        # Run speech recognition with FunASR.
        result = model.generate(input=file_path)
        if not result:
            raise HTTPException(status_code=422, detail="No speech recognized")
        return {"text": result[0]["text"]}
    except HTTPException:
        # Re-raise our own HTTP errors unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Remove the temp file even when recognition fails (the original
        # code leaked it on any exception).
        if os.path.exists(file_path):
            os.remove(file_path)


# Start the service.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
import os
import tempfile

from fastapi import FastAPI, File, HTTPException, UploadFile
from funasr import AutoModel
from pydantic import BaseModel

# FastAPI application instance.
app = FastAPI()

# The FunASR model is created a single time at import; per-request
# construction would dominate response latency.
model = AutoModel(model="paraformer-zh")


class ASRResponse(BaseModel):
    # Text produced by speech recognition.
    text: str


@app.post("/asr", response_model=ASRResponse)
async def asr(file: UploadFile = File(...)):
    """Run FunASR speech recognition on an uploaded audio file.

    The payload is saved under a unique temporary path — a hard-coded
    "temp.wav" would let concurrent requests overwrite each other — and
    the temporary file is deleted in a ``finally`` block so failures do
    not leak files.

    Raises:
        HTTPException: 422 when recognition yields nothing, 500 for any
            other error during recognition.
    """
    # Preserve the uploaded file's extension for format detection,
    # falling back to .wav if absent.
    ext = os.path.splitext(file.filename or "")[1] or ".wav"
    handle, tmp_path = tempfile.mkstemp(suffix=ext)
    try:
        with os.fdopen(handle, "wb") as out:
            out.write(await file.read())
        recognition = model.generate(input=tmp_path)
        if not recognition:
            raise HTTPException(status_code=422, detail="No speech recognized")
        return {"text": recognition[0]["text"]}
    except HTTPException:
        # Pass deliberate HTTP errors through untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Cleanup runs on both success and failure paths.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


# Launch the service when run as a script.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
3. 运行服务
在终端中运行以下命令启动服务:
python main.py