I. Deploying openai-edge-tts
This service provides a local, OpenAI-compatible text-to-speech (TTS) API. It emulates the OpenAI TTS endpoint (/v1/audio/speech), letting users generate speech from text with a variety of voice options and playback speeds, just as with the OpenAI API. Under the hood it uses edge-tts, which relies on Microsoft Edge's online text-to-speech service, so it is completely free.
1. Pull the latest Docker image from Docker Hub:
docker pull travisvn/openai-edge-tts:latest
2. Start the container service:
docker run -d -p 5050:5050 -e API_KEY=your_api_key_here -e PORT=5050 travisvn/openai-edge-tts:latest
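Once the container is up, you can verify the deployment with a quick request through the official OpenAI Python SDK. This is a minimal sketch, assuming the service runs on localhost:5050 with the API_KEY set above; the voice name and sample text are only illustrations:

from openai import OpenAI

# Point the OpenAI client at the local openai-edge-tts container.
client = OpenAI(
    api_key="your_api_key_here",          # the API_KEY passed to the container
    base_url="http://localhost:5050/v1",  # local OpenAI-compatible endpoint
)

# Request speech exactly as with the OpenAI TTS API; Edge TTS voice
# names such as zh-CN-XiaoxiaoNeural are accepted directly.
response = client.audio.speech.create(
    model="tts-1",
    voice="zh-CN-XiaoxiaoNeural",
    input="你好,这是一次本地语音合成测试。",
    response_format="mp3",
    speed=1.0,
)

# Write the returned audio bytes to disk.
with open("test.mp3", "wb") as f:
    f.write(response.content)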
II. Deploying the Jimeng AI Free service
It exposes Jimeng's powerful image generation capability (the official service currently grants 66 free credits per day, enough for 66 generations), deploys with zero configuration, and supports multiple tokens. Its interface is fully compatible with OpenAI's.
1. Preparation
Obtain a sessionid from Jimeng: log in to your Jimeng account, press F12 to open the browser developer tools, and copy the value of sessionid under Application > Cookies. This value serves as the Bearer token in the Authorization header:
Authorization: Bearer sessionid
2. Docker deployment
Pull the image and start the service:
docker run -it -d --init --name jimeng-free-api -p 8000:8000 -e TZ=Asia/Shanghai vinlic/jimeng-free-api:latest
View live service logs:
docker logs -f jimeng-free-api
Restart the service:
docker restart jimeng-free-api
Stop the service:
docker stop jimeng-free-api
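With the container running, a quick smoke test confirms that the sessionid works. The sketch below assumes the service is reachable at localhost:8000; the endpoint path, model name, and payload fields mirror the application code in the next section, and your_sessionid is a placeholder for the cookie value obtained above:

import requests

# The sessionid cookie value is passed as a Bearer token.
headers = {
    "Authorization": "Bearer your_sessionid",
    "Content-Type": "application/json",
}
payload = {
    "model": "jimeng-3.0",
    "prompt": "一只在竹林中喝茶的熊猫,水彩风格",
    "negativePrompt": "",
    "width": 1024,
    "height": 1024,
    "sample_strength": 0.5,
}

resp = requests.post(
    "http://localhost:8000/v1/images/generations",
    headers=headers, json=payload, timeout=120,
)
resp.raise_for_status()
# The OpenAI-compatible response carries the image URL(s) under data[n].url.
print(resp.json()["data"][0]["url"])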
III. Python application example
This example wraps openai-edge-tts and the Jimeng AI Free service behind a local API for external callers. When a caller submits a request, the service asks Jimeng AI Free to generate an image matching the prompt and converts the accompanying text snippet into an audio file, then returns both the image URL and the audio URL to the caller.
Python code:
from fastapi import FastAPI, HTTPException
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from openai import OpenAI
import logging
import time
import requests
from typing import List
import uvicorn
import configparser
import os
import datetime
import random

app = FastAPI()

# Read the configuration file
config = configparser.ConfigParser()
config.read('config.ini', encoding='utf-8')

# Image generation settings
image_generation_url = config.get('DEFAULT', 'image_generation_url')
image_api_key = config.get('DEFAULT', 'image_api_key')

# Audio generation settings
tts_model = config.get('DEFAULT', 'tts_model')
tts_voice = config.get('DEFAULT', 'tts_voice')
speed = config.get('DEFAULT', 'speed')
response_format = config.get('DEFAULT', 'response_format')
server_url = config.get('DEFAULT', 'server_url', fallback="http://192.168.1.12:8087")

# TTS settings
api_key = config.get('edgetts', 'openai_api_key')
base_url = config.get('edgetts', 'openai_base_url')
output_path = config.get('edgetts', 'output_path')

# Make sure the output directory exists
os.makedirs(output_path, exist_ok=True)

# Serve the generated audio files as static content
app.mount("/files", StaticFiles(directory=output_path), name="files")

# Initialize the OpenAI client (pointed at openai-edge-tts)
client = OpenAI(api_key=api_key, base_url=base_url)

# Request body model
class Item(BaseModel):
    description: str
    prompt: str
    text_snippet: str
    importance: int

# Logging setup
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


@app.post("/process-data/")
async def process_data(item: Item):
    try:
        logger.info(f"Processing item: {item.description}")

        # Generate the image
        logger.info("Calling the image generation API")
        start_time_image = time.time()
        image_url = await generate_image(item.prompt)
        elapsed_time_image = time.time() - start_time_image
        logger.info(f"Image generated in {elapsed_time_image:.2f}s")

        # Generate the audio
        logger.info("Calling the audio generation API")
        start_time_audio = time.time()
        audio_url = await generate_tts(item.text_snippet)
        elapsed_time_audio = time.time() - start_time_audio
        logger.info(f"Audio generated in {elapsed_time_audio:.2f}s")

        return {
            "description": item.text_snippet,
            "image_url": image_url,
            "audio_url": audio_url,
        }
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


def generate_timestamp_filename(extension='mp3'):
    """Generate a unique filename from a timestamp and a random suffix."""
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    random_number = random.randint(1000, 9999)
    return f"{timestamp}_{random_number}.{extension}"


def save_audio_file(audio_content, output_dir):
    """Save the audio bytes to a local file."""
    filename = generate_timestamp_filename()
    file_path = os.path.join(output_dir, filename)
    with open(file_path, 'wb') as file:
        file.write(audio_content)
    return filename, file_path


async def generate_image(prompt: str):
    """Generate an image via the Jimeng AI Free API."""
    headers = {
        "Authorization": f"Bearer {image_api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "jimeng-3.0",
        "prompt": prompt,
        "negativePrompt": "",
        "width": 1024,
        "height": 1024,
        "sample_strength": 0.5,
    }
    try:
        response = requests.post(image_generation_url, headers=headers, json=data, timeout=120)
        response.raise_for_status()
        result = response.json()
        # Log the API response status
        logger.info(f"Image API response: {response.status_code}")
        return result["data"][0]["url"]
    except requests.exceptions.RequestException as e:
        logger.error(f"Image API request failed: {str(e)}")
        raise Exception(f"Image generation API request failed: {e}")
    except (KeyError, IndexError) as e:
        logger.error(f"Failed to parse image API response: {str(e)}")
        raise Exception(f"Failed to parse image API response: {e}")


async def generate_tts(text_snippet: str):
    """Generate TTS audio and save it locally."""
    try:
        # Create the speech audio
        response = client.audio.speech.create(
            model=tts_model,
            input=text_snippet,
            voice=tts_voice,
            response_format=response_format,
            speed=float(speed)
        )

        # Save the audio file
        filename, local_path = save_audio_file(response.content, output_path)
        logger.info(f"Audio file saved locally: {local_path}")

        # Build the HTTP access URL
        audio_url = f"{server_url}/files/{filename}"
        logger.info(f"File access URL: {audio_url}")
        return audio_url
    except Exception as e:
        logger.error(f"TTS generation failed: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")


@app.post("/make_AI_picture_audio/")
async def make_ai_picture_audio(data: List[Item]):
    """Generate images and audio for a batch of items."""
    try:
        logger.info(f"Starting batch processing of {len(data)} items")
        results = []
        for index, item in enumerate(data):
            logger.info(f"Processing item {index + 1}/{len(data)}: {item.description}")
            result = await process_data(item)
            results.append(result)
        logger.info(f"Batch processing finished: {len(results)} items succeeded")
        return results
    except Exception as e:
        logger.error(f"Batch processing failed: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8087)
Configuration file: config.ini
[DEFAULT]
image_generation_url=http://192.168.1.60:8000/v1/images/generations
image_api_key=XXXXXX
audio_model=FunAudioLLM/CosyVoice2-0.5B
audio_voice=FunAudioLLM/CosyVoice2-0.5B:david
tts_model=tts-1
tts_voice=zh-CN-XiaoxiaoNeural
speed=1.0
response_format=mp3

[edgetts]
openai_api_key=your_api_key_here
openai_base_url=http://192.168.1.60:5050/v1
output_path=D:\tmp
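With both backends deployed and the wrapper started, an external caller can invoke the batch endpoint. This is a hypothetical client sketch, assuming the service listens on 192.168.1.12:8087 as configured above; the payload values are only examples:

import requests

# One Item per entry: prompt drives image generation,
# text_snippet drives TTS.
items = [
    {
        "description": "Panda illustration",
        "prompt": "一只在竹林中喝茶的熊猫,水彩风格",
        "text_snippet": "熊猫正在竹林里悠闲地喝茶。",
        "importance": 1,
    }
]

resp = requests.post(
    "http://192.168.1.12:8087/make_AI_picture_audio/",
    json=items, timeout=300,
)
resp.raise_for_status()
for result in resp.json():
    print(result["image_url"], result["audio_url"])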