Skip to content

Commit 7b1abec

Browse files
authored
Merge pull request #17 from xming521/astrBot
更新readme
2 parents ada7bca + 7fa6ee4 commit 7b1abec

File tree

8 files changed

+431
-132
lines changed

8 files changed

+431
-132
lines changed

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
## 核心功能✨
44
- 💬 使用微信聊天记录微调LLM
55
- 🎙️ 使用微信语音消息➕0.5B大模型实现高质量声音克隆 👉[WeClone-audio](https://github.com/xming521/WeClone/tree/master/WeClone-audio)
6-
- 🔗 绑定到微信机器人,实现自己的数字分身
6+
- 🔗 绑定到微信、QQ、Telegram、企微、飞书机器人,实现自己的数字分身
77

88
## 特性与说明📋
99

@@ -197,4 +197,15 @@ python ./src/wechat_bot/main.py
197197
<br>
198198
<br>
199199

200+
## ⭐ Star History
201+
> [!TIP]
202+
> 如果本项目对您有帮助,或者您关注本项目的未来发展,请给项目 Star,谢谢
203+
204+
<div align="center">
205+
206+
[![Star History Chart](https://api.star-history.com/svg?repos=xming521/WeClone&type=Date)](https://www.star-history.com/#xming521/WeClone&Date)
207+
208+
</div>
209+
210+
200211
<div align="center"> 克隆我们,保留那灵魂的芬芳 </div>
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
API_KEY=your_api_key_here
2+
PORT=5050
3+
4+
DEFAULT_VOICE=en-US-AvaNeural
5+
DEFAULT_RESPONSE_FORMAT=mp3
6+
DEFAULT_SPEED=1.0
7+
8+
DEFAULT_LANGUAGE=en-US
9+
10+
REQUIRE_API_KEY=True
11+
12+
REMOVE_FILTER=False
13+
14+
EXPAND_API=True
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import re
2+
import emoji
3+
4+
def prepare_tts_input_with_context(text: str) -> str:
    """
    Prepare text for a TTS API by stripping Markdown/HTML and adding spoken hints.

    Emojis are removed, Markdown headers are replaced by a short spoken
    prefix ("Title", "Section" or "Subsection"), fenced code blocks are
    replaced by a placeholder, inline code and images are announced, and link
    targets, emphasis markers and HTML tags are dropped. Paragraph separation
    is preserved as a single blank line.

    Args:
        text (str): The raw text containing Markdown or other formatting.

    Returns:
        str: Cleaned text with contextual hints suitable for TTS input.
    """

    # Remove emojis
    text = emoji.replace_emoji(text, replace='')

    # Replace fenced code blocks with a placeholder. This must run before the
    # inline-code rule: otherwise the inline pattern consumes the innermost
    # pair of backticks of a fence and the block is never omitted.
    text = re.sub(r"```([\s\S]+?)```", r"(code block omitted)", text)

    # Add context for headers
    def header_replacer(match):
        level = len(match.group(1))  # Number of '#' symbols
        header_text = match.group(2).strip()
        if level == 1:
            return f"Title — {header_text}\n"
        elif level == 2:
            return f"Section — {header_text}\n"
        else:
            return f"Subsection — {header_text}\n"

    text = re.sub(r"^(#{1,6})\s+(.*)", header_replacer, text, flags=re.MULTILINE)

    # Announce images by their alt text. This must run before the link rule:
    # the link pattern also matches the "[alt](url)" part of "![alt](url)" and
    # would leave a stray "!alt" behind.
    text = re.sub(r"!\[([^\]]*)\]\([^\)]+\)", r"Image: \1", text)

    # Remove links while keeping the link text
    text = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", text)

    # Describe inline code
    text = re.sub(r"`([^`]+)`", r"code snippet: \1", text)

    # Remove bold/italic symbols but keep the content.
    # NOTE: this strips every '*' and '_' character, including those inside
    # identifiers such as snake_case names.
    text = re.sub(r"(\*\*|__|\*|_)", '', text)

    # Remove HTML tags. A tag must start with a letter (or '!' for comments
    # and doctype) and be closed by '>', so a bare less-than sign such as
    # "a < b" no longer deletes the rest of the text.
    text = re.sub(r"</?[A-Za-z!][^<>]*>", '', text)

    # Normalize line breaks
    text = re.sub(r"\n{2,}", '\n\n', text)  # Ensure consistent paragraph separation

    # Replace multiple spaces within lines
    text = re.sub(r" {2,}", ' ', text)

    # Trim leading and trailing whitespace from the whole text
    return text.strip()
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
flask
2+
gevent
3+
python-dotenv
4+
edge-tts
5+
emoji
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# server.py
2+
3+
from flask import Flask, request, send_file, jsonify
4+
from gevent.pywsgi import WSGIServer
5+
from dotenv import load_dotenv
6+
import os
7+
8+
from handle_text import prepare_tts_input_with_context
9+
from tts_handler import generate_speech, get_models, get_voices
10+
from utils import getenv_bool, require_api_key, AUDIO_FORMAT_MIME_TYPES
11+
12+
# Create the Flask application, then call load_dotenv() before any setting is
# read from the environment below.
app = Flask(__name__)
load_dotenv()

# Server settings; each falls back to the default shown when the variable is unset.
API_KEY = os.environ.get('API_KEY', 'your_api_key_here')
PORT = int(os.environ.get('PORT', 5050))

# Defaults applied when a request does not specify these fields.
DEFAULT_VOICE = os.environ.get('DEFAULT_VOICE', 'en-US-AvaNeural')
DEFAULT_RESPONSE_FORMAT = os.environ.get('DEFAULT_RESPONSE_FORMAT', 'mp3')
DEFAULT_SPEED = float(os.environ.get('DEFAULT_SPEED', 1.0))

# Feature switches:
#   REMOVE_FILTER - when true, request text is passed to the TTS backend
#                   without the Markdown/emoji clean-up step.
#   EXPAND_API    - when false, the ElevenLabs and Azure compatible endpoints
#                   reject every request.
REMOVE_FILTER = getenv_bool('REMOVE_FILTER', False)
EXPAND_API = getenv_bool('EXPAND_API', True)
24+
25+
# DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'tts-1')
26+
27+
@app.route('/v1/audio/speech', methods=['POST'])
@app.route('/audio/speech', methods=['POST'])  # Alias without the /v1 prefix
@require_api_key
def text_to_speech():
    """
    Convert the 'input' text of a JSON request into an audio file response.

    Request body (JSON object):
        input (str): Text to synthesize. Required.
        voice (str): Voice name; defaults to DEFAULT_VOICE.
        response_format (str): Audio format; defaults to DEFAULT_RESPONSE_FORMAT.
        speed (number): Playback speed; defaults to DEFAULT_SPEED.

    Any 'model' field in the body is ignored.

    Returns:
        The generated audio as an attachment named "speech.<response_format>",
        or a JSON error with status 400 for an invalid request and 500 when
        speech generation fails.
    """
    data = request.json
    # A JSON array or scalar would otherwise crash on the .get() calls below.
    if not isinstance(data, dict) or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400

    text = data.get('input')
    if not isinstance(text, str):
        # The clean-up step runs regular expressions and needs a string.
        return jsonify({"error": "'input' must be a string"}), 400

    if not REMOVE_FILTER:
        text = prepare_tts_input_with_context(text)

    voice = data.get('voice', DEFAULT_VOICE)
    response_format = data.get('response_format', DEFAULT_RESPONSE_FORMAT)

    try:
        speed = float(data.get('speed', DEFAULT_SPEED))
    except (TypeError, ValueError):
        return jsonify({"error": "'speed' must be a number"}), 400

    # Unknown formats fall back to the MP3 MIME type.
    mime_type = AUDIO_FORMAT_MIME_TYPES.get(response_format, "audio/mpeg")

    # Generate the audio file in the specified format with speed adjustment.
    # Failures are reported as JSON, matching the ElevenLabs and Azure handlers.
    try:
        output_file_path = generate_speech(text, voice, response_format, speed)
    except Exception as e:
        return jsonify({"error": f"TTS generation failed: {str(e)}"}), 500

    # Return the file with the correct MIME type
    return send_file(
        output_file_path,
        mimetype=mime_type,
        as_attachment=True,
        download_name=f"speech.{response_format}",
    )
53+
54+
@app.route('/v1/models', methods=['GET', 'POST'])
@app.route('/models', methods=['GET', 'POST'])
@require_api_key
def list_models():
    """Return the result of get_models() as JSON under the "data" key."""
    models = get_models()
    return jsonify({"data": models})
59+
60+
@app.route('/v1/voices', methods=['GET', 'POST'])
@app.route('/voices', methods=['GET', 'POST'])
@require_api_key
def list_voices():
    """
    Return the available voices as JSON under the "voices" key.

    An optional language filter is read from the query string (GET) or from
    the JSON body (POST). The 'language' key takes precedence over 'locale'.
    When neither is present, get_voices() is called with None.
    """
    if request.method == 'GET':
        params = request.args
    else:
        # The filter is optional, so a POST with no (or malformed) JSON body
        # is treated as "no filter" rather than rejected.
        body = request.get_json(silent=True)
        # Ignore JSON arrays/scalars, which have no .get().
        params = body if isinstance(body, dict) else {}

    specific_language = None
    if 'language' in params:
        specific_language = params.get('language')
    elif 'locale' in params:
        specific_language = params.get('locale')

    return jsonify({"voices": get_voices(specific_language)})
71+
72+
@app.route('/v1/voices/all', methods=['GET', 'POST'])
@app.route('/voices/all', methods=['GET', 'POST'])
@require_api_key
def list_all_voices():
    """Return the result of get_voices('all') as JSON under the "voices" key."""
    every_voice = get_voices('all')
    return jsonify({"voices": every_voice})
77+
78+
"""
79+
Support for ElevenLabs and Azure AI Speech
80+
(currently in beta)
81+
"""
82+
83+
# http://localhost:5050/elevenlabs/v1/text-to-speech
84+
# http://localhost:5050/elevenlabs/v1/text-to-speech/en-US-AndrewNeural
85+
@app.route('/elevenlabs/v1/text-to-speech/<voice_id>', methods=['POST'])
@require_api_key
def elevenlabs_tts(voice_id):
    """
    ElevenLabs-style endpoint: synthesize the 'text' field of a JSON body.

    Args:
        voice_id: Voice name taken from the URL path and passed to
            generate_speech() unchanged.

    Returns:
        An MP3 attachment named "speech.mp3", or a JSON error: 400 for an
        invalid payload, 500 when the endpoint is disabled (EXPAND_API is
        false) or when speech generation fails.
    """
    if not EXPAND_API:
        # NOTE(review): 500 is kept for backward compatibility; 403 or 404
        # would describe a disabled endpoint more accurately.
        return jsonify({"error": "Endpoint not allowed"}), 500

    # Parse the incoming JSON payload
    try:
        payload = request.json
    except Exception as e:
        return jsonify({"error": f"Invalid JSON payload: {str(e)}"}), 400

    # A JSON array or scalar cannot carry a 'text' field.
    if not isinstance(payload, dict) or 'text' not in payload:
        return jsonify({"error": "Missing 'text' in request body"}), 400

    text = payload['text']
    if not isinstance(text, str):
        # The clean-up step runs regular expressions and needs a string.
        return jsonify({"error": "'text' must be a string"}), 400

    if not REMOVE_FILTER:
        text = prepare_tts_input_with_context(text)

    voice = voice_id  # ElevenLabs uses the voice_id in the URL

    # Use default settings for edge-tts
    response_format = 'mp3'
    speed = DEFAULT_SPEED  # Optional customization via payload.get('speed', DEFAULT_SPEED)

    # Generate speech using edge-tts
    try:
        output_file_path = generate_speech(text, voice, response_format, speed)
    except Exception as e:
        return jsonify({"error": f"TTS generation failed: {str(e)}"}), 500

    # Return the generated audio file
    return send_file(output_file_path, mimetype="audio/mpeg", as_attachment=True, download_name="speech.mp3")
118+
119+
# tts.speech.microsoft.com/cognitiveservices/v1
120+
# https://{region}.tts.speech.microsoft.com/cognitiveservices/v1
121+
# http://localhost:5050/azure/cognitiveservices/v1
122+
@app.route('/azure/cognitiveservices/v1', methods=['POST'])
@require_api_key
def azure_tts():
    """
    Azure-style endpoint: synthesize the text of the first SSML <voice> element.

    The request body is an SSML document. The voice name is read from the
    'name' attribute of the first <voice> element in the SSML namespace, and
    the text is everything inside that element, including text nested in
    child elements such as <prosody>.

    Returns:
        An MP3 attachment named "speech.mp3", or a JSON error: 400 for a
        missing or invalid SSML payload, 500 when the endpoint is disabled
        (EXPAND_API is false) or when speech generation fails.
    """
    if not EXPAND_API:
        # NOTE(review): 500 is kept for backward compatibility; 403 or 404
        # would describe a disabled endpoint more accurately.
        return jsonify({"error": "Endpoint not allowed"}), 500

    # Parse the SSML payload
    try:
        ssml_data = request.data.decode('utf-8')
        if not ssml_data:
            return jsonify({"error": "Missing SSML payload"}), 400

        # Extract the text and voice from SSML.
        # NOTE(security): xml.etree.ElementTree is not hardened against
        # maliciously constructed XML (e.g. entity-expansion attacks);
        # consider a hardened parser if this endpoint is exposed publicly.
        from xml.etree import ElementTree as ET
        root = ET.fromstring(ssml_data)
        voice_element = root.find('.//{http://www.w3.org/2001/10/synthesis}voice')
        if voice_element is None:
            return jsonify({"error": "Invalid SSML payload: missing <voice> element"}), 400

        # itertext() also collects text nested inside child elements; the
        # plain .text attribute stops at the first child and is None when
        # the element starts with one.
        text = ''.join(voice_element.itertext())
        voice = voice_element.get('name')
    except Exception as e:
        return jsonify({"error": f"Invalid SSML payload: {str(e)}"}), 400

    if not text.strip():
        return jsonify({"error": "Invalid SSML payload: <voice> element contains no text"}), 400

    # Use default settings for edge-tts
    response_format = 'mp3'
    speed = DEFAULT_SPEED

    if not REMOVE_FILTER:
        text = prepare_tts_input_with_context(text)

    # Generate speech using edge-tts
    try:
        output_file_path = generate_speech(text, voice, response_format, speed)
    except Exception as e:
        return jsonify({"error": f"TTS generation failed: {str(e)}"}), 500

    # Return the generated audio file
    return send_file(output_file_path, mimetype="audio/mpeg", as_attachment=True, download_name="speech.mp3")
157+
158+
# Startup banner. This runs at import time, not only when the module is
# executed as a script.
print("\n".join((
    " Edge TTS (Free Azure TTS) Replacement for OpenAI's TTS API",
    " ",
    " * Serving OpenAI Edge TTS",
    f" * Server running on http://localhost:{PORT}",
    f" * TTS Endpoint: http://localhost:{PORT}/v1/audio/speech",
    " ",
)))
164+
165+
if __name__ == '__main__':
    # Serve the Flask app with gevent's WSGI server on all interfaces until
    # the process is stopped.
    WSGIServer(('0.0.0.0', PORT), app).serve_forever()

0 commit comments

Comments
 (0)