OpenAI TTS
Text-to-speech conversion using OpenAI's TTS API for generating high-quality, natural-sounding audio from text.
Features
6 different voice options (male/female)
Standard and HD quality models
Automatic text chunking for long content (4096 char limit)
Multiple output formats (mp3, opus, aac, flac)
Activation
This skill activates when the user:
Requests audio/voice output: "read this to me", "convert to audio", "generate speech", "make this an audio file"
Uses keywords: "tts", "openai tts", "text to speech", "voice", "audio", "podcast"
Needs content spoken for accessibility, multitasking, or podcast creation
Specifies voice preferences: "alloy", "echo", "fable", "onyx", "nova", "shimmer"
Asks to "narrate", "speak", or "vocalize" text
Requirements
OPENAI_API_KEYenvironment variable must be setPython 3.8+
Dependencies:
openai,pydub(optional, for long text)
Voices
| Voice | Type | Description |
|---|---|---|
| alloy | Neutral | Balanced, versatile |
| echo | Male | Warm, conversational |
| fable | Neutral | Expressive, storytelling |
| onyx | Male | Deep, authoritative |
| nova | Female | Friendly, upbeat |
| shimmer | Female | Clear, professional |
Usage
Basic Usage
from openai import OpenAI
import os
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
response = client.audio.speech.create(
model="tts-1", # or "tts-1-hd" for higher quality
voice="onyx", # choose from: alloy, echo, fable, onyx, nova, shimmer
input="Your text here",
speed=1.0 # 0.25 to 4.0 (optional)
)
with open("output.mp3", "wb") as f:
for chunk in response.iter_bytes():
f.write(chunk)
Command Line
# Basic
python -c "
from openai import OpenAI
client = OpenAI()
response = client.audio.speech.create(model='tts-1', voice='onyx', input='Hello world')
open('output.mp3', 'wb').write(response.content)
"
Long Text (Auto-chunking)
from openai import OpenAI
from pydub import AudioSegment
import tempfile
import os
import re
client = OpenAI()
MAX_CHARS = 4096
def split_text(text):
if len(text) <= MAX_CHARS:
return [text]
chunks = []
sentences = re.split(r'(?<=[.!?])\s+', text)
current = ''
for sentence in sentences:
if len(current) + len(sentence) + 1 <= MAX_CHARS:
current += (' ' if current else '') + sentence
else:
if current:
chunks.append(current)
current = sentence
if current:
chunks.append(current)
return chunks
def generate_tts(text, output_path, voice='onyx', model='tts-1'):
chunks = split_text(text)
if len(chunks) == 1:
response = client.audio.speech.create(model=model, voice=voice, input=text)
with open(output_path, 'wb') as f:
f.write(response.content)
else:
segments = []
for chunk in chunks:
response = client.audio.speech.create(model=model, voice=voice, input=chunk)
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
tmp.write(response.content)
segments.append(AudioSegment.from_mp3(tmp.name))
os.unlink(tmp.name)
combined = segments[0]
for seg in segments[1:]:
combined += seg
combined.export(output_path, format='mp3')
return output_path
# Usage
generate_tts("Your long text here...", "output.mp3", voice="nova")
Models
| Model | Quality | Speed | Cost |
|---|---|---|---|
| tts-1 | Standard | Fast | $0.015/1K chars |
| tts-1-hd | High Definition | Slower | $0.030/1K chars |
Output Formats
Supported formats: mp3 (default), opus, aac, flac
response = client.audio.speech.create(
model="tts-1",
voice="onyx",
input="Hello",
response_format="opus" # or mp3, aac, flac
)
Error Handling
from openai import OpenAI, APIError, RateLimitError
import time
client = OpenAI()
def generate_with_retry(text, voice='onyx', max_retries=3):
for attempt in range(max_retries):
try:
response = client.audio.speech.create(
model="tts-1",
voice=voice,
input=text
)
return response.content
except RateLimitError:
if attempt < max_retries - 1:
time.sleep(2 ** attempt) # Exponential backoff
continue
raise
except APIError as e:
print(f"API Error: {e}")
raise
return None
Examples
Convert Article to Podcast
def article_to_podcast(article_text, output_file):
intro = "Welcome to today's article reading."
outro = "Thank you for listening."
full_text = f"{intro}\n\n{article_text}\n\n{outro}"
generate_tts(full_text, output_file, voice='nova', model='tts-1-hd')
print(f"Podcast saved to {output_file}")
Batch Processing
def batch_tts(texts, output_dir, voice='onyx'):
import os
os.makedirs(output_dir, exist_ok=True)
for i, text in enumerate(texts):
output_path = os.path.join(output_dir, f"audio_{i+1}.mp3")
generate_tts(text, output_path, voice=voice)
print(f"Generated: {output_path}")