add wyoming-chatterbox addon

2026-04-14 11:37:33 +00:00 · 2025-12-15 11:13:07 -06:00 · 2025-12-15 11:13:07 -06:00 · 390376daac
commit 390376daac
parent f539be4987
8 changed files with 110 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -16,3 +16,6 @@ find isolated builders with aligned values. auto-discovers humans on github, mas
 ### ankerctl
 control and monitor ankermake 3d printers via ankerctl.
 ### wyoming-chatterbox
 wyoming protocol server for chatterbox tts with voice cloning. clone any voice with a 10-30 second sample. requires nvidia gpu.
--- a/wyoming-chatterbox/Dockerfile
+++ b/wyoming-chatterbox/Dockerfile
@ -0,0 +1,18 @@
 # Image for the wyoming-chatterbox addon: CUDA runtime base + python + the
 # wyoming-chatterbox package, started through /run.sh.
 #
 # BUILD_FROM lets the addon builder override the base image (see build_from in
 # build.yaml); the default keeps the image that was previously hard-coded so a
 # plain `docker build` still works.
 ARG BUILD_FROM=nvidia/cuda:12.1.0-runtime-ubuntu22.04
 FROM ${BUILD_FROM}
 # install python, plus jq which run.sh uses to parse /data/options.json
 RUN apt-get update && apt-get install -y \
    python3 python3-pip python3-venv git jq \
    && rm -rf /var/lib/apt/lists/*
 # install wyoming-chatterbox
 RUN pip3 install --no-cache-dir wyoming-chatterbox
 # copy run script
 COPY run.sh /
 RUN chmod a+x /run.sh
 WORKDIR /data
 CMD ["/run.sh"]
--- a/wyoming-chatterbox/README.md
+++ b/wyoming-chatterbox/README.md
@ -0,0 +1,32 @@
 # wyoming-chatterbox addon
 wyoming protocol server for chatterbox tts with voice cloning. clone any voice with a 10-30 second sample.
 ## requirements
 - nvidia gpu with 4gb+ vram
 - gpu passthrough configured in your HA host
 ## configuration
 | option | default | description |
 |--------|---------|-------------|
 | `voice_ref` | /share/voice_reference.wav | path to the voice reference wav (required; place the file in /share/) |
 | `volume_boost` | 3.0 | output volume multiplier |
 | `device` | cuda | torch device (cuda or cpu) |
 | `debug` | false | enable debug logging |
 ## setup
 1. place your voice reference wav in `/share/voice_reference.wav`
 2. set the `voice_ref` option in the addon configuration to that path
 3. start the addon
 4. add the wyoming integration in HA, pointing it at port 10201
 ## voice reference tips
 for best results:
 - 10-30 seconds of clean speech
 - no background music or noise
 - consistent speaking style
 - wav format (any sample rate)
--- a/wyoming-chatterbox/build.yaml
+++ b/wyoming-chatterbox/build.yaml
@ -0,0 +1,7 @@
 # Build settings for the wyoming-chatterbox addon image.
 # build_from: base image to build from, keyed by architecture (amd64 only).
 build_from:
  amd64: nvidia/cuda:12.1.0-runtime-ubuntu22.04
 # OCI image labels (title, description, source repository, license).
 labels:
  org.opencontainers.image.title: "wyoming-chatterbox"
  org.opencontainers.image.description: "wyoming protocol server for chatterbox tts with voice cloning"
  org.opencontainers.image.source: "https://github.com/sudoxnym/wyoming-chatterbox"
  org.opencontainers.image.licenses: "MIT"
--- a/wyoming-chatterbox/config.yaml
+++ b/wyoming-chatterbox/config.yaml
@ -0,0 +1,27 @@
 # Addon manifest for wyoming-chatterbox: identity, network port, mounted
 # folders, user options and their validation schema.
 name: wyoming-chatterbox
 version: "1.0.0"
 slug: wyoming-chatterbox
 description: "wyoming protocol server for chatterbox tts with voice cloning. clone any voice with a 10-30 second sample."
 url: "https://github.com/sudoxnym/wyoming-chatterbox"
 # Only amd64 is listed, matching the single build_from entry in build.yaml.
 arch:
  - amd64
 startup: application
 boot: auto
 # run.sh binds the server to tcp://0.0.0.0:10201; this maps that port out.
 ports:
  10201/tcp: 10201
 ports_description:
  10201/tcp: "wyoming protocol"
 # The default voice_ref below lives under /share, so that folder is mapped in.
 map:
  - share:rw
 # Default option values; run.sh reads the effective values from
 # /data/options.json.
 options:
  voice_ref: "/share/voice_reference.wav"
  volume_boost: 3.0
  device: "cuda"
  debug: false
 # Validation schema for the options above. voice_ref is the only entry without
 # a trailing "?" (presumably the marker for optional values — confirm against
 # the addon schema docs).
 schema:
  voice_ref: str
  volume_boost: float?
  device: list(cuda|cpu)?
  debug: bool?
 image: sudoxreboot/wyoming-chatterbox-addon-{arch}
 # NOTE(review): full_access grants the container broad host access —
 # presumably needed for nvidia gpu passthrough; confirm a narrower device
 # mapping is not sufficient.
 full_access: true
--- a/wyoming-chatterbox/icon.png
+++ b/wyoming-chatterbox/icon.png
--- a/wyoming-chatterbox/logo.png
+++ b/wyoming-chatterbox/logo.png
--- a/wyoming-chatterbox/run.sh
+++ b/wyoming-chatterbox/run.sh
@ -0,0 +1,23 @@
 #!/usr/bin/env bash
 # Entrypoint for the wyoming-chatterbox addon container.
 #
 # Reads the addon options from /data/options.json with jq, builds the server
 # command line and replaces this shell with wyoming-chatterbox listening on
 # tcp://0.0.0.0:10201.
 #
 # Options read (with fallbacks when the key is absent):
 #   voice_ref     - path to the voice reference wav (required, no fallback)
 #   volume_boost  - falls back to 3.0
 #   device        - falls back to "cuda"
 #   debug         - falls back to false; "true" adds --debug
 set -euo pipefail

 CONFIG_PATH=/data/options.json

 # "// empty" turns a missing/null voice_ref into an empty string instead of the
 # literal text "null", so the check below can catch it.
 VOICE_REF=$(jq -r '.voice_ref // empty' "${CONFIG_PATH}")
 VOLUME_BOOST=$(jq -r '.volume_boost // 3.0' "${CONFIG_PATH}")
 DEVICE=$(jq -r '.device // "cuda"' "${CONFIG_PATH}")
 DEBUG=$(jq -r '.debug // false' "${CONFIG_PATH}")

 if [[ -z "${VOICE_REF}" ]]; then
   echo "error: voice_ref is not set in ${CONFIG_PATH}" >&2
   exit 1
 fi

 echo "starting wyoming-chatterbox..."
 echo "voice_ref: ${VOICE_REF}"
 echo "device: ${DEVICE}"

 # Build the argument list as an array so values containing spaces or glob
 # characters (e.g. a voice_ref path with a space) reach the server intact.
 ARGS=(
   --uri tcp://0.0.0.0:10201
   --voice-ref "${VOICE_REF}"
   --volume-boost "${VOLUME_BOOST}"
   --device "${DEVICE}"
 )
 if [[ "${DEBUG}" == "true" ]]; then
   ARGS+=(--debug)
 fi

 # PyTorch CUDA allocator setting, exported for the server process.
 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

 # exec so the server replaces this shell and receives container signals directly.
 exec wyoming-chatterbox "${ARGS[@]}"