diff --git a/README.md b/README.md
index 27b669f..b9d2927 100644
--- a/README.md
+++ b/README.md
@@ -16,3 +16,6 @@ find isolated builders with aligned values. auto-discovers humans on github, mas
 
 ### ankerctl
 control and monitor ankermake 3d printers via ankerctl.
+
+### wyoming-chatterbox
+wyoming protocol server for chatterbox tts with voice cloning. clone any voice with a 10-30 second sample. requires nvidia gpu.
diff --git a/wyoming-chatterbox/Dockerfile b/wyoming-chatterbox/Dockerfile
new file mode 100644
index 0000000..050e93a
--- /dev/null
+++ b/wyoming-chatterbox/Dockerfile
@@ -0,0 +1,18 @@
+ARG BUILD_FROM=nvidia/cuda:12.1.0-runtime-ubuntu22.04
+FROM ${BUILD_FROM}
+
+# install python, plus jq (run.sh uses it to read /data/options.json)
+RUN apt-get update && apt-get install -y \
+    python3 python3-pip python3-venv git jq \
+    && rm -rf /var/lib/apt/lists/*
+
+# install wyoming-chatterbox
+RUN pip3 install --no-cache-dir wyoming-chatterbox
+
+# copy run script
+COPY run.sh /
+RUN chmod a+x /run.sh
+
+WORKDIR /data
+
+CMD ["/run.sh"]
diff --git a/wyoming-chatterbox/README.md b/wyoming-chatterbox/README.md
new file mode 100644
index 0000000..ebd54f8
--- /dev/null
+++ b/wyoming-chatterbox/README.md
@@ -0,0 +1,32 @@
+# wyoming-chatterbox addon
+
+wyoming protocol server for chatterbox tts with voice cloning. clone any voice with a 10-30 second sample.
+
+## requirements
+
+- nvidia gpu with 4gb+ vram
+- gpu passthrough configured in your HA host
+
+## configuration
+
+| option | default | description |
+|--------|---------|-------------|
+| `voice_ref` | `/share/voice_reference.wav` | path to voice reference wav (required; place in /share/) |
+| `volume_boost` | 3.0 | output volume multiplier |
+| `device` | cuda | torch device (cuda or cpu) |
+| `debug` | false | enable debug logging |
+
+## setup
+
+1. place your voice reference wav in `/share/voice_reference.wav`
+2. configure the addon with the path
+3. start the addon
+4. 
add wyoming integration in HA pointing to port 10201
+
+## voice reference tips
+
+for best results:
+- 10-30 seconds of clean speech
+- no background music or noise
+- consistent speaking style
+- wav format (any sample rate)
diff --git a/wyoming-chatterbox/build.yaml b/wyoming-chatterbox/build.yaml
new file mode 100644
index 0000000..1e5fee5
--- /dev/null
+++ b/wyoming-chatterbox/build.yaml
@@ -0,0 +1,7 @@
+build_from:
+  amd64: nvidia/cuda:12.1.0-runtime-ubuntu22.04
+labels:
+  org.opencontainers.image.title: "wyoming-chatterbox"
+  org.opencontainers.image.description: "wyoming protocol server for chatterbox tts with voice cloning"
+  org.opencontainers.image.source: "https://github.com/sudoxnym/wyoming-chatterbox"
+  org.opencontainers.image.licenses: "MIT"
diff --git a/wyoming-chatterbox/config.yaml b/wyoming-chatterbox/config.yaml
new file mode 100644
index 0000000..0cd1166
--- /dev/null
+++ b/wyoming-chatterbox/config.yaml
@@ -0,0 +1,27 @@
+name: wyoming-chatterbox
+version: "1.0.0"
+slug: wyoming-chatterbox
+description: "wyoming protocol server for chatterbox tts with voice cloning. clone any voice with a 10-30 second sample."
+url: "https://github.com/sudoxnym/wyoming-chatterbox"
+arch:
+  - amd64
+startup: application
+boot: auto
+ports:
+  10201/tcp: 10201
+ports_description:
+  10201/tcp: "wyoming protocol"
+map:
+  - share:rw
+options:
+  voice_ref: "/share/voice_reference.wav"
+  volume_boost: 3.0
+  device: "cuda"
+  debug: false
+schema:
+  voice_ref: str
+  volume_boost: float?
+  device: list(cuda|cpu)?
+  debug: bool?
+image: sudoxreboot/wyoming-chatterbox-addon-{arch}
+full_access: true
diff --git a/wyoming-chatterbox/icon.png b/wyoming-chatterbox/icon.png
new file mode 100644
index 0000000..212b1a9
Binary files /dev/null and b/wyoming-chatterbox/icon.png differ
diff --git a/wyoming-chatterbox/logo.png b/wyoming-chatterbox/logo.png
new file mode 100644
index 0000000..212b1a9
Binary files /dev/null and b/wyoming-chatterbox/logo.png differ
diff --git a/wyoming-chatterbox/run.sh b/wyoming-chatterbox/run.sh
new file mode 100644
index 0000000..3e87679
--- /dev/null
+++ b/wyoming-chatterbox/run.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Entry point for the wyoming-chatterbox add-on container.
+# Reads the add-on options from /data/options.json with jq, then replaces this
+# shell with the wyoming-chatterbox server listening on tcp port 10201.
+set -euo pipefail
+
+readonly CONFIG_PATH=/data/options.json
+
+# Print an error message to stderr and exit non-zero.
+die() {
+  printf 'error: %s\n' "$*" >&2
+  exit 1
+}
+
+command -v jq >/dev/null || die "jq is required to read ${CONFIG_PATH}"
+[[ -r "${CONFIG_PATH}" ]] || die "cannot read ${CONFIG_PATH}"
+
+# '// empty' maps a missing or null voice_ref to "" instead of the string "null".
+VOICE_REF=$(jq -r '.voice_ref // empty' "${CONFIG_PATH}")
+VOLUME_BOOST=$(jq -r '.volume_boost // 3.0' "${CONFIG_PATH}")
+DEVICE=$(jq -r '.device // "cuda"' "${CONFIG_PATH}")
+DEBUG=$(jq -r '.debug // false' "${CONFIG_PATH}")
+
+[[ -n "${VOICE_REF}" ]] || die "voice_ref is not set in ${CONFIG_PATH}"
+if [[ ! -f "${VOICE_REF}" ]]; then
+  printf 'warning: voice_ref file not found: %s\n' "${VOICE_REF}" >&2
+fi
+
+echo "starting wyoming-chatterbox..."
+echo "voice_ref: ${VOICE_REF}"
+echo "device: ${DEVICE}"
+
+# Build the argument list as an array so a value containing spaces or glob
+# characters (e.g. a voice_ref path) reaches the server as one intact argument.
+args=(
+  --uri tcp://0.0.0.0:10201
+  --voice-ref "${VOICE_REF}"
+  --volume-boost "${VOLUME_BOOST}"
+  --device "${DEVICE}"
+)
+
+if [[ "${DEBUG}" == "true" ]]; then
+  args+=(--debug)
+fi
+
+# PyTorch CUDA caching-allocator setting; must be exported before the server starts.
+export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+exec wyoming-chatterbox "${args[@]}"