<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>The Voice AI Index</title>
    <link>https://voice.kymatalabs.com</link>
    <description>The living index of voice &amp; speech AI tooling — TTS, speech recognition, voice cloning, realtime voice agents, toolkits.</description>
    <item><title>ggml-org/whisper.cpp — momentum 83</title><link>https://voice.kymatalabs.com/p/ggml-org-whisper-cpp/</link><guid isPermaLink="false">ggml-org/whisper.cpp</guid><description>Port of OpenAI's Whisper model in C/C++</description></item>
    <item><title>fishaudio/fish-speech — momentum 81</title><link>https://voice.kymatalabs.com/p/fishaudio-fish-speech/</link><guid isPermaLink="false">fishaudio/fish-speech</guid><description>SOTA Open Source TTS</description></item>
    <item><title>OpenBMB/VoxCPM — momentum 80</title><link>https://voice.kymatalabs.com/p/openbmb-voxcpm/</link><guid isPermaLink="false">OpenBMB/VoxCPM</guid><description>VoxCPM2: Tokenizer-Free TTS for Multilingual Speech Generation, Creative Voice Design, and True-to-Life Cloning</description></item>
    <item><title>cjpais/Handy — momentum 80</title><link>https://voice.kymatalabs.com/p/cjpais-handy/</link><guid isPermaLink="false">cjpais/Handy</guid><description>A free, open source, and extensible speech-to-text application that works completely offline.</description></item>
    <item><title>debpalash/OmniVoice-Studio — momentum 80</title><link>https://voice.kymatalabs.com/p/debpalash-omnivoice-studio/</link><guid isPermaLink="false">debpalash/OmniVoice-Studio</guid><description>The open-source ElevenLabs alternative for local voice cloning, design, create, dubbing and dictation Desktop App</description></item>
    <item><title>MisoLabsAI/MisoTTS — momentum 80</title><link>https://voice.kymatalabs.com/p/misolabsai-misotts/</link><guid isPermaLink="false">MisoLabsAI/MisoTTS</guid><description>Miso TTS is an 8 billion, highly emotive text-to-speech model</description></item>
    <item><title>index-tts/index-tts — momentum 79</title><link>https://voice.kymatalabs.com/p/index-tts-index-tts/</link><guid isPermaLink="false">index-tts/index-tts</guid><description>An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System</description></item>
    <item><title>DrewThomasson/ebook2audiobook — momentum 79</title><link>https://voice.kymatalabs.com/p/drewthomasson-ebook2audiobook/</link><guid isPermaLink="false">DrewThomasson/ebook2audiobook</guid><description>Generate audiobooks from e-books, voice cloning &amp; 1158+ languages!</description></item>
    <item><title>Huanshere/VideoLingo — momentum 79</title><link>https://voice.kymatalabs.com/p/huanshere-videolingo/</link><guid isPermaLink="false">Huanshere/VideoLingo</guid><description>Netflix-level subtitle cutting, translation, alignment, and even dubbing - one-click fully automated AI video subtitle team | Netflix级字幕切割、翻译、对齐、甚至加上配音，一键全自动视频搬运AI字幕组</description></item>
    <item><title>k2-fsa/OmniVoice — momentum 79</title><link>https://voice.kymatalabs.com/p/k2-fsa-omnivoice/</link><guid isPermaLink="false">k2-fsa/OmniVoice</guid><description>High-Quality Voice Cloning TTS for 600+ Languages</description></item>
    <item><title>m-bain/whisperX — momentum 78</title><link>https://voice.kymatalabs.com/p/m-bain-whisperx/</link><guid isPermaLink="false">m-bain/whisperX</guid><description>WhisperX:  Automatic Speech Recognition with Word-level Timestamps (&amp; Diarization)</description></item>
    <item><title>jianchang512/pyvideotrans — momentum 78</title><link>https://voice.kymatalabs.com/p/jianchang512-pyvideotrans/</link><guid isPermaLink="false">jianchang512/pyvideotrans</guid><description>Translate the video from one language to another and embed dubbing &amp; subtitles.</description></item>
    <item><title>modelscope/FunASR — momentum 78</title><link>https://voice.kymatalabs.com/p/modelscope-funasr/</link><guid isPermaLink="false">modelscope/FunASR</guid><description>Industrial-grade speech recognition toolkit: 170x realtime, 50+ languages, speaker diarization, emotion detection, streaming, and OpenAI-compatible API.</description></item>
    <item><title>NVIDIA-NeMo/NeMo — momentum 78</title><link>https://voice.kymatalabs.com/p/nvidia-nemo-nemo/</link><guid isPermaLink="false">NVIDIA-NeMo/NeMo</guid><description>A scalable generative AI framework built for researchers and developers working on Large Language Models, Multimodal, and Speech AI (Automatic Speech Recognition and Text-to-Speech)</description></item>
    <item><title>openai/whisper — momentum 77</title><link>https://voice.kymatalabs.com/p/openai-whisper/</link><guid isPermaLink="false">openai/whisper</guid><description>Robust Speech Recognition via Large-Scale Weak Supervision</description></item>
    <item><title>leon-ai/leon — momentum 77</title><link>https://voice.kymatalabs.com/p/leon-ai-leon/</link><guid isPermaLink="false">leon-ai/leon</guid><description>🧠 Leon is your open-source personal assistant.</description></item>
    <item><title>k2-fsa/sherpa-onnx — momentum 77</title><link>https://voice.kymatalabs.com/p/k2-fsa-sherpa-onnx/</link><guid isPermaLink="false">k2-fsa/sherpa-onnx</guid><description>Speech-to-text, text-to-speech, speaker diarization, speech enhancement, source separation, and VAD using next-gen Kaldi with onnxruntime without Internet connection. Support embedded systems, Android, iOS, HarmonyOS, Raspberry Pi, RISC-V, RK NPU, Axera NPU, Ascend NPU, x86_64 servers, websocket ser</description></item>
    <item><title>pipecat-ai/pipecat — momentum 77</title><link>https://voice.kymatalabs.com/p/pipecat-ai-pipecat/</link><guid isPermaLink="false">pipecat-ai/pipecat</guid><description>Open Source framework for voice and multimodal conversational AI</description></item>
    <item><title>PaddlePaddle/PaddleSpeech — momentum 77</title><link>https://voice.kymatalabs.com/p/paddlepaddle-paddlespeech/</link><guid isPermaLink="false">PaddlePaddle/PaddleSpeech</guid><description>Easy-to-use Speech Toolkit including Self-Supervised Learning model, SOTA/Streaming ASR with punctuation, Streaming TTS with text frontend, Speaker Verification System, End-to-End Speech Translation and Keyword Spotting. Won NAACL2022 Best Demo Award.</description></item>
    <item><title>Open-Less/openless — momentum 77</title><link>https://voice.kymatalabs.com/p/open-less-openless/</link><guid isPermaLink="false">Open-Less/openless</guid><description>Hold a key, speak, release — AI-polished text appears at your cursor in any app. Open-source voice input for macOS &amp; Windows. (按住快捷键说话，松开即得润色后的文字)</description></item>
    <item><title>RVC-Boss/GPT-SoVITS — momentum 76</title><link>https://voice.kymatalabs.com/p/rvc-boss-gpt-sovits/</link><guid isPermaLink="false">RVC-Boss/GPT-SoVITS</guid><description>1 min voice data can also be used to train a good TTS model! (few shot voice cloning)</description></item>
    <item><title>FunAudioLLM/CosyVoice — momentum 76</title><link>https://voice.kymatalabs.com/p/funaudiollm-cosyvoice/</link><guid isPermaLink="false">FunAudioLLM/CosyVoice</guid><description>Multi-lingual large voice generation model, providing inference, training and deployment full-stack ability.</description></item>
    <item><title>alphacep/vosk-api — momentum 76</title><link>https://voice.kymatalabs.com/p/alphacep-vosk-api/</link><guid isPermaLink="false">alphacep/vosk-api</guid><description>Offline speech recognition API for Android, iOS, Raspberry Pi and servers with Python, Java, C# and Node</description></item>
    <item><title>Zackriya-Solutions/meetily — momentum 76</title><link>https://voice.kymatalabs.com/p/zackriya-solutions-meetily/</link><guid isPermaLink="false">Zackriya-Solutions/meetily</guid><description>Privacy first, AI meeting assistant with 4x faster Parakeet/Whisper live transcription, speaker diarization, and Ollama summarization built on Rust. 100% local processing. no cloud required. Meetily (Meetly Ai - https://meetily.ai) is the #1 Self-hosted,  Open-source Ai meeting note taker for macOS </description></item>
    <item><title>livekit/agents — momentum 76</title><link>https://voice.kymatalabs.com/p/livekit-agents/</link><guid isPermaLink="false">livekit/agents</guid><description>A framework for building realtime voice AI agents 🤖🎙️📹</description></item>
    <item><title>TEN-framework/ten-framework — momentum 76</title><link>https://voice.kymatalabs.com/p/ten-framework-ten-framework/</link><guid isPermaLink="false">TEN-framework/ten-framework</guid><description>Open-source framework for conversational voice AI agents</description></item>
    <item><title>QuentinFuxa/WhisperLiveKit — momentum 76</title><link>https://voice.kymatalabs.com/p/quentinfuxa-whisperlivekit/</link><guid isPermaLink="false">QuentinFuxa/WhisperLiveKit</guid><description>Simultaneous speech-to-text models</description></item>
    <item><title>pyannote/pyannote-audio — momentum 76</title><link>https://voice.kymatalabs.com/p/pyannote-pyannote-audio/</link><guid isPermaLink="false">pyannote/pyannote-audio</guid><description>Neural building blocks for speaker diarization: speech activity detection, speaker change detection, overlapped speech detection, speaker embedding</description></item>
    <item><title>KoljaB/RealtimeSTT — momentum 76</title><link>https://voice.kymatalabs.com/p/koljab-realtimestt/</link><guid isPermaLink="false">KoljaB/RealtimeSTT</guid><description>A robust, efficient, low-latency speech-to-text library with advanced voice activity detection, wake word activation and instant transcription.</description></item>
    <item><title>espnet/espnet — momentum 76</title><link>https://voice.kymatalabs.com/p/espnet-espnet/</link><guid isPermaLink="false">espnet/espnet</guid><description>End-to-End Speech Processing Toolkit</description></item>
  </channel>
</rss>
