Compare commits

...

3 Commits

Author SHA1 Message Date
f23acc8c1c Ajout sample CPP 2026-02-21 20:48:10 +01:00
d8957625f8 rollback to a more functional version but not perfect 2026-02-21 19:49:26 +01:00
1b883f532f Fix collision stuck state + add streaming partial text event
- Skip mic buffer flush during collision avoidance (bAgentGenerating guard
  in StopListening) to prevent sending audio to a mid-generation server
  which caused both sides to stall permanently
- Add OnAgentPartialResponse event: streams LLM text fragments from
  agent_chat_response_part in real-time (opt-in via bEnableAgentPartialResponse),
  separate from the existing OnAgentTextResponse (full text at end)
- French agent server drop after 3 turns is a server-side issue, not client

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 20:24:50 +01:00
15 changed files with 1086 additions and 83 deletions

View File

@ -0,0 +1,152 @@
# Build directories
build/
cmake-build-*/
out/
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
convai_cpp
# CMake
CMakeCache.txt
CMakeFiles/
CMakeScripts/
Testing/
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps/
# IDE files
.vscode/
.idea/
*.swp
*.swo
*~
# macOS
.DS_Store
.AppleDouble
.LSOverride
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# Windows
Thumbs.db
ehthumbs.db
Desktop.ini
$RECYCLE.BIN/
*.cab
*.msi
*.msm
*.msp
*.lnk
# Linux
*~
.fuse_hidden*
.directory
.Trash-*
.nfs*
# Logs
*.log
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Coverage directory used by tools like istanbul
coverage/
# nyc test coverage
.nyc_output
# Dependency directories
node_modules/
# Optional npm cache directory
.npm
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# next.js build output
.next
# nuxt.js build output
.nuxt
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/

View File

@ -0,0 +1,42 @@
cmake_minimum_required(VERSION 3.14)
project(elevenlabs_convai_cpp LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# ── Dependencies ─────────────────────────────────────────────────────────────
find_package(Boost REQUIRED COMPONENTS system thread)
find_package(OpenSSL REQUIRED)
# PortAudio via vcpkg/Homebrew CMake config
find_package(portaudio CONFIG REQUIRED)

# nlohmann_json: prefer an installed package; otherwise download the
# single-header release into the build tree.
find_package(nlohmann_json 3.11 QUIET)
if(NOT nlohmann_json_FOUND)
  # NOTE: FetchContent with a raw-file URL copies json.hpp to the source-dir
  # root, so an include path of ".../single_include" would never resolve
  # <nlohmann/json.hpp>. Download the header into the layout the sources
  # actually include instead.
  set(_json_include_dir ${CMAKE_BINARY_DIR}/_deps/nlohmann_json/include)
  set(_json_header ${_json_include_dir}/nlohmann/json.hpp)
  if(NOT EXISTS ${_json_header})
    file(DOWNLOAD
      https://raw.githubusercontent.com/nlohmann/json/v3.11.2/single_include/nlohmann/json.hpp
      ${_json_header}
      STATUS _json_dl_status)
    list(GET _json_dl_status 0 _json_dl_code)
    if(NOT _json_dl_code EQUAL 0)
      # Remove the partial file so a re-configure retries the download.
      file(REMOVE ${_json_header})
      message(FATAL_ERROR "Failed to download nlohmann/json.hpp: ${_json_dl_status}")
    endif()
  endif()
  add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED)
  target_include_directories(nlohmann_json::nlohmann_json INTERFACE ${_json_include_dir})
endif()

add_executable(convai_cpp
  src/main.cpp
  src/Conversation.cpp
  src/DefaultAudioInterface.cpp
)
# PRIVATE: nothing consumes the executable's headers.
target_include_directories(convai_cpp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)

# MSVC: set Windows 10 target version and suppress getenv deprecation warning.
if(MSVC)
  target_compile_definitions(convai_cpp PRIVATE _WIN32_WINNT=0x0A00 _CRT_SECURE_NO_WARNINGS)
endif()

target_link_libraries(convai_cpp PRIVATE
  Boost::system
  Boost::thread
  OpenSSL::SSL
  OpenSSL::Crypto
  portaudio
  nlohmann_json::nlohmann_json
)

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Jitendra
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,197 @@
# ElevenLabs Conversational AI - C++ Implementation
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![C++17](https://img.shields.io/badge/C%2B%2B-17-blue.svg)](https://en.wikipedia.org/wiki/C%2B%2B17)
[![CMake](https://img.shields.io/badge/CMake-3.14+-green.svg)](https://cmake.org/)
C++ implementation of ElevenLabs Conversational AI client
## Features
- **Real-time Audio Processing**: Full-duplex audio streaming with low-latency playback
- **WebSocket Integration**: Secure WSS connection to ElevenLabs Conversational AI platform
- **Cross-platform Audio**: PortAudio-based implementation supporting Windows, macOS, and Linux
- **Echo Suppression**: Built-in acoustic feedback prevention
- **Modern C++**: Clean, maintainable C++17 codebase with proper RAII and exception handling
- **Flexible Architecture**: Modular design allowing easy customization and extension
## Architecture
```mermaid
graph TB
subgraph "User Interface"
A[main.cpp] --> B[Conversation]
end
subgraph "Core Components"
B --> C[DefaultAudioInterface]
B --> D[WebSocket Client]
C --> E[PortAudio]
D --> F[Boost.Beast + OpenSSL]
end
subgraph "ElevenLabs Platform"
F --> G[WSS API Endpoint]
G --> H[Conversational AI Agent]
end
subgraph "Audio Flow"
I[Microphone] --> C
C --> J[Base64 Encoding]
J --> D
D --> K[Audio Events]
K --> L[Base64 Decoding]
L --> C
C --> M[Speakers]
end
subgraph "Message Types"
N[user_audio_chunk]
O[agent_response]
P[user_transcript]
Q[audio_event]
R[ping/pong]
end
style B fill:#e1f5fe
style C fill:#f3e5f5
style D fill:#e8f5e8
style H fill:#fff3e0
```
## Quick Start
### Prerequisites
- **C++17 compatible compiler**: GCC 11+, Clang 14+, or MSVC 2022+
- **CMake** 3.14 or higher
- **Dependencies** (install via package manager):
#### macOS (Homebrew)
```bash
brew install boost openssl portaudio nlohmann-json cmake pkg-config
```
#### Ubuntu/Debian
```bash
sudo apt update
sudo apt install build-essential cmake pkg-config
sudo apt install libboost-system-dev libboost-thread-dev
sudo apt install libssl-dev portaudio19-dev nlohmann-json3-dev
```
#### Windows (vcpkg)
```bash
vcpkg install boost-system boost-thread openssl portaudio nlohmann-json
```
### Building
```bash
# Clone the repository
git clone https://github.com/Jitendra2603/elevenlabs-convai-cpp.git
cd elevenlabs-convai-cpp
# Build the project
mkdir build && cd build
cmake ..
cmake --build . --config Release
```
### Running
```bash
# Set your agent ID (get this from ElevenLabs dashboard)
export AGENT_ID="your-agent-id-here"
# Run the demo
./convai_cpp
```
The application will:
1. Connect to your ElevenLabs Conversational AI agent
2. Start capturing audio from your default microphone
3. Stream audio to the agent and play responses through speakers
4. Display conversation transcripts in the terminal
5. Continue until you press Enter to quit
## 📋 Usage Examples
### Basic Conversation
```bash
export AGENT_ID="agent_xxxxxxxxxxxxxxxx"   # your agent ID from the ElevenLabs dashboard
./convai_cpp
# Speak into your microphone and hear the AI agent respond
```
## Configuration
### Audio Settings
The audio interface is configured for optimal real-time performance:
- **Sample Rate**: 16 kHz
- **Format**: 16-bit PCM mono
- **Input Buffer**: 250ms (4000 frames)
- **Output Buffer**: 62.5ms (1000 frames)
### WebSocket Connection
- **Endpoint**: `wss://api.elevenlabs.io/v1/convai/conversation`
- **Protocol**: WebSocket Secure (WSS) with TLS 1.2+
- **Authentication**: Optional (required for private agents)
## Project Structure
```
elevenlabs-convai-cpp/
├── CMakeLists.txt # Build configuration
├── README.md # This file
├── LICENSE # MIT license
├── CONTRIBUTING.md # Contribution guidelines
├── .gitignore # Git ignore rules
├── include/ # Header files
│ ├── AudioInterface.hpp # Abstract audio interface
│ ├── DefaultAudioInterface.hpp # PortAudio implementation
│ └── Conversation.hpp # Main conversation handler
└── src/ # Source files
├── main.cpp # Demo application
├── Conversation.cpp # WebSocket and message handling
└── DefaultAudioInterface.cpp # Audio I/O implementation
```
## Technical Details
### Audio Processing Pipeline
1. **Capture**: PortAudio captures 16-bit PCM audio at 16kHz
2. **Encoding**: Raw audio is base64-encoded for WebSocket transmission
3. **Streaming**: Audio chunks sent as `user_audio_chunk` messages
4. **Reception**: Server sends `audio_event` messages with agent responses
5. **Decoding**: Base64 audio data decoded back to PCM
6. **Playback**: Audio queued and played through PortAudio output stream
### Echo Suppression
The implementation includes a simple, effective echo suppression mechanism:
- Microphone input is suppressed during agent speech playback
- Prevents acoustic feedback loops that cause the agent to respond to itself
- Uses atomic flags for thread-safe coordination between input/output
### WebSocket Message Handling
Supported message types:
- `conversation_initiation_client_data` - Session initialization
- `user_audio_chunk` - Microphone audio data
- `audio_event` - Agent speech audio
- `agent_response` - Agent text responses
- `user_transcript` - Speech-to-text results
- `ping`/`pong` - Connection keepalive
## 📝 License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

View File

@ -0,0 +1,23 @@
#pragma once
#include <functional>
#include <vector>
// Abstract audio I/O interface consumed by Conversation.
// All audio buffers in this codebase are raw 16-bit PCM, mono, 16 kHz,
// carried as byte vectors (2 bytes per sample).
class AudioInterface {
public:
// Callback invoked with captured microphone audio (raw bytes of int16 samples).
using AudioCallback = std::function<void(const std::vector<char>&)>;
virtual ~AudioInterface() = default;
// Starts the audio interface. The callback will be invoked with raw 16-bit PCM mono samples at 16kHz.
// NOTE(review): implementations may invoke the callback on an audio/driver
// thread (DefaultAudioInterface does) — callers must be thread-aware.
virtual void start(AudioCallback inputCallback) = 0;
// Stops audio I/O and releases underlying resources.
virtual void stop() = 0;
// Play audio to the user; audio is 16-bit PCM mono 16kHz.
virtual void output(const std::vector<char>& audio) = 0;
// Immediately stop any buffered / ongoing output.
virtual void interrupt() = 0;
};

View File

@ -0,0 +1,72 @@
#pragma once
#include "AudioInterface.hpp"
#include <boost/beast/core.hpp>
#include <boost/beast/websocket.hpp>
#include <boost/beast/ssl.hpp>
#include <boost/asio.hpp>
#include <boost/asio/ssl/stream.hpp>
#include <boost/asio/ip/tcp.hpp>
#include <nlohmann/json.hpp>
#include <thread>
#include <atomic>
#include <functional>
// Manages one session with an ElevenLabs Conversational AI agent over a
// secure WebSocket. Owns a worker thread that connects, streams microphone
// audio upstream, and dispatches server events (audio, transcripts, agent
// text, pings) to the audio interface and the optional callbacks below.
class Conversation {
public:
// Invoked with the agent's full text response.
using CallbackAgentResponse = std::function<void(const std::string&)>;
// Invoked with (original, corrected) agent responses after a correction event.
using CallbackAgentResponseCorrection = std::function<void(const std::string&, const std::string&)>;
// Invoked with the user's speech-to-text transcript.
using CallbackUserTranscript = std::function<void(const std::string&)>;
// Invoked with the latency in milliseconds reported by server ping events.
using CallbackLatencyMeasurement = std::function<void(int)>;
// All callbacks are optional; nullptr disables the corresponding event.
Conversation(
const std::string& agentId,
bool requiresAuth,
std::shared_ptr<AudioInterface> audioInterface,
CallbackAgentResponse callbackAgentResponse = nullptr,
CallbackAgentResponseCorrection callbackAgentResponseCorrection = nullptr,
CallbackUserTranscript callbackUserTranscript = nullptr,
CallbackLatencyMeasurement callbackLatencyMeasurement = nullptr
);
~Conversation();
// Starts the worker thread that connects and services the session.
void startSession();
// Requests shutdown: closes the socket, stops audio, joins the worker.
void endSession();
// Blocks until the worker finishes; returns the server-assigned conversation id.
std::string waitForSessionEnd();
// Sends a typed text message as the user (throws if the session is not started).
void sendUserMessage(const std::string& text);
// Signals user activity to the server.
void registerUserActivity();
// Sends non-conversational context to the agent.
void sendContextualUpdate(const std::string& content);
private:
// Worker-thread body: connect, handshake, then read/dispatch loop.
void run();
// Dispatches one parsed server message by its "type" field.
void handleMessage(const nlohmann::json& message);
// Builds the wss:// endpoint URL for the configured agent.
std::string getWssUrl() const;
// networking members
boost::asio::io_context ioc_;
boost::asio::ssl::context sslCtx_{boost::asio::ssl::context::tlsv12_client};
using tcp = boost::asio::ip::tcp;
using websocket_t = boost::beast::websocket::stream<
boost::beast::ssl_stream<tcp::socket>>;
std::unique_ptr<websocket_t> ws_;
// general state
std::string agentId_;
bool requiresAuth_; // NOTE(review): stored but unused by getWssUrl() — confirm intended use
std::shared_ptr<AudioInterface> audioInterface_;
CallbackAgentResponse callbackAgentResponse_;
CallbackAgentResponseCorrection callbackAgentResponseCorrection_;
CallbackUserTranscript callbackUserTranscript_;
CallbackLatencyMeasurement callbackLatencyMeasurement_;
std::thread workerThread_;
std::atomic<bool> shouldStop_{false}; // set by endSession(); polled by the read loop
std::string conversationId_; // set from conversation_initiation_metadata
std::atomic<int> lastInterruptId_{0}; // audio events at or below this id are discarded
};

View File

@ -0,0 +1,45 @@
#pragma once
#include "AudioInterface.hpp"
#include <portaudio.h>
#include <mutex>
#include <condition_variable>
#include <queue>
#include <thread>
#include <atomic>
// PortAudio-backed AudioInterface: 16 kHz, 16-bit PCM, mono.
// Capture runs on PortAudio's callback thread; playback runs on a dedicated
// consumer thread that drains a mutex-protected queue.
class DefaultAudioInterface : public AudioInterface {
public:
static constexpr int INPUT_FRAMES_PER_BUFFER = 4000; // 250ms @ 16kHz
static constexpr int OUTPUT_FRAMES_PER_BUFFER = 1000; // 62.5ms @ 16kHz
DefaultAudioInterface();
~DefaultAudioInterface() override;
void start(AudioCallback inputCallback) override;
void stop() override;
void output(const std::vector<char>& audio) override;
void interrupt() override;
private:
// C-style trampoline PortAudio invokes; forwards to inputCallbackInternal.
static int inputCallbackStatic(const void* input, void* output, unsigned long frameCount,
const PaStreamCallbackTimeInfo* timeInfo, PaStreamCallbackFlags statusFlags,
void* userData);
// Runs on PortAudio's audio thread; copies samples and calls inputCallback_.
int inputCallbackInternal(const void* input, unsigned long frameCount);
// Playback thread body: pops queued chunks and writes them to outputStream_.
void outputThreadFunc();
PaStream* inputStream_{};
PaStream* outputStream_{};
AudioCallback inputCallback_;
std::queue<std::vector<char>> outputQueue_; // pending playback chunks (guarded by queueMutex_)
std::mutex queueMutex_;
std::condition_variable queueCv_;
std::thread outputThread_;
std::atomic<bool> shouldStop_{false}; // set by stop(); ends outputThreadFunc
std::atomic<bool> outputPlaying_{false}; // true while Pa_WriteStream is active (echo suppression)
};

View File

@ -0,0 +1,230 @@
#include "Conversation.hpp"
#include <boost/beast/websocket/ssl.hpp>
#include <boost/beast/websocket.hpp>
#include <boost/beast/ssl.hpp>
#include <boost/beast/core/detail/base64.hpp>
#include <boost/asio/connect.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
#include <sstream>
#include <openssl/ssl.h>
using tcp = boost::asio::ip::tcp;
namespace ssl = boost::asio::ssl;
namespace websocket = boost::beast::websocket;
namespace beast = boost::beast;
// Encode raw bytes as a base64 string using Beast's (detail) codec.
static std::string base64Encode(const std::vector<char>& data) {
    std::string encoded;
    encoded.resize(beast::detail::base64::encoded_size(data.size()));
    beast::detail::base64::encode(&encoded[0], data.data(), data.size());
    return encoded;
}
// Decode a base64 string to raw bytes. decoded_size() is an upper bound, so
// the buffer is trimmed to the byte count the codec actually wrote.
static std::vector<char> base64Decode(const std::string& str) {
    std::vector<char> decoded(beast::detail::base64::decoded_size(str.size()));
    const auto written = beast::detail::base64::decode(decoded.data(), str.data(), str.size());
    decoded.resize(written.first);
    return decoded;
}
// Render a JSON value as text: strings pass through unchanged, integers are
// formatted decimally, and anything else falls back to its JSON dump.
static std::string toString(const nlohmann::json& j) {
    if (j.is_string()) {
        return j.get<std::string>();
    }
    if (j.is_number_integer()) {
        return std::to_string(j.get<int64_t>());
    }
    return j.dump();
}
// Constructs a conversation bound to `agentId`. No network I/O happens here;
// the WebSocket connection is established by startSession() on a worker thread.
// Callbacks may be nullptr, in which case the corresponding event is ignored.
// NOTE(review): requiresAuth is stored but not consulted by getWssUrl() in
// this file — confirm whether auth-token handling lives elsewhere.
Conversation::Conversation(const std::string& agentId, bool requiresAuth,
std::shared_ptr<AudioInterface> audioInterface,
CallbackAgentResponse callbackAgentResponse,
CallbackAgentResponseCorrection callbackAgentResponseCorrection,
CallbackUserTranscript callbackUserTranscript,
CallbackLatencyMeasurement callbackLatencyMeasurement)
: agentId_(agentId),
requiresAuth_(requiresAuth),
audioInterface_(std::move(audioInterface)),
callbackAgentResponse_(std::move(callbackAgentResponse)),
callbackAgentResponseCorrection_(std::move(callbackAgentResponseCorrection)),
callbackUserTranscript_(std::move(callbackUserTranscript)),
callbackLatencyMeasurement_(std::move(callbackLatencyMeasurement)) {
// Load the system CA bundle so the TLS handshake can verify the server.
sslCtx_.set_default_verify_paths();
}
// Tears the session down (socket closed, audio stopped, worker joined) if the
// caller did not call endSession() explicitly.
Conversation::~Conversation() {
endSession();
}
void Conversation::startSession() {
shouldStop_.store(false);
workerThread_ = std::thread(&Conversation::run, this);
}
// Signals the worker to stop, closes the socket, stops audio, and joins.
// NOTE(review): ws_->close() runs on the caller's thread while run() may be
// blocked in ws_->read() on the worker thread; Beast streams are not
// thread-safe, so this relies on the close unblocking the read — confirm.
void Conversation::endSession() {
shouldStop_.store(true);
if (ws_) {
beast::error_code ec;
// Best-effort close; errors are deliberately ignored (peer may be gone).
ws_->close(websocket::close_code::normal, ec);
}
if (audioInterface_) {
audioInterface_->stop();
}
if (workerThread_.joinable()) {
workerThread_.join();
}
}
// Blocks until the worker thread finishes, then returns the conversation id
// (empty if initiation metadata was never received).
std::string Conversation::waitForSessionEnd() {
    if (workerThread_.joinable()) {
        workerThread_.join();
    }
    return conversationId_;
}
void Conversation::sendUserMessage(const std::string& text) {
if (!ws_) {
throw std::runtime_error("Session not started");
}
nlohmann::json j = {
{"type", "user_message"},
{"text", text}
};
ws_->write(boost::asio::buffer(j.dump()));
}
void Conversation::registerUserActivity() {
if (!ws_) throw std::runtime_error("Session not started");
nlohmann::json j = {{"type", "user_activity"}};
ws_->write(boost::asio::buffer(j.dump()));
}
void Conversation::sendContextualUpdate(const std::string& content) {
if (!ws_) throw std::runtime_error("Session not started");
nlohmann::json j = {{"type", "contextual_update"}, {"content", content}};
ws_->write(boost::asio::buffer(j.dump()));
}
// Builds the WebSocket endpoint URL for the configured agent.
// Hard-coded base env for demo; production code would query the ElevenLabs
// environment endpoint instead.
std::string Conversation::getWssUrl() const {
    return "wss://api.elevenlabs.io/v1/convai/conversation?agent_id=" + agentId_;
}
// Worker-thread entry point: resolves, connects (TCP → TLS → WebSocket),
// sends the session-initiation message, starts audio capture, then loops
// reading and dispatching server messages until shouldStop_ or a read error.
// All failures are logged to stderr; the thread never lets an exception escape.
void Conversation::run() {
    try {
        auto url = getWssUrl();
        std::string protocol, host, target;
        unsigned short port = 443;
        // Very naive parse: wss://host[:port]/path?query
        if (boost::starts_with(url, "wss://")) {
            protocol = "wss";
            host = url.substr(6);
        } else {
            throw std::runtime_error("Only wss:// URLs supported in this demo");
        }
        auto slashPos = host.find('/');
        if (slashPos == std::string::npos) {
            target = "/";
        } else {
            target = host.substr(slashPos);
            host = host.substr(0, slashPos);
        }
        auto colonPos = host.find(':');
        if (colonPos != std::string::npos) {
            port = static_cast<unsigned short>(std::stoi(host.substr(colonPos + 1)));
            host = host.substr(0, colonPos);
        }
        tcp::resolver resolver(ioc_);
        auto const results = resolver.resolve(host, std::to_string(port));
        beast::ssl_stream<tcp::socket> stream(ioc_, sslCtx_);
        boost::asio::connect(beast::get_lowest_layer(stream), results);
        // SNI is required by the endpoint's TLS front-end.
        if (!SSL_set_tlsext_host_name(stream.native_handle(), host.c_str())) {
            throw std::runtime_error("Failed to set SNI hostname on SSL stream");
        }
        stream.handshake(ssl::stream_base::client);
        ws_ = std::make_unique<websocket_t>(std::move(stream));
        ws_->set_option(websocket::stream_base::timeout::suggested(beast::role_type::client));
        ws_->handshake(host, target);
        // Send initiation data (empty overrides — server defaults apply).
        nlohmann::json init = {
            {"type", "conversation_initiation_client_data"},
            {"custom_llm_extra_body", nlohmann::json::object()},
            {"conversation_config_override", nlohmann::json::object()},
            {"dynamic_variables", nlohmann::json::object()}
        };
        ws_->write(boost::asio::buffer(init.dump()));
        // Microphone callback: invoked from the audio interface's thread.
        // FIX: an exception escaping a foreign (PortAudio) callback thread
        // would call std::terminate, so send failures are logged and swallowed
        // here instead of propagating.
        // NOTE(review): this write can race the read loop below (Beast
        // websocket streams are not thread-safe); a write queue/strand would
        // be safer — confirm against observed behavior.
        auto inputCb = [this](const std::vector<char>& audio) {
            if (shouldStop_.load()) return;  // session is tearing down
            try {
                nlohmann::json msg = {
                    {"user_audio_chunk", base64Encode(audio)}
                };
                ws_->write(boost::asio::buffer(msg.dump()));
            } catch (const std::exception& ex) {
                std::cerr << "Audio send error: " << ex.what() << std::endl;
            }
        };
        audioInterface_->start(inputCb);
        beast::flat_buffer buffer;
        while (!shouldStop_.load()) {
            beast::error_code ec;
            ws_->read(buffer, ec);
            if (ec) {
                std::cerr << "Websocket read error: " << ec.message() << std::endl;
                break;
            }
            auto text = beast::buffers_to_string(buffer.data());
            buffer.consume(buffer.size());
            // Malformed frames are logged and skipped; the session continues.
            try {
                auto message = nlohmann::json::parse(text);
                handleMessage(message);
            } catch (const std::exception& ex) {
                std::cerr << "JSON parse error: " << ex.what() << std::endl;
            }
        }
    } catch (const std::exception& ex) {
        std::cerr << "Conversation error: " << ex.what() << std::endl;
    }
}
// Dispatches one parsed server message by its "type" field. Unknown types are
// ignored. Exceptions thrown here (missing keys, non-numeric ids) are caught
// and logged by the read loop in run(), so a bad frame does not end the session.
void Conversation::handleMessage(const nlohmann::json& message) {
std::string type = message.value("type", "");
if (type == "conversation_initiation_metadata") {
// First message of a session: remember the server-assigned conversation id.
conversationId_ = message["conversation_initiation_metadata_event"]["conversation_id"].get<std::string>();
} else if (type == "audio") {
auto event = message["audio_event"];
// Drop audio that belongs to a generation the user already interrupted.
int eventId = std::stoi(toString(event["event_id"]));
if (eventId <= lastInterruptId_.load()) return;
auto audioBytes = base64Decode(event["audio_base_64"].get<std::string>());
audioInterface_->output(audioBytes);
} else if (type == "agent_response" && callbackAgentResponse_) {
auto event = message["agent_response_event"];
callbackAgentResponse_(event["agent_response"].get<std::string>());
} else if (type == "agent_response_correction" && callbackAgentResponseCorrection_) {
auto event = message["agent_response_correction_event"];
callbackAgentResponseCorrection_(event["original_agent_response"].get<std::string>(),
event["corrected_agent_response"].get<std::string>());
} else if (type == "user_transcript" && callbackUserTranscript_) {
auto event = message["user_transcription_event"];
callbackUserTranscript_(event["user_transcript"].get<std::string>());
} else if (type == "interruption") {
// User barge-in: remember the cutoff event id and flush queued playback.
auto event = message["interruption_event"];
lastInterruptId_.store(std::stoi(toString(event["event_id"])));
audioInterface_->interrupt();
} else if (type == "ping") {
// Keepalive: echo the event id back as a pong.
auto event = message["ping_event"];
nlohmann::json pong = {{"type", "pong"}, {"event_id", event["event_id"]}};
ws_->write(boost::asio::buffer(pong.dump()));
if (callbackLatencyMeasurement_ && event.contains("ping_ms")) {
// ping_ms may arrive as a number or as a numeric string.
int latency = event["ping_ms"].is_number() ? event["ping_ms"].get<int>() : std::stoi(event["ping_ms"].get<std::string>());
callbackLatencyMeasurement_(latency);
}
}
// Note: client tool call handling omitted for brevity.
}

View File

@ -0,0 +1,131 @@
#include "DefaultAudioInterface.hpp"

#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
// Initializes the PortAudio library. Every successful Pa_Initialize() must be
// balanced by the Pa_Terminate() in the destructor.
DefaultAudioInterface::DefaultAudioInterface() {
    PaError err = Pa_Initialize();
    if (err != paNoError) {
        // FIX: include PortAudio's own diagnostic instead of discarding it,
        // so init failures (missing backend, busy device) are actionable.
        throw std::runtime_error(std::string("PortAudio initialization failed: ") + Pa_GetErrorText(err));
    }
}
// Stops audio if still running, then shuts PortAudio down. shouldStop_ is
// true only after stop() has run, so this avoids a redundant stop() on an
// already-stopped interface.
DefaultAudioInterface::~DefaultAudioInterface() {
if (!shouldStop_.load()) {
stop();
}
// Balances the Pa_Initialize() performed in the constructor.
Pa_Terminate();
}
// Opens and starts the input (mic) and output (speaker) streams at
// 16 kHz / 16-bit / mono, then launches the playback thread.
// FIXES over the previous version:
//  - Pa_GetDefaultInputDevice/OutputDevice can return paNoDevice; calling
//    Pa_GetDeviceInfo on it returns null and the latency lookup crashed.
//  - On a failed open/start, streams opened earlier were leaked; they are
//    now closed before throwing, so a retry does not leak PortAudio handles.
//  - Error messages now carry Pa_GetErrorText detail.
void DefaultAudioInterface::start(AudioCallback inputCallback) {
    inputCallback_ = std::move(inputCallback);

    PaStreamParameters inputParams;
    std::memset(&inputParams, 0, sizeof(inputParams));
    inputParams.channelCount = 1;
    inputParams.device = Pa_GetDefaultInputDevice();
    inputParams.sampleFormat = paInt16;
    inputParams.hostApiSpecificStreamInfo = nullptr;

    PaStreamParameters outputParams;
    std::memset(&outputParams, 0, sizeof(outputParams));
    outputParams.channelCount = 1;
    outputParams.device = Pa_GetDefaultOutputDevice();
    outputParams.sampleFormat = paInt16;
    outputParams.hostApiSpecificStreamInfo = nullptr;

    // Guard before Pa_GetDeviceInfo: paNoDevice would yield a null info pointer.
    if (inputParams.device == paNoDevice || outputParams.device == paNoDevice) {
        throw std::runtime_error("No default audio input/output device available");
    }
    inputParams.suggestedLatency = Pa_GetDeviceInfo(inputParams.device)->defaultLowInputLatency;
    outputParams.suggestedLatency = Pa_GetDeviceInfo(outputParams.device)->defaultLowOutputLatency;

    // Close anything opened so far, then report the failure with PA's detail.
    auto fail = [this](const char* what, PaError code) {
        if (inputStream_) { Pa_CloseStream(inputStream_); inputStream_ = nullptr; }
        if (outputStream_) { Pa_CloseStream(outputStream_); outputStream_ = nullptr; }
        throw std::runtime_error(std::string(what) + ": " + Pa_GetErrorText(code));
    };

    PaError err = Pa_OpenStream(&inputStream_, &inputParams, nullptr, 16000, INPUT_FRAMES_PER_BUFFER, paClipOff,
                                &DefaultAudioInterface::inputCallbackStatic, this);
    if (err != paNoError) fail("Failed to open input stream", err);
    err = Pa_OpenStream(&outputStream_, nullptr, &outputParams, 16000, OUTPUT_FRAMES_PER_BUFFER, paClipOff, nullptr, nullptr);
    if (err != paNoError) fail("Failed to open output stream", err);
    if ((err = Pa_StartStream(inputStream_)) != paNoError) fail("Failed to start input stream", err);
    if ((err = Pa_StartStream(outputStream_)) != paNoError) fail("Failed to start output stream", err);

    shouldStop_.store(false);
    outputThread_ = std::thread(&DefaultAudioInterface::outputThreadFunc, this);
}
// Stops the playback thread and both streams. Safe to call more than once:
// the stream handles are nulled so a second call is a no-op on them.
void DefaultAudioInterface::stop() {
shouldStop_.store(true);
// Wake the playback thread so it can observe shouldStop_ and exit.
queueCv_.notify_all();
if (outputThread_.joinable()) {
outputThread_.join();
}
if (inputStream_) {
Pa_StopStream(inputStream_);
Pa_CloseStream(inputStream_);
inputStream_ = nullptr;
}
if (outputStream_) {
Pa_StopStream(outputStream_);
Pa_CloseStream(outputStream_);
outputStream_ = nullptr;
}
}
// Appends a chunk of 16-bit PCM audio to the playback queue and wakes the
// playback thread. The lock is released before notifying.
void DefaultAudioInterface::output(const std::vector<char>& audio) {
    {
        std::lock_guard<std::mutex> lock(queueMutex_);
        outputQueue_.push(audio);
    }
    queueCv_.notify_one();
}
// Discards all queued (not-yet-played) audio. A chunk already handed to
// Pa_WriteStream finishes on its own.
void DefaultAudioInterface::interrupt() {
    std::lock_guard<std::mutex> lock(queueMutex_);
    outputQueue_ = std::queue<std::vector<char>>();
}
int DefaultAudioInterface::inputCallbackStatic(const void* input, void* /*output*/, unsigned long frameCount,
const PaStreamCallbackTimeInfo* /*timeInfo*/, PaStreamCallbackFlags /*statusFlags*/,
void* userData) {
auto* self = static_cast<DefaultAudioInterface*>(userData);
return self->inputCallbackInternal(input, frameCount);
}
// Runs on PortAudio's audio thread. Copies the captured int16 samples into a
// byte vector and hands them to the registered callback. Input is dropped
// while output is playing to avoid echo feedback.
int DefaultAudioInterface::inputCallbackInternal(const void* input, unsigned long frameCount) {
    if (input == nullptr || !inputCallback_) {
        return paContinue;
    }
    // Suppress microphone input while playing output to avoid echo feedback.
    if (outputPlaying_.load()) {
        return paContinue;
    }
    const auto* samples = static_cast<const char*>(input);
    std::vector<char> chunk(samples, samples + frameCount * sizeof(int16_t));
    inputCallback_(chunk);
    return paContinue;
}
// Playback loop: waits for queued chunks and writes them to the output
// stream. Pa_WriteStream blocks until the device consumes the buffer, so
// outputPlaying_ brackets actual speaker activity — inputCallbackInternal
// reads it to mute the microphone (echo suppression).
// NOTE(review): outputPlaying_ briefly drops to false between queued chunks;
// a mic callback landing in that gap could slip audio through — confirm
// whether that window matters in practice.
void DefaultAudioInterface::outputThreadFunc() {
while (!shouldStop_.load()) {
std::vector<char> audio;
{
std::unique_lock<std::mutex> lk(queueMutex_);
// Sleep until there is work or stop() asks us to exit.
queueCv_.wait(lk, [this] { return shouldStop_.load() || !outputQueue_.empty(); });
if (shouldStop_.load()) break;
audio = std::move(outputQueue_.front());
outputQueue_.pop();
}
if (!audio.empty() && outputStream_) {
outputPlaying_.store(true);
// Frame count = bytes / 2 (16-bit mono samples).
Pa_WriteStream(outputStream_, audio.data(), audio.size() / sizeof(int16_t));
outputPlaying_.store(false);
}
}
}

View File

@ -0,0 +1,31 @@
#include "Conversation.hpp"
#include "DefaultAudioInterface.hpp"

#include <cstdlib>
#include <exception>
#include <iostream>
#include <memory>
// Demo entry point: connects the default audio devices to the ElevenLabs
// conversational agent named by the AGENT_ID environment variable.
// Returns 0 on success, 1 on configuration or runtime failure.
int main() {
    const char* agentIdEnv = std::getenv("AGENT_ID");
    // Treat an empty value the same as an unset one.
    if (!agentIdEnv || *agentIdEnv == '\0') {
        std::cerr << "AGENT_ID environment variable must be set" << std::endl;
        return 1;
    }
    try {
        std::string agentId(agentIdEnv);
        // DefaultAudioInterface's constructor throws if PortAudio cannot
        // initialize; without the surrounding try/catch that exception would
        // reach std::terminate with no user-readable diagnostic.
        auto audioInterface = std::make_shared<DefaultAudioInterface>();
        Conversation conv(agentId, /*requiresAuth*/ false, audioInterface,
            [](const std::string& resp) { std::cout << "Agent: " << resp << std::endl; },
            [](const std::string& orig, const std::string& corrected) {
                std::cout << "Agent correction: " << orig << " -> " << corrected << std::endl; },
            [](const std::string& transcript) { std::cout << "User: " << transcript << std::endl; });
        conv.startSession();
        std::cout << "Press Enter to quit..." << std::endl;
        std::cin.get();
        conv.endSession();
        auto convId = conv.waitForSessionEnd();
        std::cout << "Conversation ID: " << convId << std::endl;
    } catch (const std::exception& ex) {
        std::cerr << "Fatal error: " << ex.what() << std::endl;
        return 1;
    }
    return 0;
}

View File

@ -158,6 +158,8 @@ void UElevenLabsConversationalAgentComponent::StartConversation()
&UElevenLabsConversationalAgentComponent::HandleInterrupted);
WebSocketProxy->OnAgentResponseStarted.AddDynamic(this,
&UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted);
WebSocketProxy->OnAgentResponsePart.AddDynamic(this,
&UElevenLabsConversationalAgentComponent::HandleAgentResponsePart);
}
// Pass configuration to the proxy before connecting.
@ -266,7 +268,23 @@ void UElevenLabsConversationalAgentComponent::StopListening()
// Flush any partially-accumulated mic audio before signalling end-of-turn.
// This ensures the final words aren't discarded just because the last callback
// didn't push the buffer over the MicChunkMinBytes threshold.
if (MicAccumulationBuffer.Num() > 0 && WebSocketProxy && IsConnected())
//
// EXCEPT during collision avoidance: bAgentGenerating is already true when
// HandleAgentResponseStarted calls StopListening (it sets the flag before calling us).
// Flushing audio to a server that is mid-generation can cause it to re-enter
// "user speaking" state and stall waiting for more audio that never arrives,
// leaving both sides stuck — no audio for the collision response and no response
// for subsequent turns.
if (bAgentGenerating)
{
if (MicAccumulationBuffer.Num() > 0)
{
UE_LOG(LogElevenLabsAgent, Log,
TEXT("StopListening: discarding %d bytes of accumulated mic audio (collision — server is mid-generation)."),
MicAccumulationBuffer.Num());
}
}
else if (MicAccumulationBuffer.Num() > 0 && WebSocketProxy && IsConnected())
{
WebSocketProxy->SendAudioChunk(MicAccumulationBuffer);
}
@ -423,7 +441,8 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
{
// The server has started generating a response (first agent_chat_response_part).
// Set bAgentGenerating BEFORE StopListening so that any StartListening call
// triggered by the Blueprint's OnAgentStartedGenerating handler is blocked.
// triggered by the Blueprint's OnAgentStartedGenerating handler is blocked,
// and so that StopListening knows to skip the mic buffer flush (collision path).
bAgentGenerating = true;
bWaitingForAgentResponse = false; // Server is generating — response timeout cancelled.
@ -433,21 +452,37 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
if (bIsListening)
{
// Collision: server started generating Turn N's response while Turn M (M>N) mic was open.
// Log both turn indices so the timeline is unambiguous.
// The server's VAD detected a pause in the user's speech and started generating
// prematurely — the user hasn't finished speaking yet.
//
// Stop the mic WITHOUT flushing the accumulated audio buffer (see StopListening's
// bAgentGenerating guard). Flushing would send audio to a server that is mid-generation,
// causing it to re-enter "user speaking" state and stall — both sides stuck.
//
// Do NOT send an interrupt here — just let the server's response play out:
// - If audio arrives → EnqueueAgentAudio sets bAgentSpeaking, response plays normally.
// - If audio never arrives → generating timeout (10s) clears bAgentGenerating.
// Either way the state machine recovers and Blueprint can reopen the mic.
UE_LOG(LogElevenLabsAgent, Log,
TEXT("[T+%.2fs] [Turn %d → Turn %d collision] Agent generating Turn %d response — mic (Turn %d) was open, stopping. (%.2fs after turn end)"),
T, LastClosedTurnIndex, TurnIndex, LastClosedTurnIndex, TurnIndex, LatencyFromTurnEnd);
StopListening();
}
else
{
UE_LOG(LogElevenLabsAgent, Log,
TEXT("[T+%.2fs] [Turn %d] Agent generating. (%.2fs after turn end)"),
T, LastClosedTurnIndex, LatencyFromTurnEnd);
}
UE_LOG(LogElevenLabsAgent, Log,
TEXT("[T+%.2fs] [Turn %d] Agent generating. (%.2fs after turn end)"),
T, LastClosedTurnIndex, LatencyFromTurnEnd);
OnAgentStartedGenerating.Broadcast();
}
void UElevenLabsConversationalAgentComponent::HandleAgentResponsePart(const FString& PartialText)
{
if (bEnableAgentPartialResponse)
{
OnAgentPartialResponse.Broadcast(PartialText);
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Audio playback
// ─────────────────────────────────────────────────────────────────────────────
@ -569,9 +604,15 @@ void UElevenLabsConversationalAgentComponent::OnMicrophoneDataCaptured(const TAr
{
if (!IsConnected() || !bIsListening) return;
// Echo suppression: skip sending mic audio while the agent is speaking.
// This prevents the agent from hearing its own voice through the speakers,
// which would confuse the server's VAD and STT. Matches the approach used
// in the official ElevenLabs C++ SDK (outputPlaying_ flag).
if (bAgentSpeaking) return;
// Convert this callback's samples to int16 bytes and accumulate.
// WASAPI fires every ~5ms (158 bytes at 16kHz). ElevenLabs needs ≥100ms
// (3200 bytes) per chunk for reliable VAD and STT. We hold bytes here
// WASAPI fires every ~5ms (158 bytes at 16kHz). ElevenLabs needs ≥250ms
// (8000 bytes) per chunk for reliable VAD and STT. We hold bytes here
// until we have enough, then send the whole batch in one WebSocket frame.
TArray<uint8> PCMBytes = FloatPCMToInt16Bytes(FloatPCM);
MicAccumulationBuffer.Append(PCMBytes);

View File

@ -158,20 +158,6 @@ void UElevenLabsWebSocketProxy::SendUserTurnEnd()
// in a loop: part arrives → event → StopListening → SendUserTurnEnd → flag reset → part arrives → loop.
// The flag is only reset in SendUserTurnStart() at the beginning of a new user turn.
// Clear the interrupt-ignore flag if it was never cleared by an "interruption" server ack.
// The ElevenLabs server does not always send the "interruption" acknowledgement reliably.
// By the time the user has spoken a full new turn (seconds of audio), any in-flight content
// from the previously interrupted generation has long since arrived — it is safe to resume
// normal content processing so the server's response to this new turn is not silently discarded.
if (bIgnoreIncomingContent)
{
bIgnoreIncomingContent = false;
const double T = UserTurnEndTime - SessionStartTime;
UE_LOG(LogElevenLabsWS, Log,
TEXT("[T+%.2fs] Cleared interrupt-ignore flag at turn end (server 'interruption' ack was not received — resuming content processing)."),
T);
}
const double T = UserTurnEndTime - SessionStartTime;
UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] User turn ended — server VAD silence detection started (turn_timeout=1s)."), T);
}
@ -196,12 +182,7 @@ void UElevenLabsWebSocketProxy::SendInterrupt()
{
if (!IsConnected()) return;
// Immediately start discarding in-flight audio and chat response parts from
// the generation we are about to interrupt. The server may still send several
// frames before it processes our interrupt. We stop ignoring once the server
// sends its "interruption" acknowledgement (HandleInterruption).
bIgnoreIncomingContent = true;
UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt — ignoring incoming content until server acks."));
UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt."));
TSharedPtr<FJsonObject> Msg = MakeShareable(new FJsonObject());
Msg->SetStringField(TEXT("type"), ElevenLabsMessageType::Interrupt);
@ -400,7 +381,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message)
}
else if (MsgType == ElevenLabsMessageType::AgentChatResponsePart)
{
HandleAgentChatResponsePart();
HandleAgentChatResponsePart(Root);
}
else if (MsgType == ElevenLabsMessageType::AgentResponseCorrection)
{
@ -467,17 +448,9 @@ void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size,
}
// Broadcast raw PCM bytes directly to the audio queue.
// Discard if we are waiting for an interruption ack (same logic as HandleAudioResponse).
TArray<uint8> PCMData = MoveTemp(BinaryFrameBuffer);
BinaryFrameBuffer.Reset();
if (!bIgnoreIncomingContent)
{
OnAudioReceived.Broadcast(PCMData);
}
else
{
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding binary audio frame (interrupt pending server ack)."));
}
OnAudioReceived.Broadcast(PCMData);
}
}
@ -507,15 +480,6 @@ void UElevenLabsWebSocketProxy::HandleConversationInitiation(const TSharedPtr<FJ
void UElevenLabsWebSocketProxy::HandleAudioResponse(const TSharedPtr<FJsonObject>& Root)
{
// Discard audio that belongs to an interrupted generation.
// The server may send several more audio frames after we sent "interrupt" —
// they must not restart the speaking state on the client side.
if (bIgnoreIncomingContent)
{
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding audio frame (interrupt pending server ack)."));
return;
}
// Expected structure:
// { "type": "audio",
// "audio_event": { "audio_base_64": "<base64 PCM>", "event_id": 1 }
@ -527,6 +491,17 @@ void UElevenLabsWebSocketProxy::HandleAudioResponse(const TSharedPtr<FJsonObject
return;
}
// Discard audio belonging to an interrupted generation (event_id approach).
// Matches the official ElevenLabs C++ and Python SDKs: only AUDIO is filtered
// by event_id — transcripts, agent_response, etc. are always processed.
int32 EventId = 0;
(*AudioEvent)->TryGetNumberField(TEXT("event_id"), EventId);
if (EventId > 0 && EventId <= LastInterruptEventId)
{
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding audio event_id=%d (interrupted at %d)."), EventId, LastInterruptEventId);
return;
}
FString Base64Audio;
if (!(*AudioEvent)->TryGetStringField(TEXT("audio_base_64"), Base64Audio))
{
@ -569,16 +544,6 @@ void UElevenLabsWebSocketProxy::HandleTranscript(const TSharedPtr<FJsonObject>&
void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject>& Root)
{
// ISSUE-19: discard agent_response that belongs to an interrupted generation.
// A stale agent_response from the cancelled turn would set bAgentResponseReceived=true
// on the component, allowing the silence-detection Tick to fire OnAgentStoppedSpeaking
// at the wrong time (no audio is currently playing for the new turn yet).
if (bIgnoreIncomingContent)
{
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_response (interrupt pending server ack)."));
return;
}
// ISSUE-22: reset bAgentResponseStartedFired so OnAgentResponseStarted fires again on
// the next turn. In Server VAD mode SendUserTurnStart() is never called — it is the only
// other place that resets this flag — so without this reset, OnAgentResponseStarted fires
@ -602,18 +567,8 @@ void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject
OnAgentResponse.Broadcast(ResponseText);
}
void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart()
void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Root)
{
// Ignore response parts that belong to a generation we have already interrupted.
// Without this guard, old parts arriving after SendInterrupt() would re-trigger
// OnAgentResponseStarted (bAgentResponseStartedFired was reset in SendUserTurnStart),
// causing the component to stop the newly-opened microphone — creating an infinite loop.
if (bIgnoreIncomingContent)
{
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_chat_response_part (interrupt pending server ack)."));
return;
}
// agent_chat_response_part = the server is actively generating a response (LLM token stream).
// Fire OnAgentResponseStarted once per turn so the component can auto-stop the microphone
// if the Blueprint restarted listening before the server finished processing the previous turn.
@ -628,15 +583,39 @@ void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart()
T, LatencyFromTurnEnd);
OnAgentResponseStarted.Broadcast();
}
// Subsequent parts logged at Verbose only (can be dozens per response).
// Extract the streaming text fragment and broadcast it.
// API structure:
// { "type": "agent_chat_response_part",
// "agent_chat_response_part_event": { "agent_response_part": "partial text" }
// }
const TSharedPtr<FJsonObject>* PartEvent = nullptr;
if (Root->TryGetObjectField(TEXT("agent_chat_response_part_event"), PartEvent) && PartEvent)
{
FString PartText;
if ((*PartEvent)->TryGetStringField(TEXT("agent_response_part"), PartText) && !PartText.IsEmpty())
{
OnAgentResponsePart.Broadcast(PartText);
}
}
}
void UElevenLabsWebSocketProxy::HandleInterruption(const TSharedPtr<FJsonObject>& Root)
{
// Server has acknowledged the interruption — the old generation is fully stopped.
// Resume accepting incoming audio and chat response parts (for the next turn).
bIgnoreIncomingContent = false;
UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack received — resuming content processing)."));
// Extract the interrupt event_id so we can filter stale audio frames.
// { "type": "interruption", "interruption_event": { "event_id": 42 } }
const TSharedPtr<FJsonObject>* InterruptEvent = nullptr;
if (Root->TryGetObjectField(TEXT("interruption_event"), InterruptEvent) && InterruptEvent)
{
int32 EventId = 0;
(*InterruptEvent)->TryGetNumberField(TEXT("event_id"), EventId);
if (EventId > LastInterruptEventId)
{
LastInterruptEventId = EventId;
}
}
UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack, LastInterruptEventId=%d)."), LastInterruptEventId);
OnInterrupted.Broadcast();
}

View File

@ -43,6 +43,15 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted);
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStartedGenerating);
/**
* Fired for every agent_chat_response_part streams the agent's text as the LLM
* generates it, token by token. Use this for real-time subtitles / text display.
* Each call provides the text fragment from that individual part (NOT accumulated).
* The final complete text is still available via OnAgentTextResponse (agent_response).
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAgentPartialResponse,
const FString&, PartialText);
/**
* Fired when the server has not started generating a response within ResponseTimeoutSeconds
* after the user stopped speaking (StopListening was called).
@ -138,6 +147,15 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events")
bool bEnableAgentTextResponse = true;
/**
* Forward streaming text parts (agent_chat_response_part events) to the
* OnAgentPartialResponse delegate. Each part is a text fragment as the LLM
* generates it use this for real-time subtitles that appear while the agent
* speaks, instead of waiting for the full text (OnAgentTextResponse).
*/
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events")
bool bEnableAgentPartialResponse = false;
/**
* How many seconds to wait for the server to start generating a response
* after the user stops speaking (StopListening) before firing OnAgentResponseTimeout.
@ -168,6 +186,14 @@ public:
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
FOnAgentTextResponse OnAgentTextResponse;
/**
* Streaming text fragments as the LLM generates them.
* Fires for every agent_chat_response_part each call gives one text chunk.
* Enable with bEnableAgentPartialResponse.
*/
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
FOnAgentPartialResponse OnAgentPartialResponse;
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
FOnAgentStartedSpeaking OnAgentStartedSpeaking;
@ -285,6 +311,9 @@ private:
UFUNCTION()
void HandleAgentResponseStarted();
UFUNCTION()
void HandleAgentResponsePart(const FString& PartialText);
// ── Audio playback ────────────────────────────────────────────────────────
void InitAudioPlayback();
void EnqueueAgentAudio(const TArray<uint8>& PCMData);
@ -371,5 +400,5 @@ private:
// ElevenLabs needs at least ~100ms (3200 bytes) per chunk for reliable VAD/STT.
// We accumulate here and only call SendAudioChunk once enough bytes are ready.
TArray<uint8> MicAccumulationBuffer;
static constexpr int32 MicChunkMinBytes = 3200; // 100ms @ 16kHz 16-bit mono
static constexpr int32 MicChunkMinBytes = 8000; // 250ms @ 16kHz 16-bit mono (4000 samples, matches ElevenLabs SDK recommendation)
};

View File

@ -43,6 +43,11 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnElevenLabsInterrupted);
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnElevenLabsAgentResponseStarted);
/** Fired for every agent_chat_response_part — streams the LLM text as it is generated.
* PartialText is the text fragment from this individual part (NOT accumulated). */
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnElevenLabsAgentResponsePart,
const FString&, PartialText);
// ─────────────────────────────────────────────────────────────────────────────
// WebSocket Proxy
@ -94,6 +99,10 @@ public:
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
FOnElevenLabsAgentResponseStarted OnAgentResponseStarted;
/** Fired for every agent_chat_response_part with the streaming text fragment. */
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
FOnElevenLabsAgentResponsePart OnAgentResponsePart;
// ── Lifecycle ─────────────────────────────────────────────────────────────
/**
@ -182,7 +191,7 @@ private:
void HandleAudioResponse(const TSharedPtr<FJsonObject>& Payload);
void HandleTranscript(const TSharedPtr<FJsonObject>& Payload);
void HandleAgentResponse(const TSharedPtr<FJsonObject>& Payload);
void HandleAgentChatResponsePart();
void HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Payload);
void HandleInterruption(const TSharedPtr<FJsonObject>& Payload);
void HandlePing(const TSharedPtr<FJsonObject>& Payload);
@ -217,11 +226,12 @@ private:
// Used to compute [T+Xs] session-relative timestamps in all log messages.
double SessionStartTime = 0.0;
// Set to true in SendInterrupt() so that in-flight audio frames and
// agent_chat_response_part messages from the interrupted generation are silently
// discarded instead of re-triggering the speaking/generating state.
// Cleared when the server sends its "interruption" acknowledgement.
bool bIgnoreIncomingContent = false;
// ── Interrupt filtering (event_id approach, matching official SDK) ────────
// When the server sends an "interruption" event it includes an event_id.
// Audio events whose event_id <= LastInterruptEventId belong to the cancelled
// generation and must be discarded. Only AUDIO is filtered — transcripts,
// agent_response, agent_chat_response_part etc. are always processed.
int32 LastInterruptEventId = 0;
public:
// Set by UElevenLabsConversationalAgentComponent before calling Connect().