15 changed files with 82 additions and 1085 deletions
--- a/CPP/elevenlabs-convai-cpp-main/.gitignore
+++ b/CPP/elevenlabs-convai-cpp-main/.gitignore
@ -1,152 +0,0 @@
 # Build directories
 build/
 cmake-build-*/
 out/
 # Compiled Object files
 *.slo
 *.lo
 *.o
 *.obj
 # Precompiled Headers
 *.gch
 *.pch
 # Compiled Dynamic libraries
 *.so
 *.dylib
 *.dll
 # Fortran module files
 *.mod
 *.smod
 # Compiled Static libraries
 *.lai
 *.la
 *.a
 *.lib
 # Executables
 *.exe
 *.out
 *.app
 convai_cpp
 # CMake
 CMakeCache.txt
 CMakeFiles/
 CMakeScripts/
 Testing/
 Makefile
 cmake_install.cmake
 install_manifest.txt
 compile_commands.json
 CTestTestfile.cmake
 _deps/
 # IDE files
 .vscode/
 .idea/
 *.swp
 *.swo
 *~
 # macOS
 .DS_Store
 .AppleDouble
 .LSOverride
 # Thumbnails
 ._*
 # Files that might appear in the root of a volume
 .DocumentRevisions-V100
 .fseventsd
 .Spotlight-V100
 .TemporaryItems
 .Trashes
 .VolumeIcon.icns
 .com.apple.timemachine.donotpresent
 # Directories potentially created on remote AFP share
 .AppleDB
 .AppleDesktop
 Network Trash Folder
 Temporary Items
 .apdisk
 # Windows
 Thumbs.db
 ehthumbs.db
 Desktop.ini
 $RECYCLE.BIN/
 *.cab
 *.msi
 *.msm
 *.msp
 *.lnk
 # Linux
 *~
 .fuse_hidden*
 .directory
 .Trash-*
 .nfs*
 # Logs
 *.log
 # Runtime data
 pids
 *.pid
 *.seed
 *.pid.lock
 # Coverage directory used by tools like istanbul
 coverage/
 # nyc test coverage
 .nyc_output
 # Dependency directories
 node_modules/
 # Optional npm cache directory
 .npm
 # Optional REPL history
 .node_repl_history
 # Output of 'npm pack'
 *.tgz
 # Yarn Integrity file
 .yarn-integrity
 # dotenv environment variables file
 .env
 .env.test
 # parcel-bundler cache (https://parceljs.org/)
 .cache
 .parcel-cache
 # next.js build output
 .next
 # nuxt.js build output
 .nuxt
 # vuepress build output
 .vuepress/dist
 # Serverless directories
 .serverless
 # FuseBox cache
 .fusebox/
 # DynamoDB Local files
 .dynamodb/ 
--- a/CPP/elevenlabs-convai-cpp-main/CMakeLists.txt
+++ b/CPP/elevenlabs-convai-cpp-main/CMakeLists.txt
@ -1,42 +0,0 @@
 cmake_minimum_required(VERSION 3.14)
 project(elevenlabs_convai_cpp LANGUAGES CXX)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # Find dependencies
 find_package(Boost REQUIRED COMPONENTS system thread)
 find_package(OpenSSL REQUIRED)
 # PortAudio via vcpkg CMake config
 find_package(portaudio CONFIG REQUIRED)
 # Find nlohmann_json
 find_package(nlohmann_json 3.11 QUIET)
 if(NOT nlohmann_json_FOUND)
    include(FetchContent)
    # Fallback: header-only fetch to avoid old CMake policies in upstream CMakeLists
    FetchContent_Declare(
      nlohmann_json_src
      URL https://raw.githubusercontent.com/nlohmann/json/v3.11.2/single_include/nlohmann/json.hpp
    )
    FetchContent_MakeAvailable(nlohmann_json_src)
    add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED)
    target_include_directories(nlohmann_json::nlohmann_json INTERFACE ${nlohmann_json_src_SOURCE_DIR}/single_include)
 endif()
 add_executable(convai_cpp
    src/main.cpp
    src/Conversation.cpp
    src/DefaultAudioInterface.cpp
 )
 target_include_directories(convai_cpp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
 # MSVC: set Windows target version and suppress getenv deprecation warning
 if(MSVC)
    target_compile_definitions(convai_cpp PRIVATE _WIN32_WINNT=0x0A00 _CRT_SECURE_NO_WARNINGS)
 endif()
 target_link_libraries(convai_cpp PRIVATE Boost::system Boost::thread OpenSSL::SSL OpenSSL::Crypto portaudio nlohmann_json::nlohmann_json) 
--- a/CPP/elevenlabs-convai-cpp-main/LICENSE
+++ b/CPP/elevenlabs-convai-cpp-main/LICENSE
@ -1,21 +0,0 @@
 MIT License
 Copyright (c) 2024 Jitendra
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE. 
--- a/CPP/elevenlabs-convai-cpp-main/README.md
+++ b/CPP/elevenlabs-convai-cpp-main/README.md
@ -1,197 +0,0 @@
 # ElevenLabs Conversational AI - C++ Implementation
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![C++17](https://img.shields.io/badge/C%2B%2B-17-blue.svg)](https://en.wikipedia.org/wiki/C%2B%2B17)
 [![CMake](https://img.shields.io/badge/CMake-3.14+-green.svg)](https://cmake.org/)
 C++ implementation of ElevenLabs Conversational AI client
 ## Features
 - **Real-time Audio Processing**: Full-duplex audio streaming with low-latency playback
 - **WebSocket Integration**: Secure WSS connection to ElevenLabs Conversational AI platform
 - **Cross-platform Audio**: PortAudio-based implementation supporting Windows, macOS, and Linux
 - **Echo Suppression**: Built-in acoustic feedback prevention
 - **Modern C++**: Clean, maintainable C++17 codebase with proper RAII and exception handling
 - **Flexible Architecture**: Modular design allowing easy customization and extension
 ## Architecture
 ```mermaid
 graph TB
    subgraph "User Interface"
        A[main.cpp] --> B[Conversation]
    end
    subgraph "Core Components"
        B --> C[DefaultAudioInterface]
        B --> D[WebSocket Client]
        C --> E[PortAudio]
        D --> F[Boost.Beast + OpenSSL]
    end
    subgraph "ElevenLabs Platform"
        F --> G[WSS API Endpoint]
        G --> H[Conversational AI Agent]
    end
    subgraph "Audio Flow"
        I[Microphone] --> C
        C --> J[Base64 Encoding]
        J --> D
        D --> K[Audio Events]
        K --> L[Base64 Decoding]
        L --> C
        C --> M[Speakers]
    end
    subgraph "Message Types"
        N[user_audio_chunk]
        O[agent_response]
        P[user_transcript]
        Q[audio_event]
        R[ping/pong]
    end
    style B fill:#e1f5fe
    style C fill:#f3e5f5
    style D fill:#e8f5e8
    style H fill:#fff3e0
 ```
 ## Quick Start
 ### Prerequisites
 - **C++17 compatible compiler**: GCC 11+, Clang 14+, or MSVC 2022+
 - **CMake** 3.14 or higher
 - **Dependencies** (install via package manager):
 #### macOS (Homebrew)
 ```bash
 brew install boost openssl portaudio nlohmann-json cmake pkg-config
 ```
 #### Ubuntu/Debian
 ```bash
 sudo apt update
 sudo apt install build-essential cmake pkg-config
 sudo apt install libboost-system-dev libboost-thread-dev
 sudo apt install libssl-dev libportaudio2-dev nlohmann-json3-dev
 ```
 #### Windows (vcpkg)
 ```bash
 vcpkg install boost-system boost-thread openssl portaudio nlohmann-json
 ```
 ### Building
 ```bash
 # Clone the repository
 git clone https://github.com/Jitendra2603/elevenlabs-convai-cpp.git
 cd elevenlabs-convai-cpp
 # Build the project
 mkdir build && cd build
 cmake ..
 cmake --build . --config Release
 ```
 ### Running
 ```bash
 # Set your agent ID (get this from ElevenLabs dashboard)
 export AGENT_ID="your-agent-id-here"
 # Run the demo
 ./convai_cpp
 ```
 The application will:
 1. Connect to your ElevenLabs Conversational AI agent
 2. Start capturing audio from your default microphone
 3. Stream audio to the agent and play responses through speakers
 4. Display conversation transcripts in the terminal
 5. Continue until you press Enter to quit
 ## 📋 Usage Examples
 ### Basic Conversation
 ```bash
 export AGENT_ID="agent_"
 ./convai_cpp
 # Speak into your microphone and hear the AI agent respond
 ```
 ## Configuration
 ### Audio Settings
 The audio interface is configured for optimal real-time performance:
 - **Sample Rate**: 16 kHz 
 - **Format**: 16-bit PCM mono
 - **Input Buffer**: 250ms (4000 frames)
 - **Output Buffer**: 62.5ms (1000 frames)
 ### WebSocket Connection
 - **Endpoint**: `wss://api.elevenlabs.io/v1/convai/conversation`
 - **Protocol**: WebSocket Secure (WSS) with TLS 1.2+
 - **Authentication**: Optional (required for private agents)
 ## Project Structure
 ```
 elevenlabs-convai-cpp/
 ├── CMakeLists.txt              # Build configuration
 ├── README.md                   # This file
 ├── LICENSE                     # MIT license
 ├── CONTRIBUTING.md             # Contribution guidelines
 ├── .gitignore                  # Git ignore rules
 ├── include/                    # Header files
 │   ├── AudioInterface.hpp      # Abstract audio interface
 │   ├── DefaultAudioInterface.hpp # PortAudio implementation
 │   └── Conversation.hpp        # Main conversation handler
 └── src/                        # Source files
    ├── main.cpp                # Demo application
    ├── Conversation.cpp        # WebSocket and message handling
    └── DefaultAudioInterface.cpp # Audio I/O implementation
 ```
 ## Technical Details
 ### Audio Processing Pipeline
 1. **Capture**: PortAudio captures 16-bit PCM audio at 16kHz
 2. **Encoding**: Raw audio is base64-encoded for WebSocket transmission
 3. **Streaming**: Audio chunks sent as `user_audio_chunk` messages
 4. **Reception**: Server sends `audio_event` messages with agent responses
 5. **Decoding**: Base64 audio data decoded back to PCM
 6. **Playback**: Audio queued and played through PortAudio output stream
 ### Echo Suppression
 The implementation includes a simple, effective echo suppression mechanism:
 - Microphone input is suppressed during agent speech playback
 - Prevents acoustic feedback loops that cause the agent to respond to itself
 - Uses atomic flags for thread-safe coordination between input/output
 ### WebSocket Message Handling
 Supported message types:
 - `conversation_initiation_client_data` - Session initialization
 - `user_audio_chunk` - Microphone audio data
 - `audio_event` - Agent speech audio
 - `agent_response` - Agent text responses
 - `user_transcript` - Speech-to-text results
 - `ping`/`pong` - Connection keepalive
 ## 📝 License
 This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
--- a/CPP/elevenlabs-convai-cpp-main/include/AudioInterface.hpp
+++ b/CPP/elevenlabs-convai-cpp-main/include/AudioInterface.hpp
@ -1,23 +0,0 @@
 #pragma once
 #include <functional>
 #include <vector>
 class AudioInterface {
 public:
    using AudioCallback = std::function<void(const std::vector<char>&)>;
    virtual ~AudioInterface() = default;
    // Starts the audio interface. The callback will be invoked with raw 16-bit PCM mono samples at 16kHz.
    virtual void start(AudioCallback inputCallback) = 0;
    // Stops audio I/O and releases underlying resources.
    virtual void stop() = 0;
    // Play audio to the user; audio is 16-bit PCM mono 16kHz.
    virtual void output(const std::vector<char>& audio) = 0;
    // Immediately stop any buffered / ongoing output.
    virtual void interrupt() = 0;
 }; 
--- a/CPP/elevenlabs-convai-cpp-main/include/Conversation.hpp
+++ b/CPP/elevenlabs-convai-cpp-main/include/Conversation.hpp
@ -1,72 +0,0 @@
 #pragma once
 #include "AudioInterface.hpp"
 #include <boost/beast/core.hpp>
 #include <boost/beast/websocket.hpp>
 #include <boost/beast/ssl.hpp>
 #include <boost/asio.hpp>
 #include <boost/asio/ssl/stream.hpp>
 #include <boost/asio/ip/tcp.hpp>
 #include <nlohmann/json.hpp>
 #include <thread>
 #include <atomic>
 #include <functional>
 class Conversation {
 public:
    using CallbackAgentResponse = std::function<void(const std::string&)>;
    using CallbackAgentResponseCorrection = std::function<void(const std::string&, const std::string&)>;
    using CallbackUserTranscript = std::function<void(const std::string&)>;
    using CallbackLatencyMeasurement = std::function<void(int)>;
    Conversation(
        const std::string& agentId,
        bool requiresAuth,
        std::shared_ptr<AudioInterface> audioInterface,
        CallbackAgentResponse callbackAgentResponse = nullptr,
        CallbackAgentResponseCorrection callbackAgentResponseCorrection = nullptr,
        CallbackUserTranscript callbackUserTranscript = nullptr,
        CallbackLatencyMeasurement callbackLatencyMeasurement = nullptr
    );
    ~Conversation();
    void startSession();
    void endSession();
    std::string waitForSessionEnd();
    void sendUserMessage(const std::string& text);
    void registerUserActivity();
    void sendContextualUpdate(const std::string& content);
 private:
    void run();
    void handleMessage(const nlohmann::json& message);
    std::string getWssUrl() const;
    // networking members
    boost::asio::io_context ioc_;
    boost::asio::ssl::context sslCtx_{boost::asio::ssl::context::tlsv12_client};
    using tcp = boost::asio::ip::tcp;
    using websocket_t = boost::beast::websocket::stream<
        boost::beast::ssl_stream<tcp::socket>>;
    std::unique_ptr<websocket_t> ws_;
    // general state
    std::string agentId_;
    bool requiresAuth_;
    std::shared_ptr<AudioInterface> audioInterface_;
    CallbackAgentResponse callbackAgentResponse_;
    CallbackAgentResponseCorrection callbackAgentResponseCorrection_;
    CallbackUserTranscript callbackUserTranscript_;
    CallbackLatencyMeasurement callbackLatencyMeasurement_;
    std::thread workerThread_;
    std::atomic<bool> shouldStop_{false};
    std::string conversationId_;
    std::atomic<int> lastInterruptId_{0};
 }; 
--- a/CPP/elevenlabs-convai-cpp-main/include/DefaultAudioInterface.hpp
+++ b/CPP/elevenlabs-convai-cpp-main/include/DefaultAudioInterface.hpp
@ -1,45 +0,0 @@
 #pragma once
 #include "AudioInterface.hpp"
 #include <portaudio.h>
 #include <mutex>
 #include <condition_variable>
 #include <queue>
 #include <thread>
 #include <atomic>
 class DefaultAudioInterface : public AudioInterface {
 public:
    static constexpr int INPUT_FRAMES_PER_BUFFER = 4000;  // 250ms @ 16kHz
    static constexpr int OUTPUT_FRAMES_PER_BUFFER = 1000; // 62.5ms @ 16kHz
    DefaultAudioInterface();
    ~DefaultAudioInterface() override;
    void start(AudioCallback inputCallback) override;
    void stop() override;
    void output(const std::vector<char>& audio) override;
    void interrupt() override;
 private:
    static int inputCallbackStatic(const void* input, void* output, unsigned long frameCount,
                                   const PaStreamCallbackTimeInfo* timeInfo, PaStreamCallbackFlags statusFlags,
                                   void* userData);
    int inputCallbackInternal(const void* input, unsigned long frameCount);
    void outputThreadFunc();
    PaStream* inputStream_{};
    PaStream* outputStream_{};
    AudioCallback inputCallback_;
    std::queue<std::vector<char>> outputQueue_;
    std::mutex queueMutex_;
    std::condition_variable queueCv_;
    std::thread outputThread_;
    std::atomic<bool> shouldStop_{false};
    std::atomic<bool> outputPlaying_{false};
 }; 
--- a/CPP/elevenlabs-convai-cpp-main/src/Conversation.cpp
+++ b/CPP/elevenlabs-convai-cpp-main/src/Conversation.cpp
@ -1,230 +0,0 @@
 #include "Conversation.hpp"
 #include <boost/beast/websocket/ssl.hpp>
 #include <boost/beast/websocket.hpp>
 #include <boost/beast/ssl.hpp>
 #include <boost/beast/core/detail/base64.hpp>
 #include <boost/asio/connect.hpp>
 #include <boost/algorithm/string.hpp>
 #include <iostream>
 #include <sstream>
 #include <openssl/ssl.h>
 using tcp = boost::asio::ip::tcp;
 namespace ssl = boost::asio::ssl;
 namespace websocket = boost::beast::websocket;
 namespace beast = boost::beast;
 static std::string base64Encode(const std::vector<char>& data) {
    auto encodedSize = beast::detail::base64::encoded_size(data.size());
    std::string out(encodedSize, '\0');
    beast::detail::base64::encode(&out[0], data.data(), data.size());
    return out;
 }
 static std::vector<char> base64Decode(const std::string& str) {
    auto decodedSize = beast::detail::base64::decoded_size(str.size());
    std::vector<char> out(decodedSize);
    auto result = beast::detail::base64::decode(out.data(), str.data(), str.size());
    out.resize(result.first);
    return out;
 }
 static std::string toString(const nlohmann::json& j){
    if(j.is_string()) return j.get<std::string>();
    if(j.is_number_integer()) return std::to_string(j.get<int64_t>());
    return j.dump();
 }
 Conversation::Conversation(const std::string& agentId, bool requiresAuth,
                           std::shared_ptr<AudioInterface> audioInterface,
                           CallbackAgentResponse callbackAgentResponse,
                           CallbackAgentResponseCorrection callbackAgentResponseCorrection,
                           CallbackUserTranscript callbackUserTranscript,
                           CallbackLatencyMeasurement callbackLatencyMeasurement)
    : agentId_(agentId),
      requiresAuth_(requiresAuth),
      audioInterface_(std::move(audioInterface)),
      callbackAgentResponse_(std::move(callbackAgentResponse)),
      callbackAgentResponseCorrection_(std::move(callbackAgentResponseCorrection)),
      callbackUserTranscript_(std::move(callbackUserTranscript)),
      callbackLatencyMeasurement_(std::move(callbackLatencyMeasurement)) {
    sslCtx_.set_default_verify_paths();
 }
 Conversation::~Conversation() {
    endSession();
 }
 void Conversation::startSession() {
    shouldStop_.store(false);
    workerThread_ = std::thread(&Conversation::run, this);
 }
 void Conversation::endSession() {
    shouldStop_.store(true);
    if (ws_) {
        beast::error_code ec;
        ws_->close(websocket::close_code::normal, ec);
    }
    if (audioInterface_) {
        audioInterface_->stop();
    }
    if (workerThread_.joinable()) {
        workerThread_.join();
    }
 }
 std::string Conversation::waitForSessionEnd() {
    if (workerThread_.joinable()) {
        workerThread_.join();
    }
    return conversationId_;
 }
 void Conversation::sendUserMessage(const std::string& text) {
    if (!ws_) {
        throw std::runtime_error("Session not started");
    }
    nlohmann::json j = {
        {"type", "user_message"},
        {"text", text}
    };
    ws_->write(boost::asio::buffer(j.dump()));
 }
 void Conversation::registerUserActivity() {
    if (!ws_) throw std::runtime_error("Session not started");
    nlohmann::json j = {{"type", "user_activity"}};
    ws_->write(boost::asio::buffer(j.dump()));
 }
 void Conversation::sendContextualUpdate(const std::string& content) {
    if (!ws_) throw std::runtime_error("Session not started");
    nlohmann::json j = {{"type", "contextual_update"}, {"content", content}};
    ws_->write(boost::asio::buffer(j.dump()));
 }
 std::string Conversation::getWssUrl() const {
    // Hard-coded base env for demo; in production you'd call ElevenLabs env endpoint.
    std::ostringstream oss;
    oss << "wss://api.elevenlabs.io/v1/convai/conversation?agent_id=" << agentId_;
    return oss.str();
 }
 void Conversation::run() {
    try {
        auto url = getWssUrl();
        std::string protocol, host, target;
        unsigned short port = 443;
        // Very naive parse: wss://host[:port]/path?query
        if (boost::starts_with(url, "wss://")) {
            protocol = "wss";
            host = url.substr(6);
        } else {
            throw std::runtime_error("Only wss:// URLs supported in this demo");
        }
        auto slashPos = host.find('/');
        if (slashPos == std::string::npos) {
            target = "/";
        } else {
            target = host.substr(slashPos);
            host = host.substr(0, slashPos);
        }
        auto colonPos = host.find(':');
        if (colonPos != std::string::npos) {
            port = static_cast<unsigned short>(std::stoi(host.substr(colonPos + 1)));
            host = host.substr(0, colonPos);
        }
        tcp::resolver resolver(ioc_);
        auto const results = resolver.resolve(host, std::to_string(port));
        beast::ssl_stream<tcp::socket> stream(ioc_, sslCtx_);
        boost::asio::connect(beast::get_lowest_layer(stream), results);
        if (!SSL_set_tlsext_host_name(stream.native_handle(), host.c_str())) {
            throw std::runtime_error("Failed to set SNI hostname on SSL stream");
        }
        stream.handshake(ssl::stream_base::client);
        ws_ = std::make_unique<websocket_t>(std::move(stream));
        ws_->set_option(websocket::stream_base::timeout::suggested(beast::role_type::client));
        ws_->handshake(host, target);
        // send initiation data
        nlohmann::json init = {
            {"type", "conversation_initiation_client_data"},
            {"custom_llm_extra_body", nlohmann::json::object()},
            {"conversation_config_override", nlohmann::json::object()},
            {"dynamic_variables", nlohmann::json::object()}
        };
        ws_->write(boost::asio::buffer(init.dump()));
        // Prepare audio callback
        auto inputCb = [this](const std::vector<char>& audio) {
            nlohmann::json msg = {
                {"user_audio_chunk", base64Encode(audio)}
            };
            ws_->write(boost::asio::buffer(msg.dump()));
        };
        audioInterface_->start(inputCb);
        beast::flat_buffer buffer;
        while (!shouldStop_.load()) {
            beast::error_code ec;
            ws_->read(buffer, ec);
            if (ec) {
                std::cerr << "Websocket read error: " << ec.message() << std::endl;
                break;
            }
            auto text = beast::buffers_to_string(buffer.data());
            buffer.consume(buffer.size());
            try {
                auto message = nlohmann::json::parse(text);
                handleMessage(message);
            } catch (const std::exception& ex) {
                std::cerr << "JSON parse error: " << ex.what() << std::endl;
            }
        }
    } catch (const std::exception& ex) {
        std::cerr << "Conversation error: " << ex.what() << std::endl;
    }
 }
 void Conversation::handleMessage(const nlohmann::json& message) {
    std::string type = message.value("type", "");
    if (type == "conversation_initiation_metadata") {
        conversationId_ = message["conversation_initiation_metadata_event"]["conversation_id"].get<std::string>();
    } else if (type == "audio") {
        auto event = message["audio_event"];
        int eventId = std::stoi(toString(event["event_id"]));
        if (eventId <= lastInterruptId_.load()) return;
        auto audioBytes = base64Decode(event["audio_base_64"].get<std::string>());
        audioInterface_->output(audioBytes);
    } else if (type == "agent_response" && callbackAgentResponse_) {
        auto event = message["agent_response_event"];
        callbackAgentResponse_(event["agent_response"].get<std::string>());
    } else if (type == "agent_response_correction" && callbackAgentResponseCorrection_) {
        auto event = message["agent_response_correction_event"];
        callbackAgentResponseCorrection_(event["original_agent_response"].get<std::string>(),
                                         event["corrected_agent_response"].get<std::string>());
    } else if (type == "user_transcript" && callbackUserTranscript_) {
        auto event = message["user_transcription_event"];
        callbackUserTranscript_(event["user_transcript"].get<std::string>());
    } else if (type == "interruption") {
        auto event = message["interruption_event"];
        lastInterruptId_.store(std::stoi(toString(event["event_id"])));
        audioInterface_->interrupt();
    } else if (type == "ping") {
        auto event = message["ping_event"];
        nlohmann::json pong = {{"type", "pong"}, {"event_id", event["event_id"]}};
        ws_->write(boost::asio::buffer(pong.dump()));
        if (callbackLatencyMeasurement_ && event.contains("ping_ms")) {
            int latency = event["ping_ms"].is_number() ? event["ping_ms"].get<int>() : std::stoi(event["ping_ms"].get<std::string>());
            callbackLatencyMeasurement_(latency);
        }
    }
    // Note: client tool call handling omitted for brevity.
 } 
--- a/CPP/elevenlabs-convai-cpp-main/src/DefaultAudioInterface.cpp
+++ b/CPP/elevenlabs-convai-cpp-main/src/DefaultAudioInterface.cpp
@ -1,131 +0,0 @@
 #include "DefaultAudioInterface.hpp"
 #include <cstring>
 #include <iostream>
 DefaultAudioInterface::DefaultAudioInterface() {
    PaError err = Pa_Initialize();
    if (err != paNoError) {
        throw std::runtime_error("PortAudio initialization failed");
    }
 }
 DefaultAudioInterface::~DefaultAudioInterface() {
    if (!shouldStop_.load()) {
        stop();
    }
    Pa_Terminate();
 }
 void DefaultAudioInterface::start(AudioCallback inputCallback) {
    inputCallback_ = std::move(inputCallback);
    PaStreamParameters inputParams;
    std::memset(&inputParams, 0, sizeof(inputParams));
    inputParams.channelCount = 1;
    inputParams.device = Pa_GetDefaultInputDevice();
    inputParams.sampleFormat = paInt16;
    inputParams.suggestedLatency = Pa_GetDeviceInfo(inputParams.device)->defaultLowInputLatency;
    inputParams.hostApiSpecificStreamInfo = nullptr;
    PaStreamParameters outputParams;
    std::memset(&outputParams, 0, sizeof(outputParams));
    outputParams.channelCount = 1;
    outputParams.device = Pa_GetDefaultOutputDevice();
    outputParams.sampleFormat = paInt16;
    outputParams.suggestedLatency = Pa_GetDeviceInfo(outputParams.device)->defaultLowOutputLatency;
    outputParams.hostApiSpecificStreamInfo = nullptr;
    PaError err = Pa_OpenStream(&inputStream_, &inputParams, nullptr, 16000, INPUT_FRAMES_PER_BUFFER, paClipOff,
                                &DefaultAudioInterface::inputCallbackStatic, this);
    if (err != paNoError) {
        throw std::runtime_error("Failed to open input stream");
    }
    err = Pa_OpenStream(&outputStream_, nullptr, &outputParams, 16000, OUTPUT_FRAMES_PER_BUFFER, paClipOff, nullptr, nullptr);
    if (err != paNoError) {
        throw std::runtime_error("Failed to open output stream");
    }
    if ((err = Pa_StartStream(inputStream_)) != paNoError) {
        throw std::runtime_error("Failed to start input stream");
    }
    if ((err = Pa_StartStream(outputStream_)) != paNoError) {
        throw std::runtime_error("Failed to start output stream");
    }
    shouldStop_.store(false);
    outputThread_ = std::thread(&DefaultAudioInterface::outputThreadFunc, this);
 }
 void DefaultAudioInterface::stop() {
    shouldStop_.store(true);
    queueCv_.notify_all();
    if (outputThread_.joinable()) {
        outputThread_.join();
    }
    if (inputStream_) {
        Pa_StopStream(inputStream_);
        Pa_CloseStream(inputStream_);
        inputStream_ = nullptr;
    }
    if (outputStream_) {
        Pa_StopStream(outputStream_);
        Pa_CloseStream(outputStream_);
        outputStream_ = nullptr;
    }
 }
 void DefaultAudioInterface::output(const std::vector<char>& audio) {
    {
        std::lock_guard<std::mutex> lg(queueMutex_);
        outputQueue_.emplace(audio);
    }
    queueCv_.notify_one();
 }
 void DefaultAudioInterface::interrupt() {
    std::lock_guard<std::mutex> lg(queueMutex_);
    std::queue<std::vector<char>> empty;
    std::swap(outputQueue_, empty);
 }
 int DefaultAudioInterface::inputCallbackStatic(const void* input, void* /*output*/, unsigned long frameCount,
                                               const PaStreamCallbackTimeInfo* /*timeInfo*/, PaStreamCallbackFlags /*statusFlags*/,
                                               void* userData) {
    auto* self = static_cast<DefaultAudioInterface*>(userData);
    return self->inputCallbackInternal(input, frameCount);
 }
 int DefaultAudioInterface::inputCallbackInternal(const void* input, unsigned long frameCount) {
    if (!input || !inputCallback_) {
        return paContinue;
    }
    if (outputPlaying_.load()) {
        // Suppress microphone input while playing output to avoid echo feedback.
        return paContinue;
    }
    const size_t bytes = frameCount * sizeof(int16_t);
    std::vector<char> buffer(bytes);
    std::memcpy(buffer.data(), input, bytes);
    inputCallback_(buffer);
    return paContinue;
 }
 void DefaultAudioInterface::outputThreadFunc() {
    while (!shouldStop_.load()) {
        std::vector<char> audio;
        {
            std::unique_lock<std::mutex> lk(queueMutex_);
            queueCv_.wait(lk, [this] { return shouldStop_.load() || !outputQueue_.empty(); });
            if (shouldStop_.load()) break;
            audio = std::move(outputQueue_.front());
            outputQueue_.pop();
        }
        if (!audio.empty() && outputStream_) {
            outputPlaying_.store(true);
            Pa_WriteStream(outputStream_, audio.data(), audio.size() / sizeof(int16_t));
            outputPlaying_.store(false);
        }
    }
 } 
--- a/CPP/elevenlabs-convai-cpp-main/src/main.cpp
+++ b/CPP/elevenlabs-convai-cpp-main/src/main.cpp
@ -1,31 +0,0 @@
 #include "Conversation.hpp"
 #include "DefaultAudioInterface.hpp"
 #include <cstdlib>
 #include <iostream>
 #include <memory>
 int main() {
    const char* agentIdEnv = std::getenv("AGENT_ID");
    if (!agentIdEnv) {
        std::cerr << "AGENT_ID environment variable must be set" << std::endl;
        return 1;
    }
    std::string agentId(agentIdEnv);
    auto audioInterface = std::make_shared<DefaultAudioInterface>();
    Conversation conv(agentId, /*requiresAuth*/ false, audioInterface,
                      [](const std::string& resp) { std::cout << "Agent: " << resp << std::endl; },
                      [](const std::string& orig, const std::string& corrected) {
                          std::cout << "Agent correction: " << orig << " -> " << corrected << std::endl; },
                      [](const std::string& transcript) { std::cout << "User: " << transcript << std::endl; });
    conv.startSession();
    std::cout << "Press Enter to quit..." << std::endl;
    std::cin.get();
    conv.endSession();
    auto convId = conv.waitForSessionEnd();
    std::cout << "Conversation ID: " << convId << std::endl;
    return 0;
 } 
--- a/Unreal/PS_AI_Agent/Content/test_AI_Actor.uasset
+++ b/Unreal/PS_AI_Agent/Content/test_AI_Actor.uasset
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp
@ -158,8 +158,6 @@ void UElevenLabsConversationalAgentComponent::StartConversation()
 			&UElevenLabsConversationalAgentComponent::HandleInterrupted);
 		WebSocketProxy->OnAgentResponseStarted.AddDynamic(this,
 			&UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted);
 		WebSocketProxy->OnAgentResponsePart.AddDynamic(this,
 			&UElevenLabsConversationalAgentComponent::HandleAgentResponsePart);
 	}
 	// Pass configuration to the proxy before connecting.
@ -268,23 +266,7 @@ void UElevenLabsConversationalAgentComponent::StopListening()
 	// Flush any partially-accumulated mic audio before signalling end-of-turn.
 	// This ensures the final words aren't discarded just because the last callback
 	// didn't push the buffer over the MicChunkMinBytes threshold.
-	//
+	if (MicAccumulationBuffer.Num() > 0 && WebSocketProxy && IsConnected())
 	// EXCEPT during collision avoidance: bAgentGenerating is already true when
 	// HandleAgentResponseStarted calls StopListening (it sets the flag before calling us).
 	// Flushing audio to a server that is mid-generation can cause it to re-enter
 	// "user speaking" state and stall waiting for more audio that never arrives,
 	// leaving both sides stuck — no audio for the collision response and no response
 	// for subsequent turns.
 	if (bAgentGenerating)
 	{
 		if (MicAccumulationBuffer.Num() > 0)
 		{
 			UE_LOG(LogElevenLabsAgent, Log,
 				TEXT("StopListening: discarding %d bytes of accumulated mic audio (collision — server is mid-generation)."),
 				MicAccumulationBuffer.Num());
 		}
 	}
 	else if (MicAccumulationBuffer.Num() > 0 && WebSocketProxy && IsConnected())
 	{
 		WebSocketProxy->SendAudioChunk(MicAccumulationBuffer);
 	}
@ -441,8 +423,7 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
 {
 	// The server has started generating a response (first agent_chat_response_part).
 	// Set bAgentGenerating BEFORE StopListening so that any StartListening call
-	// triggered by the Blueprint's OnAgentStartedGenerating handler is blocked,
+	// triggered by the Blueprint's OnAgentStartedGenerating handler is blocked.
 	// and so that StopListening knows to skip the mic buffer flush (collision path).
 	bAgentGenerating = true;
 	bWaitingForAgentResponse = false;  // Server is generating — response timeout cancelled.
@ -452,37 +433,21 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
 	if (bIsListening)
 	{
 		// Collision: server started generating Turn N's response while Turn M (M>N) mic was open.
-		// The server's VAD detected a pause in the user's speech and started generating
+		// Log both turn indices so the timeline is unambiguous.
 		// prematurely — the user hasn't finished speaking yet.
 		//
 		// Stop the mic WITHOUT flushing the accumulated audio buffer (see StopListening's
 		// bAgentGenerating guard). Flushing would send audio to a server that is mid-generation,
 		// causing it to re-enter "user speaking" state and stall — both sides stuck.
 		//
 		// Do NOT send an interrupt here — just let the server's response play out:
 		//   - If audio arrives → EnqueueAgentAudio sets bAgentSpeaking, response plays normally.
 		//   - If audio never arrives → generating timeout (10s) clears bAgentGenerating.
 		// Either way the state machine recovers and Blueprint can reopen the mic.
 		UE_LOG(LogElevenLabsAgent, Log,
 			TEXT("[T+%.2fs] [Turn %d → Turn %d collision] Agent generating Turn %d response — mic (Turn %d) was open, stopping. (%.2fs after turn end)"),
 			T, LastClosedTurnIndex, TurnIndex, LastClosedTurnIndex, TurnIndex, LatencyFromTurnEnd);
 		StopListening();
 	}
-
+	else
 	{
 		UE_LOG(LogElevenLabsAgent, Log,
 			TEXT("[T+%.2fs] [Turn %d] Agent generating. (%.2fs after turn end)"),
 			T, LastClosedTurnIndex, LatencyFromTurnEnd);
 	}
 	OnAgentStartedGenerating.Broadcast();
 }
 void UElevenLabsConversationalAgentComponent::HandleAgentResponsePart(const FString& PartialText)
 {
 	if (bEnableAgentPartialResponse)
 	{
 		OnAgentPartialResponse.Broadcast(PartialText);
 	}
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // Audio playback
 // ─────────────────────────────────────────────────────────────────────────────
@ -604,15 +569,9 @@ void UElevenLabsConversationalAgentComponent::OnMicrophoneDataCaptured(const TAr
 {
 	if (!IsConnected() || !bIsListening) return;
 	// Echo suppression: skip sending mic audio while the agent is speaking.
 	// This prevents the agent from hearing its own voice through the speakers,
 	// which would confuse the server's VAD and STT. Matches the approach used
 	// in the official ElevenLabs C++ SDK (outputPlaying_ flag).
 	if (bAgentSpeaking) return;
 	// Convert this callback's samples to int16 bytes and accumulate.
-	// WASAPI fires every ~5ms (158 bytes at 16kHz). ElevenLabs needs ≥250ms
+	// WASAPI fires every ~5ms (158 bytes at 16kHz). ElevenLabs needs ≥100ms
-	// (8000 bytes) per chunk for reliable VAD and STT. We hold bytes here
+	// (3200 bytes) per chunk for reliable VAD and STT. We hold bytes here
 	// until we have enough, then send the whole batch in one WebSocket frame.
 	TArray<uint8> PCMBytes = FloatPCMToInt16Bytes(FloatPCM);
 	MicAccumulationBuffer.Append(PCMBytes);
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp
@ -158,6 +158,20 @@ void UElevenLabsWebSocketProxy::SendUserTurnEnd()
 	// in a loop: part arrives → event → StopListening → SendUserTurnEnd → flag reset → part arrives → loop.
 	// The flag is only reset in SendUserTurnStart() at the beginning of a new user turn.
 	// Clear the interrupt-ignore flag if it was never cleared by an "interruption" server ack.
 	// The ElevenLabs server does not always send the "interruption" acknowledgement reliably.
 	// By the time the user has spoken a full new turn (seconds of audio), any in-flight content
 	// from the previously interrupted generation has long since arrived — it is safe to resume
 	// normal content processing so the server's response to this new turn is not silently discarded.
 	if (bIgnoreIncomingContent)
 	{
 		bIgnoreIncomingContent = false;
 		const double T = UserTurnEndTime - SessionStartTime;
 		UE_LOG(LogElevenLabsWS, Log,
 			TEXT("[T+%.2fs] Cleared interrupt-ignore flag at turn end (server 'interruption' ack was not received — resuming content processing)."),
 			T);
 	}
 	const double T = UserTurnEndTime - SessionStartTime;
 	UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] User turn ended — server VAD silence detection started (turn_timeout=1s)."), T);
 }
@ -182,7 +196,12 @@ void UElevenLabsWebSocketProxy::SendInterrupt()
 {
 	if (!IsConnected()) return;
-	UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt."));
+	// Immediately start discarding in-flight audio and chat response parts from
 	// the generation we are about to interrupt. The server may still send several
 	// frames before it processes our interrupt. We stop ignoring once the server
 	// sends its "interruption" acknowledgement (HandleInterruption).
 	bIgnoreIncomingContent = true;
 	UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt — ignoring incoming content until server acks."));
 	TSharedPtr<FJsonObject> Msg = MakeShareable(new FJsonObject());
 	Msg->SetStringField(TEXT("type"), ElevenLabsMessageType::Interrupt);
@ -381,7 +400,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message)
 	}
 	else if (MsgType == ElevenLabsMessageType::AgentChatResponsePart)
 	{
-		HandleAgentChatResponsePart(Root);
+		HandleAgentChatResponsePart();
 	}
 	else if (MsgType == ElevenLabsMessageType::AgentResponseCorrection)
 	{
@ -448,10 +467,18 @@ void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size,
 		}
 		// Broadcast raw PCM bytes directly to the audio queue.
 		// Discard if we are waiting for an interruption ack (same logic as HandleAudioResponse).
 		TArray<uint8> PCMData = MoveTemp(BinaryFrameBuffer);
 		BinaryFrameBuffer.Reset();
 		if (!bIgnoreIncomingContent)
 		{
 			OnAudioReceived.Broadcast(PCMData);
 		}
 		else
 		{
 			UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding binary audio frame (interrupt pending server ack)."));
 		}
 	}
 }
 // ─────────────────────────────────────────────────────────────────────────────
@ -480,6 +507,15 @@ void UElevenLabsWebSocketProxy::HandleConversationInitiation(const TSharedPtr<FJ
 void UElevenLabsWebSocketProxy::HandleAudioResponse(const TSharedPtr<FJsonObject>& Root)
 {
 	// Discard audio that belongs to an interrupted generation.
 	// The server may send several more audio frames after we sent "interrupt" —
 	// they must not restart the speaking state on the client side.
 	if (bIgnoreIncomingContent)
 	{
 		UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding audio frame (interrupt pending server ack)."));
 		return;
 	}
 	// Expected structure:
 	// { "type": "audio",
 	//   "audio_event": { "audio_base_64": "<base64 PCM>", "event_id": 1 }
@ -491,17 +527,6 @@ void UElevenLabsWebSocketProxy::HandleAudioResponse(const TSharedPtr<FJsonObject
 		return;
 	}
 	// Discard audio belonging to an interrupted generation (event_id approach).
 	// Matches the official ElevenLabs C++ and Python SDKs: only AUDIO is filtered
 	// by event_id — transcripts, agent_response, etc. are always processed.
 	int32 EventId = 0;
 	(*AudioEvent)->TryGetNumberField(TEXT("event_id"), EventId);
 	if (EventId > 0 && EventId <= LastInterruptEventId)
 	{
 		UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding audio event_id=%d (interrupted at %d)."), EventId, LastInterruptEventId);
 		return;
 	}
 	FString Base64Audio;
 	if (!(*AudioEvent)->TryGetStringField(TEXT("audio_base_64"), Base64Audio))
 	{
@ -544,6 +569,16 @@ void UElevenLabsWebSocketProxy::HandleTranscript(const TSharedPtr<FJsonObject>&
 void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject>& Root)
 {
 	// ISSUE-19: discard agent_response that belongs to an interrupted generation.
 	// A stale agent_response from the cancelled turn would set bAgentResponseReceived=true
 	// on the component, allowing the silence-detection Tick to fire OnAgentStoppedSpeaking
 	// at the wrong time (no audio is currently playing for the new turn yet).
 	if (bIgnoreIncomingContent)
 	{
 		UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_response (interrupt pending server ack)."));
 		return;
 	}
 	// ISSUE-22: reset bAgentResponseStartedFired so OnAgentResponseStarted fires again on
 	// the next turn. In Server VAD mode SendUserTurnStart() is never called — it is the only
 	// other place that resets this flag — so without this reset, OnAgentResponseStarted fires
@ -567,8 +602,18 @@ void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject
 	OnAgentResponse.Broadcast(ResponseText);
 }
-void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Root)
+void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart()
 {
 	// Ignore response parts that belong to a generation we have already interrupted.
 	// Without this guard, old parts arriving after SendInterrupt() would re-trigger
 	// OnAgentResponseStarted (bAgentResponseStartedFired was reset in SendUserTurnStart),
 	// causing the component to stop the newly-opened microphone — creating an infinite loop.
 	if (bIgnoreIncomingContent)
 	{
 		UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_chat_response_part (interrupt pending server ack)."));
 		return;
 	}
 	// agent_chat_response_part = the server is actively generating a response (LLM token stream).
 	// Fire OnAgentResponseStarted once per turn so the component can auto-stop the microphone
 	// if the Blueprint restarted listening before the server finished processing the previous turn.
@ -583,39 +628,15 @@ void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart(const TSharedPtr<FJs
 			T, LatencyFromTurnEnd);
 		OnAgentResponseStarted.Broadcast();
 	}
-
+	// Subsequent parts logged at Verbose only (can be dozens per response).
 	// Extract the streaming text fragment and broadcast it.
 	// API structure:
 	// { "type": "agent_chat_response_part",
 	//   "agent_chat_response_part_event": { "agent_response_part": "partial text" }
 	// }
 	const TSharedPtr<FJsonObject>* PartEvent = nullptr;
 	if (Root->TryGetObjectField(TEXT("agent_chat_response_part_event"), PartEvent) && PartEvent)
 	{
 		FString PartText;
 		if ((*PartEvent)->TryGetStringField(TEXT("agent_response_part"), PartText) && !PartText.IsEmpty())
 		{
 			OnAgentResponsePart.Broadcast(PartText);
 		}
 	}
 }
 void UElevenLabsWebSocketProxy::HandleInterruption(const TSharedPtr<FJsonObject>& Root)
 {
-	// Extract the interrupt event_id so we can filter stale audio frames.
+	// Server has acknowledged the interruption — the old generation is fully stopped.
-	// { "type": "interruption", "interruption_event": { "event_id": 42 } }
+	// Resume accepting incoming audio and chat response parts (for the next turn).
-	const TSharedPtr<FJsonObject>* InterruptEvent = nullptr;
+	bIgnoreIncomingContent = false;
-	if (Root->TryGetObjectField(TEXT("interruption_event"), InterruptEvent) && InterruptEvent)
+	UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack received — resuming content processing)."));
 	{
 		int32 EventId = 0;
 		(*InterruptEvent)->TryGetNumberField(TEXT("event_id"), EventId);
 		if (EventId > LastInterruptEventId)
 		{
 			LastInterruptEventId = EventId;
 		}
 	}
 	UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack, LastInterruptEventId=%d)."), LastInterruptEventId);
 	OnInterrupted.Broadcast();
 }
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h
@ -43,15 +43,6 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted);
 */
 DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStartedGenerating);
 /**
 * Fired for every agent_chat_response_part — streams the agent's text as the LLM
 * generates it, token by token. Use this for real-time subtitles / text display.
 * Each call provides the text fragment from that individual part (NOT accumulated).
 * The final complete text is still available via OnAgentTextResponse (agent_response).
 */
 DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAgentPartialResponse,
 	const FString&, PartialText);
 /**
 * Fired when the server has not started generating a response within ResponseTimeoutSeconds
 * after the user stopped speaking (StopListening was called).
@ -147,15 +138,6 @@ public:
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events")
 	bool bEnableAgentTextResponse = true;
 	/**
 	 * Forward streaming text parts (agent_chat_response_part events) to the
 	 * OnAgentPartialResponse delegate. Each part is a text fragment as the LLM
 	 * generates it — use this for real-time subtitles that appear while the agent
 	 * speaks, instead of waiting for the full text (OnAgentTextResponse).
 	 */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events")
 	bool bEnableAgentPartialResponse = false;
 	/**
 	 * How many seconds to wait for the server to start generating a response
 	 * after the user stops speaking (StopListening) before firing OnAgentResponseTimeout.
@ -186,14 +168,6 @@ public:
 	UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
 	FOnAgentTextResponse OnAgentTextResponse;
 	/**
 	 * Streaming text fragments as the LLM generates them.
 	 * Fires for every agent_chat_response_part — each call gives one text chunk.
 	 * Enable with bEnableAgentPartialResponse.
 	 */
 	UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
 	FOnAgentPartialResponse OnAgentPartialResponse;
 	UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
 	FOnAgentStartedSpeaking OnAgentStartedSpeaking;
@ -311,9 +285,6 @@ private:
 	UFUNCTION()
 	void HandleAgentResponseStarted();
 	UFUNCTION()
 	void HandleAgentResponsePart(const FString& PartialText);
 	// ── Audio playback ────────────────────────────────────────────────────────
 	void InitAudioPlayback();
 	void EnqueueAgentAudio(const TArray<uint8>& PCMData);
@ -400,5 +371,5 @@ private:
 	// ElevenLabs needs at least ~100ms (3200 bytes) per chunk for reliable VAD/STT.
 	// We accumulate here and only call SendAudioChunk once enough bytes are ready.
 	TArray<uint8> MicAccumulationBuffer;
-	static constexpr int32 MicChunkMinBytes = 8000; // 250ms @ 16kHz 16-bit mono (4000 samples, matches ElevenLabs SDK recommendation)
+	static constexpr int32 MicChunkMinBytes = 3200; // 100ms @ 16kHz 16-bit mono
 };
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h
@ -43,11 +43,6 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnElevenLabsInterrupted);
 */
 DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnElevenLabsAgentResponseStarted);
 /** Fired for every agent_chat_response_part — streams the LLM text as it is generated.
 *  PartialText is the text fragment from this individual part (NOT accumulated). */
 DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnElevenLabsAgentResponsePart,
 	const FString&, PartialText);
 // ─────────────────────────────────────────────────────────────────────────────
 // WebSocket Proxy
@ -99,10 +94,6 @@ public:
 	UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
 	FOnElevenLabsAgentResponseStarted OnAgentResponseStarted;
 	/** Fired for every agent_chat_response_part with the streaming text fragment. */
 	UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
 	FOnElevenLabsAgentResponsePart OnAgentResponsePart;
 	// ── Lifecycle ─────────────────────────────────────────────────────────────
 	/**
@ -191,7 +182,7 @@ private:
 	void HandleAudioResponse(const TSharedPtr<FJsonObject>& Payload);
 	void HandleTranscript(const TSharedPtr<FJsonObject>& Payload);
 	void HandleAgentResponse(const TSharedPtr<FJsonObject>& Payload);
-	void HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Payload);
+	void HandleAgentChatResponsePart();
 	void HandleInterruption(const TSharedPtr<FJsonObject>& Payload);
 	void HandlePing(const TSharedPtr<FJsonObject>& Payload);
@ -226,12 +217,11 @@ private:
 	// Used to compute [T+Xs] session-relative timestamps in all log messages.
 	double SessionStartTime = 0.0;
-	// ── Interrupt filtering (event_id approach, matching official SDK) ────────
+	// Set to true in SendInterrupt() so that in-flight audio frames and
-	// When the server sends an "interruption" event it includes an event_id.
+	// agent_chat_response_part messages from the interrupted generation are silently
-	// Audio events whose event_id <= LastInterruptEventId belong to the cancelled
+	// discarded instead of re-triggering the speaking/generating state.
-	// generation and must be discarded. Only AUDIO is filtered — transcripts,
+	// Cleared when the server sends its "interruption" acknowledgement.
-	// agent_response, agent_chat_response_part etc. are always processed.
+	bool bIgnoreIncomingContent = false;
 	int32 LastInterruptEventId = 0;
 public:
 	// Set by UElevenLabsConversationalAgentComponent before calling Connect().