From 86b7d9744e4f7b16634effac6eba10531ec4cf64 Mon Sep 17 00:00:00 2001 From: "j.foucher" Date: Mon, 2 Mar 2026 18:27:40 +0100 Subject: [PATCH] =?UTF-8?q?Opus-compress=20mic=20audio=20on=20client?= =?UTF-8?q?=E2=86=92server=20relay=20path=20(~16x=20bandwidth=20reduction)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create OpusEncoder on ALL machines (was Authority-only) — clients now encode mic audio, server decodes it; server still encodes agent audio - FeedExternalAudio / OnMicrophoneDataCaptured: Opus-encode accumulated PCM buffer before sending via ServerRelayMicAudio RPC on client path (~200 bytes/100ms instead of 3200 bytes = ~16 Kbits/s vs 256 Kbits/s) - ServerRelayMicAudio_Implementation: auto-detect Opus (size < raw chunk) and decode back to PCM before forwarding to WebSocket - Add public DecompressMicAudio() helper for clean API access from InteractionComponent relay without exposing private Opus members - Graceful fallback: if Opus unavailable, raw PCM is sent/received as before Co-Authored-By: Claude Opus 4.6 --- .../PS_AI_ConvAgent_ElevenLabsComponent.cpp | 83 +++++++++++++++++-- .../PS_AI_ConvAgent_InteractionComponent.cpp | 16 +++- .../PS_AI_ConvAgent_ElevenLabsComponent.h | 14 +++- 3 files changed, 100 insertions(+), 13 deletions(-) diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp index 6001fb3..80b9a0e 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp @@ -630,7 +630,26 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::FeedExternalAudio(const TArray // Route through relay (clients can't call Server RPCs on NPC actors). 
if (auto* Relay = FindLocalRelayComponent()) { - Relay->ServerRelayMicAudio(GetOwner(), MicAccumulationBuffer); + // Opus-compress mic audio before sending over the network. + // 3200 bytes raw PCM → ~200 bytes Opus (~16x reduction). + if (OpusEncoder.IsValid()) + { + uint32 CompressedSize = static_cast(OpusWorkBuffer.Num()); + OpusEncoder->Encode(MicAccumulationBuffer.GetData(), + MicAccumulationBuffer.Num(), + OpusWorkBuffer.GetData(), CompressedSize); + if (CompressedSize > 0) + { + TArray Compressed; + Compressed.Append(OpusWorkBuffer.GetData(), CompressedSize); + Relay->ServerRelayMicAudio(GetOwner(), Compressed); + } + } + else + { + // Fallback: raw PCM (no Opus encoder available). + Relay->ServerRelayMicAudio(GetOwner(), MicAccumulationBuffer); + } } else { @@ -641,6 +660,33 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::FeedExternalAudio(const TArray } } +// ───────────────────────────────────────────────────────────────────────────── +// Network audio helpers (used by InteractionComponent relay) +// ───────────────────────────────────────────────────────────────────────────── +bool UPS_AI_ConvAgent_ElevenLabsComponent::DecompressMicAudio( + const TArray& CompressedData, TArray& OutPCM) const +{ + if (!OpusDecoder.IsValid() || CompressedData.Num() >= GetMicChunkMinBytes()) + { + return false; // Not Opus-compressed or no decoder. 
+ } + + const uint32 MaxDecoded = 16000 * 2; // 1 sec of 16kHz 16-bit mono + OutPCM.SetNumUninitialized(MaxDecoded); + uint32 DecodedSize = MaxDecoded; + OpusDecoder->Decode(CompressedData.GetData(), CompressedData.Num(), + OutPCM.GetData(), DecodedSize); + + if (DecodedSize == 0) return false; + OutPCM.SetNum(DecodedSize, EAllowShrinking::No); + return true; +} + +int32 UPS_AI_ConvAgent_ElevenLabsComponent::GetMicChunkMinBytesPublic() const +{ + return GetMicChunkMinBytes(); +} + // ───────────────────────────────────────────────────────────────────────────── // State queries // ───────────────────────────────────────────────────────────────────────────── @@ -1313,9 +1359,26 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnMicrophoneDataCaptured(const TArray else { // Route through relay (clients can't call Server RPCs on NPC actors). + // Opus-compress before sending — same logic as FeedExternalAudio. if (auto* Relay = FindLocalRelayComponent()) { - Relay->ServerRelayMicAudio(GetOwner(), MicAccumulationBuffer); + if (OpusEncoder.IsValid()) + { + uint32 CompressedSize = static_cast(OpusWorkBuffer.Num()); + OpusEncoder->Encode(MicAccumulationBuffer.GetData(), + MicAccumulationBuffer.Num(), + OpusWorkBuffer.GetData(), CompressedSize); + if (CompressedSize > 0) + { + TArray Compressed; + Compressed.Append(OpusWorkBuffer.GetData(), CompressedSize); + Relay->ServerRelayMicAudio(GetOwner(), Compressed); + } + } + else + { + Relay->ServerRelayMicAudio(GetOwner(), MicAccumulationBuffer); + } } else { @@ -1654,14 +1717,16 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::InitOpusCodec() FVoiceModule& VoiceModule = FVoiceModule::Get(); const ENetRole Role = GetOwnerRole(); - if (Role == ROLE_Authority) - { - OpusEncoder = VoiceModule.CreateVoiceEncoder( - PS_AI_ConvAgent_Audio_ElevenLabs::SampleRate, - PS_AI_ConvAgent_Audio_ElevenLabs::Channels, - EAudioEncodeHint::VoiceEncode_Voice); - } + // Encoder: on Authority it encodes agent audio for multicast to clients. 
+ // On clients it encodes mic audio for relay to server (~16x compression). + OpusEncoder = VoiceModule.CreateVoiceEncoder( + PS_AI_ConvAgent_Audio_ElevenLabs::SampleRate, + PS_AI_ConvAgent_Audio_ElevenLabs::Channels, + EAudioEncodeHint::VoiceEncode_Voice); + + // Decoder: on clients it decodes agent audio from multicast. + // On Authority it decodes mic audio arriving via relay from clients. OpusDecoder = VoiceModule.CreateVoiceDecoder( PS_AI_ConvAgent_Audio_ElevenLabs::SampleRate, PS_AI_ConvAgent_Audio_ElevenLabs::Channels); diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp index b79c457..d1003fb 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp @@ -603,13 +603,25 @@ void UPS_AI_ConvAgent_InteractionComponent::ServerRelayEndConversation_Implement } void UPS_AI_ConvAgent_InteractionComponent::ServerRelayMicAudio_Implementation( - AActor* AgentActor, const TArray& PCMBytes) + AActor* AgentActor, const TArray& AudioBytes) { if (!AgentActor) return; auto* Agent = AgentActor->FindComponentByClass(); if (!Agent) return; - Agent->ServerSendMicAudio_Implementation(PCMBytes); + // Clients Opus-encode mic audio before sending via relay (~200 bytes + // instead of 3200 bytes per 100ms chunk). Decode back to raw PCM here + // before forwarding to the WebSocket which expects uncompressed int16. + TArray DecodedPCM; + if (Agent->DecompressMicAudio(AudioBytes, DecodedPCM)) + { + Agent->ServerSendMicAudio_Implementation(DecodedPCM); + } + else + { + // Raw PCM fallback (no Opus or data is already uncompressed). 
+ Agent->ServerSendMicAudio_Implementation(AudioBytes); + } } void UPS_AI_ConvAgent_InteractionComponent::ServerRelaySendText_Implementation( diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h index c9e768a..0b02b74 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h @@ -460,6 +460,16 @@ public: FActorComponentTickFunction* ThisTickFunction) override; virtual void GetLifetimeReplicatedProps(TArray& OutLifetimeProps) const override; + // ── Network audio helpers (used by InteractionComponent relay) ──────── + + /** Decompress Opus-encoded mic audio back to raw PCM. + * Returns true and fills OutPCM on success. Returns false (OutPCM must not be used) if no Opus decoder is + * available, the payload is not smaller than one raw mic chunk (treated as raw PCM), or decoding yields zero bytes. */ + bool DecompressMicAudio(const TArray& CompressedData, TArray& OutPCM) const; + + /** Minimum raw PCM bytes expected per mic chunk (used to detect Opus vs raw). */ + int32 GetMicChunkMinBytesPublic() const; + private: // ── Network OnRep handlers ─────────────────────────────────────────────── UFUNCTION() @@ -602,8 +612,8 @@ private: int32 GetMicChunkMinBytes() const { return MicChunkDurationMs * 32; } // ── Opus codec (network audio compression) ─────────────────────────────── - TSharedPtr OpusEncoder; // Server only - TSharedPtr OpusDecoder; // All clients + TSharedPtr OpusEncoder; // All: server encodes agent audio, clients encode mic audio + TSharedPtr OpusDecoder; // All: clients decode agent audio, server decodes mic audio TArray OpusWorkBuffer; // Reusable scratch buffer for encode/decode void InitOpusCodec();