Compare commits

...

2 Commits

Author SHA1 Message Date
1c4dbfc402 Merge branch 'main' of ssh://git.polymorph.fr:5070/j.foucher/PS_AI_Agent 2026-03-03 09:30:44 +01:00
86b7d9744e Opus-compress mic audio on client→server relay path (~16x bandwidth reduction)
- Create OpusEncoder on ALL machines (was Authority-only) — clients now
  encode mic audio, server decodes it; server still encodes agent audio
- FeedExternalAudio / OnMicrophoneDataCaptured: Opus-encode accumulated
  PCM buffer before sending via ServerRelayMicAudio RPC on client path
  (~200 bytes/100ms instead of 3200 bytes = ~16 Kbits/s vs 256 Kbits/s)
- ServerRelayMicAudio_Implementation: auto-detect Opus (size < raw chunk)
  and decode back to PCM before forwarding to WebSocket
- Add public DecompressMicAudio() helper for clean API access from
  InteractionComponent relay without exposing private Opus members
- Graceful fallback: if Opus unavailable, raw PCM is sent/received as before

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 18:27:40 +01:00
3 changed files with 100 additions and 13 deletions

View File

@ -630,8 +630,27 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::FeedExternalAudio(const TArray<float>
// Route through relay (clients can't call Server RPCs on NPC actors).
if (auto* Relay = FindLocalRelayComponent())
{
// Opus-compress mic audio before sending over the network.
// 3200 bytes raw PCM → ~200 bytes Opus (~16x reduction).
if (OpusEncoder.IsValid())
{
uint32 CompressedSize = static_cast<uint32>(OpusWorkBuffer.Num());
OpusEncoder->Encode(MicAccumulationBuffer.GetData(),
MicAccumulationBuffer.Num(),
OpusWorkBuffer.GetData(), CompressedSize);
if (CompressedSize > 0)
{
TArray<uint8> Compressed;
Compressed.Append(OpusWorkBuffer.GetData(), CompressedSize);
Relay->ServerRelayMicAudio(GetOwner(), Compressed);
}
}
else
{
// Fallback: raw PCM (no Opus encoder available).
Relay->ServerRelayMicAudio(GetOwner(), MicAccumulationBuffer);
}
}
else
{
ServerSendMicAudio(MicAccumulationBuffer);
@ -641,6 +660,33 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::FeedExternalAudio(const TArray<float>
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Network audio helpers (used by InteractionComponent relay)
// ─────────────────────────────────────────────────────────────────────────────
bool UPS_AI_ConvAgent_ElevenLabsComponent::DecompressMicAudio(
const TArray<uint8>& CompressedData, TArray<uint8>& OutPCM) const
{
if (!OpusDecoder.IsValid() || CompressedData.Num() >= GetMicChunkMinBytes())
{
return false; // Not Opus-compressed or no decoder.
}
const uint32 MaxDecoded = 16000 * 2; // 1 sec of 16kHz 16-bit mono
OutPCM.SetNumUninitialized(MaxDecoded);
uint32 DecodedSize = MaxDecoded;
OpusDecoder->Decode(CompressedData.GetData(), CompressedData.Num(),
OutPCM.GetData(), DecodedSize);
if (DecodedSize == 0) return false;
OutPCM.SetNum(DecodedSize, EAllowShrinking::No);
return true;
}
/** Public accessor that forwards to GetMicChunkMinBytes() and returns its result unchanged. */
int32 UPS_AI_ConvAgent_ElevenLabsComponent::GetMicChunkMinBytesPublic() const
{
    const int32 MinChunkBytes = GetMicChunkMinBytes();
    return MinChunkBytes;
}
// ─────────────────────────────────────────────────────────────────────────────
// State queries
// ─────────────────────────────────────────────────────────────────────────────
@ -1313,10 +1359,27 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnMicrophoneDataCaptured(const TArray
else
{
// Route through relay (clients can't call Server RPCs on NPC actors).
// Opus-compress before sending — same logic as FeedExternalAudio.
if (auto* Relay = FindLocalRelayComponent())
{
if (OpusEncoder.IsValid())
{
uint32 CompressedSize = static_cast<uint32>(OpusWorkBuffer.Num());
OpusEncoder->Encode(MicAccumulationBuffer.GetData(),
MicAccumulationBuffer.Num(),
OpusWorkBuffer.GetData(), CompressedSize);
if (CompressedSize > 0)
{
TArray<uint8> Compressed;
Compressed.Append(OpusWorkBuffer.GetData(), CompressedSize);
Relay->ServerRelayMicAudio(GetOwner(), Compressed);
}
}
else
{
Relay->ServerRelayMicAudio(GetOwner(), MicAccumulationBuffer);
}
}
else
{
ServerSendMicAudio(MicAccumulationBuffer);
@ -1654,14 +1717,16 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::InitOpusCodec()
FVoiceModule& VoiceModule = FVoiceModule::Get();
const ENetRole Role = GetOwnerRole();
if (Role == ROLE_Authority)
{
// Encoder: on Authority it encodes agent audio for multicast to clients.
// On clients it encodes mic audio for relay to server (~16x compression).
OpusEncoder = VoiceModule.CreateVoiceEncoder(
PS_AI_ConvAgent_Audio_ElevenLabs::SampleRate,
PS_AI_ConvAgent_Audio_ElevenLabs::Channels,
EAudioEncodeHint::VoiceEncode_Voice);
}
// Decoder: on clients it decodes agent audio from multicast.
// On Authority it decodes mic audio arriving via relay from clients.
OpusDecoder = VoiceModule.CreateVoiceDecoder(
PS_AI_ConvAgent_Audio_ElevenLabs::SampleRate,
PS_AI_ConvAgent_Audio_ElevenLabs::Channels);

View File

@ -603,13 +603,25 @@ void UPS_AI_ConvAgent_InteractionComponent::ServerRelayEndConversation_Implement
}
// Handler body for the ServerRelayMicAudio RPC: finds the ElevenLabs agent component
// on AgentActor and forwards the received mic audio to it. Returns without doing
// anything when AgentActor is null or carries no such component.
// NOTE(review): this listing looks like a rendered diff with its +/- markers stripped.
// The PCMBytes parameter line and the unconditional forward of PCMBytes below appear
// to be the pre-change lines that the AudioBytes versions replace — confirm against
// the checked-in file before treating this text as compilable code.
void UPS_AI_ConvAgent_InteractionComponent::ServerRelayMicAudio_Implementation(
AActor* AgentActor, const TArray<uint8>& PCMBytes)
AActor* AgentActor, const TArray<uint8>& AudioBytes)
{
if (!AgentActor) return;
auto* Agent = AgentActor->FindComponentByClass<UPS_AI_ConvAgent_ElevenLabsComponent>();
if (!Agent) return;
Agent->ServerSendMicAudio_Implementation(PCMBytes);
// Clients Opus-encode mic audio before sending via relay (~200 bytes
// instead of 3200 bytes per 100ms chunk). Decode back to raw PCM here
// before forwarding to the WebSocket which expects uncompressed int16.
TArray<uint8> DecodedPCM;
// DecompressMicAudio returns false when it does not decode the payload; in that
// case the bytes are forwarded unchanged by the else branch.
if (Agent->DecompressMicAudio(AudioBytes, DecodedPCM))
{
Agent->ServerSendMicAudio_Implementation(DecodedPCM);
}
else
{
// Raw PCM fallback (no Opus or data is already uncompressed).
Agent->ServerSendMicAudio_Implementation(AudioBytes);
}
}
void UPS_AI_ConvAgent_InteractionComponent::ServerRelaySendText_Implementation(

View File

@ -460,6 +460,16 @@ public:
FActorComponentTickFunction* ThisTickFunction) override;
virtual void GetLifetimeReplicatedProps(TArray<FLifetimeProperty>& OutLifetimeProps) const override;
// ── Network audio helpers (used by InteractionComponent relay) ────────
/** Decompress Opus-encoded mic audio back to raw PCM.
* Returns true and fills OutPCM on success; returns false if no Opus
* decoder is available or the data doesn't look compressed. */
bool DecompressMicAudio(const TArray<uint8>& CompressedData, TArray<uint8>& OutPCM) const;
/** Minimum raw PCM bytes expected per mic chunk (used to detect Opus vs raw). */
int32 GetMicChunkMinBytesPublic() const;
private:
// ── Network OnRep handlers ───────────────────────────────────────────────
UFUNCTION()
@ -602,8 +612,8 @@ private:
// Minimum byte count of one raw mic chunk: chunk duration in milliseconds times 32.
// NOTE(review): 32 bytes/ms matches 16 kHz, 16-bit mono PCM (16 samples/ms * 2 bytes),
// which is presumably the mic capture format — confirm against the audio constants.
int32 GetMicChunkMinBytes() const { return MicChunkDurationMs * 32; }
// ── Opus codec (network audio compression) ───────────────────────────────
TSharedPtr<IVoiceEncoder> OpusEncoder; // Server only
TSharedPtr<IVoiceDecoder> OpusDecoder; // All clients
TSharedPtr<IVoiceEncoder> OpusEncoder; // All: server encodes agent audio, clients encode mic audio
TSharedPtr<IVoiceDecoder> OpusDecoder; // All: clients decode agent audio, server decodes mic audio
TArray<uint8> OpusWorkBuffer; // Reusable scratch buffer for encode/decode
void InitOpusCodec();