Fix network replication: head/eyes tracking, facial expressions, lip sync and audio on remote clients

- OnRep_ConversationState now sets PostureComponent TargetActor from
  replicated NetConversatingPlayer so remote clients see head/eyes tracking
- Activate FacialExpressionComponent and LipSyncComponent on remote clients
  (OnAgentConnected never fires on clients since WebSocket is server-only)
- Fix audio race condition: MulticastAgentStartedSpeaking no longer sets
  bAgentSpeaking prematurely, letting EnqueueAgentAudio handle the full
  first-chunk initialization (pre-buffer, Play(), state reset)
- Add diagnostic logging to MulticastReceiveAgentAudio for silent failures
  (OpusDecoder invalid, LOD culling, decode failure)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-03-02 14:17:49 +01:00
parent 9ee7960855
commit 11255db576

View File

@ -4,6 +4,8 @@
#include "PS_AI_ConvAgent_AgentConfig_ElevenLabs.h"
#include "PS_AI_ConvAgent_MicrophoneCaptureComponent.h"
#include "PS_AI_ConvAgent_PostureComponent.h"
#include "PS_AI_ConvAgent_FacialExpressionComponent.h"
#include "PS_AI_ConvAgent_LipSyncComponent.h"
#include "PS_AI_ConvAgent_InteractionSubsystem.h"
#include "PS_AI_ConvAgent.h"
@ -1171,6 +1173,46 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::GetLifetimeReplicatedProps(
void UPS_AI_ConvAgent_ElevenLabsComponent::OnRep_ConversationState()
{
AActor* Owner = GetOwner();
if (Owner)
{
// Update posture target on all clients so the NPC head/eyes track the
// conversating player. TargetActor is normally set by InteractionComponent
// on the local pawn, but remote clients never run that code path.
if (UPS_AI_ConvAgent_PostureComponent* Posture = Owner->FindComponentByClass<UPS_AI_ConvAgent_PostureComponent>())
{
if (bNetIsConversing && NetConversatingPlayer)
{
if (APawn* PlayerPawn = NetConversatingPlayer->GetPawn())
{
Posture->TargetActor = PlayerPawn;
Posture->ResetBodyTarget();
Posture->bEnableBodyTracking = true;
}
}
else
{
Posture->TargetActor = nullptr;
Posture->bEnableBodyTracking = false;
}
}
// Activate/deactivate facial expressions and lip sync for remote clients.
// On the server, this is handled by OnAgentConnected/OnAgentDisconnected,
// but those events never fire on clients (no WebSocket connection).
if (UPS_AI_ConvAgent_FacialExpressionComponent* FacialExpr =
Owner->FindComponentByClass<UPS_AI_ConvAgent_FacialExpressionComponent>())
{
FacialExpr->bActive = bNetIsConversing;
}
if (UPS_AI_ConvAgent_LipSyncComponent* LipSync =
Owner->FindComponentByClass<UPS_AI_ConvAgent_LipSyncComponent>())
{
LipSync->bActive = bNetIsConversing;
}
}
if (!bNetIsConversing)
{
// Conversation ended on server — clean up local playback.
@ -1280,12 +1322,28 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
{
// Server already handled playback in HandleAudioReceived.
if (GetOwnerRole() == ROLE_Authority) return;
if (!OpusDecoder.IsValid()) return;
if (!OpusDecoder.IsValid())
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("[NET] MulticastReceiveAgentAudio: OpusDecoder is INVALID — audio dropped. FVoiceModule available: %s"),
FVoiceModule::IsAvailable() ? TEXT("YES") : TEXT("NO"));
return;
}
// LOD: skip audio if too far (unless this client is the speaker).
const float Dist = GetDistanceToLocalPlayer();
const bool bIsSpeaker = IsLocalPlayerConversating();
if (!bIsSpeaker && AudioLODCullDistance > 0.f && Dist > AudioLODCullDistance) return;
if (!bIsSpeaker && AudioLODCullDistance > 0.f && Dist > AudioLODCullDistance)
{
if (bDebug && DebugVerbosity >= 2)
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
TEXT("[NET] MulticastReceiveAgentAudio: LOD culled (dist=%.0f > cull=%.0f)"),
Dist, AudioLODCullDistance);
}
return;
}
// Decode Opus → PCM.
const uint32 MaxDecompressedSize = 16000 * 2; // 1 second of 16kHz 16-bit mono
@ -1295,9 +1353,24 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
OpusDecoder->Decode(OpusData.GetData(), OpusData.Num(),
PCMBuffer.GetData(), DecompressedSize);
if (DecompressedSize == 0) return;
if (DecompressedSize == 0)
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("[NET] MulticastReceiveAgentAudio: Opus decode failed (0 bytes output from %d bytes input)"),
OpusData.Num());
return;
}
PCMBuffer.SetNum(DecompressedSize);
if (bDebug)
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
TEXT("[NET] MulticastReceiveAgentAudio: decoded %d bytes Opus → %d bytes PCM | bAgentSpeaking=%s | AudioComp playing=%s"),
OpusData.Num(), DecompressedSize,
bAgentSpeaking ? TEXT("true") : TEXT("false"),
(AudioPlaybackComponent && AudioPlaybackComponent->IsPlaying()) ? TEXT("true") : TEXT("false"));
}
// Local playback.
EnqueueAgentAudio(PCMBuffer);
@ -1311,8 +1384,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
// Reliable multicast RPC sent by the server when the agent starts speaking.
// The server returns early because it already manages its own playback state.
void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastAgentStartedSpeaking_Implementation()
{
// Only remote clients proceed past this point.
if (GetOwnerRole() == ROLE_Authority) return;
// NOTE(review): this is a rendered diff with the +/- markers stripped; per the
// commit message, the two statements below are the lines REMOVED by this
// commit, and the NOTE comment after them is the replacement. Confirm against
// the actual file before treating both as live code.
bAgentSpeaking = true;
OnAgentStartedSpeaking.Broadcast();
// NOTE: Do NOT set bAgentSpeaking here. This reliable RPC arrives BEFORE
// the first audio chunk (unreliable). Setting bAgentSpeaking=true here would
// cause EnqueueAgentAudio() to skip the first-chunk initialization path
// (pre-buffer, Play(), state reset). Instead, let EnqueueAgentAudio() handle
// the transition naturally when the first audio data actually arrives.
// OnAgentStartedSpeaking is broadcast from EnqueueAgentAudio on all roles.
}
void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastAgentStoppedSpeaking_Implementation()