Fix network replication: head/eyes tracking, facial expressions, lip sync and audio on remote clients

- OnRep_ConversationState now sets PostureComponent TargetActor from
  replicated NetConversatingPlayer so remote clients see head/eyes tracking
- Activate FacialExpressionComponent and LipSyncComponent on remote clients
  (OnAgentConnected never fires on clients since WebSocket is server-only)
- Fix audio race condition: MulticastAgentStartedSpeaking no longer sets
  bAgentSpeaking prematurely, letting EnqueueAgentAudio handle the full
  first-chunk initialization (pre-buffer, Play(), state reset)
- Add diagnostic logging to MulticastReceiveAgentAudio for silent failures
  (OpusDecoder invalid, LOD culling, decode failure)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-03-02 14:17:49 +01:00
parent 9ee7960855
commit 11255db576

View File

@ -4,6 +4,8 @@
#include "PS_AI_ConvAgent_AgentConfig_ElevenLabs.h"
#include "PS_AI_ConvAgent_MicrophoneCaptureComponent.h"
#include "PS_AI_ConvAgent_PostureComponent.h"
#include "PS_AI_ConvAgent_FacialExpressionComponent.h"
#include "PS_AI_ConvAgent_LipSyncComponent.h"
#include "PS_AI_ConvAgent_InteractionSubsystem.h"
#include "PS_AI_ConvAgent.h"
@ -1171,6 +1173,46 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::GetLifetimeReplicatedProps(
void UPS_AI_ConvAgent_ElevenLabsComponent::OnRep_ConversationState()
{
AActor* Owner = GetOwner();
if (Owner)
{
// Update posture target on all clients so the NPC head/eyes track the
// conversating player. TargetActor is normally set by InteractionComponent
// on the local pawn, but remote clients never run that code path.
if (UPS_AI_ConvAgent_PostureComponent* Posture = Owner->FindComponentByClass<UPS_AI_ConvAgent_PostureComponent>())
{
if (bNetIsConversing && NetConversatingPlayer)
{
if (APawn* PlayerPawn = NetConversatingPlayer->GetPawn())
{
Posture->TargetActor = PlayerPawn;
Posture->ResetBodyTarget();
Posture->bEnableBodyTracking = true;
}
}
else
{
Posture->TargetActor = nullptr;
Posture->bEnableBodyTracking = false;
}
}
// Activate/deactivate facial expressions and lip sync for remote clients.
// On the server, this is handled by OnAgentConnected/OnAgentDisconnected,
// but those events never fire on clients (no WebSocket connection).
if (UPS_AI_ConvAgent_FacialExpressionComponent* FacialExpr =
Owner->FindComponentByClass<UPS_AI_ConvAgent_FacialExpressionComponent>())
{
FacialExpr->bActive = bNetIsConversing;
}
if (UPS_AI_ConvAgent_LipSyncComponent* LipSync =
Owner->FindComponentByClass<UPS_AI_ConvAgent_LipSyncComponent>())
{
LipSync->bActive = bNetIsConversing;
}
}
if (!bNetIsConversing)
{
// Conversation ended on server — clean up local playback.
@ -1280,12 +1322,28 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
{
// Server already handled playback in HandleAudioReceived.
if (GetOwnerRole() == ROLE_Authority) return;
if (!OpusDecoder.IsValid()) return;
if (!OpusDecoder.IsValid())
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("[NET] MulticastReceiveAgentAudio: OpusDecoder is INVALID — audio dropped. FVoiceModule available: %s"),
FVoiceModule::IsAvailable() ? TEXT("YES") : TEXT("NO"));
return;
}
// LOD: skip audio if too far (unless this client is the speaker).
const float Dist = GetDistanceToLocalPlayer();
const bool bIsSpeaker = IsLocalPlayerConversating();
if (!bIsSpeaker && AudioLODCullDistance > 0.f && Dist > AudioLODCullDistance) return;
if (!bIsSpeaker && AudioLODCullDistance > 0.f && Dist > AudioLODCullDistance)
{
if (bDebug && DebugVerbosity >= 2)
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
TEXT("[NET] MulticastReceiveAgentAudio: LOD culled (dist=%.0f > cull=%.0f)"),
Dist, AudioLODCullDistance);
}
return;
}
// Decode Opus → PCM.
const uint32 MaxDecompressedSize = 16000 * 2; // 1 second of 16kHz 16-bit mono
@ -1295,9 +1353,24 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
OpusDecoder->Decode(OpusData.GetData(), OpusData.Num(),
PCMBuffer.GetData(), DecompressedSize);
if (DecompressedSize == 0) return;
if (DecompressedSize == 0)
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("[NET] MulticastReceiveAgentAudio: Opus decode failed (0 bytes output from %d bytes input)"),
OpusData.Num());
return;
}
PCMBuffer.SetNum(DecompressedSize);
if (bDebug)
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
TEXT("[NET] MulticastReceiveAgentAudio: decoded %d bytes Opus → %d bytes PCM | bAgentSpeaking=%s | AudioComp playing=%s"),
OpusData.Num(), DecompressedSize,
bAgentSpeaking ? TEXT("true") : TEXT("false"),
(AudioPlaybackComponent && AudioPlaybackComponent->IsPlaying()) ? TEXT("true") : TEXT("false"));
}
// Local playback.
EnqueueAgentAudio(PCMBuffer);
@ -1311,8 +1384,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
// Reliable multicast RPC sent by the server when the agent starts speaking.
// The server returns early because it already manages its own playback state.
void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastAgentStartedSpeaking_Implementation()
{
// Only remote clients proceed past this point.
if (GetOwnerRole() == ROLE_Authority) return;
// NOTE(review): this is a rendered diff with the +/- markers stripped; per the
// commit message, the two statements below are the lines REMOVED by this
// commit, and the NOTE comment after them is the replacement. Confirm against
// the actual file before treating both as live code.
bAgentSpeaking = true;
OnAgentStartedSpeaking.Broadcast();
// NOTE: Do NOT set bAgentSpeaking here. This reliable RPC arrives BEFORE
// the first audio chunk (unreliable). Setting bAgentSpeaking=true here would
// cause EnqueueAgentAudio() to skip the first-chunk initialization path
// (pre-buffer, Play(), state reset). Instead, let EnqueueAgentAudio() handle
// the transition naturally when the first audio data actually arrives.
// OnAgentStartedSpeaking is broadcast from EnqueueAgentAudio on all roles.
}
void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastAgentStoppedSpeaking_Implementation()