Fix network replication: head/eyes tracking, facial expressions, lip sync and audio on remote clients
- OnRep_ConversationState now sets the PostureComponent's TargetActor from the replicated NetConversatingPlayer, so remote clients see head/eyes tracking.
- Activate FacialExpressionComponent and LipSyncComponent on remote clients (OnAgentConnected never fires on clients, since the WebSocket connection is server-only).
- Fix an audio race condition: MulticastAgentStartedSpeaking no longer sets bAgentSpeaking prematurely, letting EnqueueAgentAudio handle the full first-chunk initialization (pre-buffer, Play(), state reset).
- Add diagnostic logging to MulticastReceiveAgentAudio for silent failures (invalid OpusDecoder, LOD culling, decode failure).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9ee7960855
commit
11255db576
@@ -4,6 +4,8 @@
|
||||
#include "PS_AI_ConvAgent_AgentConfig_ElevenLabs.h"
|
||||
#include "PS_AI_ConvAgent_MicrophoneCaptureComponent.h"
|
||||
#include "PS_AI_ConvAgent_PostureComponent.h"
|
||||
#include "PS_AI_ConvAgent_FacialExpressionComponent.h"
|
||||
#include "PS_AI_ConvAgent_LipSyncComponent.h"
|
||||
#include "PS_AI_ConvAgent_InteractionSubsystem.h"
|
||||
#include "PS_AI_ConvAgent.h"
|
||||
|
||||
@@ -1171,6 +1173,46 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::GetLifetimeReplicatedProps(
|
||||
|
||||
void UPS_AI_ConvAgent_ElevenLabsComponent::OnRep_ConversationState()
|
||||
{
|
||||
AActor* Owner = GetOwner();
|
||||
|
||||
if (Owner)
|
||||
{
|
||||
// Update posture target on all clients so the NPC head/eyes track the
|
||||
// conversating player. TargetActor is normally set by InteractionComponent
|
||||
// on the local pawn, but remote clients never run that code path.
|
||||
if (UPS_AI_ConvAgent_PostureComponent* Posture = Owner->FindComponentByClass<UPS_AI_ConvAgent_PostureComponent>())
|
||||
{
|
||||
if (bNetIsConversing && NetConversatingPlayer)
|
||||
{
|
||||
if (APawn* PlayerPawn = NetConversatingPlayer->GetPawn())
|
||||
{
|
||||
Posture->TargetActor = PlayerPawn;
|
||||
Posture->ResetBodyTarget();
|
||||
Posture->bEnableBodyTracking = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Posture->TargetActor = nullptr;
|
||||
Posture->bEnableBodyTracking = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Activate/deactivate facial expressions and lip sync for remote clients.
|
||||
// On the server, this is handled by OnAgentConnected/OnAgentDisconnected,
|
||||
// but those events never fire on clients (no WebSocket connection).
|
||||
if (UPS_AI_ConvAgent_FacialExpressionComponent* FacialExpr =
|
||||
Owner->FindComponentByClass<UPS_AI_ConvAgent_FacialExpressionComponent>())
|
||||
{
|
||||
FacialExpr->bActive = bNetIsConversing;
|
||||
}
|
||||
if (UPS_AI_ConvAgent_LipSyncComponent* LipSync =
|
||||
Owner->FindComponentByClass<UPS_AI_ConvAgent_LipSyncComponent>())
|
||||
{
|
||||
LipSync->bActive = bNetIsConversing;
|
||||
}
|
||||
}
|
||||
|
||||
if (!bNetIsConversing)
|
||||
{
|
||||
// Conversation ended on server — clean up local playback.
|
||||
@@ -1280,12 +1322,28 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
|
||||
{
|
||||
// Server already handled playback in HandleAudioReceived.
|
||||
if (GetOwnerRole() == ROLE_Authority) return;
|
||||
if (!OpusDecoder.IsValid()) return;
|
||||
|
||||
if (!OpusDecoder.IsValid())
|
||||
{
|
||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
|
||||
TEXT("[NET] MulticastReceiveAgentAudio: OpusDecoder is INVALID — audio dropped. FVoiceModule available: %s"),
|
||||
FVoiceModule::IsAvailable() ? TEXT("YES") : TEXT("NO"));
|
||||
return;
|
||||
}
|
||||
|
||||
// LOD: skip audio if too far (unless this client is the speaker).
|
||||
const float Dist = GetDistanceToLocalPlayer();
|
||||
const bool bIsSpeaker = IsLocalPlayerConversating();
|
||||
if (!bIsSpeaker && AudioLODCullDistance > 0.f && Dist > AudioLODCullDistance) return;
|
||||
if (!bIsSpeaker && AudioLODCullDistance > 0.f && Dist > AudioLODCullDistance)
|
||||
{
|
||||
if (bDebug && DebugVerbosity >= 2)
|
||||
{
|
||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
|
||||
TEXT("[NET] MulticastReceiveAgentAudio: LOD culled (dist=%.0f > cull=%.0f)"),
|
||||
Dist, AudioLODCullDistance);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Decode Opus → PCM.
|
||||
const uint32 MaxDecompressedSize = 16000 * 2; // 1 second of 16kHz 16-bit mono
|
||||
@@ -1295,9 +1353,24 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
|
||||
OpusDecoder->Decode(OpusData.GetData(), OpusData.Num(),
|
||||
PCMBuffer.GetData(), DecompressedSize);
|
||||
|
||||
if (DecompressedSize == 0) return;
|
||||
if (DecompressedSize == 0)
|
||||
{
|
||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
|
||||
TEXT("[NET] MulticastReceiveAgentAudio: Opus decode failed (0 bytes output from %d bytes input)"),
|
||||
OpusData.Num());
|
||||
return;
|
||||
}
|
||||
PCMBuffer.SetNum(DecompressedSize);
|
||||
|
||||
if (bDebug)
|
||||
{
|
||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
|
||||
TEXT("[NET] MulticastReceiveAgentAudio: decoded %d bytes Opus → %d bytes PCM | bAgentSpeaking=%s | AudioComp playing=%s"),
|
||||
OpusData.Num(), DecompressedSize,
|
||||
bAgentSpeaking ? TEXT("true") : TEXT("false"),
|
||||
(AudioPlaybackComponent && AudioPlaybackComponent->IsPlaying()) ? TEXT("true") : TEXT("false"));
|
||||
}
|
||||
|
||||
// Local playback.
|
||||
EnqueueAgentAudio(PCMBuffer);
|
||||
|
||||
@@ -1311,8 +1384,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastReceiveAgentAudio_Implementa
|
||||
void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastAgentStartedSpeaking_Implementation()
|
||||
{
|
||||
if (GetOwnerRole() == ROLE_Authority) return;
|
||||
bAgentSpeaking = true;
|
||||
OnAgentStartedSpeaking.Broadcast();
|
||||
// NOTE: Do NOT set bAgentSpeaking here. This reliable RPC arrives BEFORE
|
||||
// the first audio chunk (unreliable). Setting bAgentSpeaking=true here would
|
||||
// cause EnqueueAgentAudio() to skip the first-chunk initialization path
|
||||
// (pre-buffer, Play(), state reset). Instead, let EnqueueAgentAudio() handle
|
||||
// the transition naturally when the first audio data actually arrives.
|
||||
// OnAgentStartedSpeaking is broadcast from EnqueueAgentAudio on all roles.
|
||||
}
|
||||
|
||||
void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastAgentStoppedSpeaking_Implementation()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user