Fix network saturation, lazy init, body tracking, and mic race condition

- Silence gate: skip sending silent mic audio over network RPCs on clients
  (~256 Kbits/s saved when not speaking, fixes chaotic teleporting)
- Lazy init: defer InteractionComponent mic creation from BeginPlay to
  TickComponent with IsLocallyControlled guard (fixes "No owning connection"
  from server-side replicas of remote pawns)
- Body tracking: use bNetIsConversing as fallback for IsConnected() on
  clients where WebSocket doesn't exist
- EvaluateBestAgent: null-check NetConversatingPawn before comparison
- MicCaptureComponent: use TWeakObjectPtr in AsyncTask lambda to prevent
  FMRSWRecursiveAccessDetector race on component destruction

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-03-02 17:23:26 +01:00
parent 3c4389a43d
commit 215cb398fd
4 changed files with 115 additions and 37 deletions

View File

@ -592,6 +592,27 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::FeedExternalAudio(const TArray<float>
return;
}
// ── Network silence gate ────────────────────────────────────────────────
// On clients the mic audio is sent via unreliable RPCs (~3200 bytes every
// 100ms = ~256 Kbits/s). Sending silence saturates the connection and
// starves movement replication, causing chaotic teleporting.
// Skip silent chunks on clients only — the server path uses a local
// WebSocket and doesn't touch the network, so it keeps the full stream
// for proper ElevenLabs VAD (voice-activity detection).
if (GetOwnerRole() != ROLE_Authority)
{
float SumSq = 0.0f;
for (float Sample : FloatPCM)
{
SumSq += Sample * Sample;
}
const float Rms = FMath::Sqrt(SumSq / FMath::Max(1, FloatPCM.Num()));
if (Rms < 0.005f) // ~-46 dBFS — well below any speech level
{
return;
}
}
TArray<uint8> PCMBytes = FloatPCMToInt16Bytes(FloatPCM);
FScopeLock Lock(&MicSendLock);
@ -1237,6 +1258,21 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnMicrophoneDataCaptured(const TArray
return;
}
// Network silence gate — same logic as FeedExternalAudio (see that function for details).
if (GetOwnerRole() != ROLE_Authority)
{
float SumSq = 0.0f;
for (float Sample : FloatPCM)
{
SumSq += Sample * Sample;
}
const float Rms = FMath::Sqrt(SumSq / FMath::Max(1, FloatPCM.Num()));
if (Rms < 0.005f)
{
return;
}
}
// Convert this callback's samples to int16 bytes and accumulate.
// WASAPI fires every ~5ms (158 bytes at 16kHz). ElevenLabs needs ≥100ms
// (3200 bytes) per chunk for reliable VAD and STT. We hold bytes here

View File

@ -31,36 +31,10 @@ void UPS_AI_ConvAgent_InteractionComponent::BeginPlay()
{
Super::BeginPlay();
AActor* Owner = GetOwner();
if (!Owner) return;
// Only run interaction logic on the locally controlled pawn.
// In a listen server, the server-side copy of a remote client's pawn also has
// this component, but it must NOT tick, evaluate agents, or create mic components.
// The client handles all interaction locally and routes through relay RPCs.
// Without this guard, the server-side tick would start conversations using
// GetFirstPlayerController() = server's PC, setting NetConversatingPawn to the
// wrong player (server instead of client).
APawn* OwnerPawn = Cast<APawn>(Owner);
if (OwnerPawn && !OwnerPawn->IsLocallyControlled())
{
SetComponentTickEnabled(false);
return;
}
// Create mic capture component on the pawn.
MicComponent = Owner->FindComponentByClass<UPS_AI_ConvAgent_MicrophoneCaptureComponent>();
if (!MicComponent)
{
MicComponent = NewObject<UPS_AI_ConvAgent_MicrophoneCaptureComponent>(
Owner, TEXT("PS_AI_ConvAgent_InteractionMic"));
MicComponent->RegisterComponent();
}
// Bind mic audio callback.
MicComponent->OnAudioCaptured.AddUObject(this,
&UPS_AI_ConvAgent_InteractionComponent::OnMicAudioCaptured);
// Mic creation and agent evaluation are deferred to TickComponent.
// IsLocallyControlled() may return false in BeginPlay when the PlayerController
// hasn't been replicated/possessed yet (common on remote clients at join time).
// TickComponent performs the check reliably and does lazy init.
}
void UPS_AI_ConvAgent_InteractionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
@ -99,13 +73,61 @@ void UPS_AI_ConvAgent_InteractionComponent::EndPlay(const EEndPlayReason::Type E
}
// ─────────────────────────────────────────────────────────────────────────────
// Tick — agent selection
// Tick — lazy init + agent selection
// ─────────────────────────────────────────────────────────────────────────────
void UPS_AI_ConvAgent_InteractionComponent::TickComponent(float DeltaTime, ELevelTick TickType,
FActorComponentTickFunction* ThisTickFunction)
{
Super::TickComponent(DeltaTime, TickType, ThisTickFunction);
// ── Lazy init ─────────────────────────────────────────────────────────
// Deferred from BeginPlay because IsLocallyControlled() is unreliable
// before the PlayerController has been replicated and possessed the pawn
// (common issue on remote clients at join time).
if (!bInitialized)
{
APawn* OwnerPawn = Cast<APawn>(GetOwner());
if (!OwnerPawn)
{
// Not on a pawn — disable tick entirely to avoid wasting CPU.
PrimaryComponentTick.SetTickFunctionEnable(false);
return;
}
// Wait until the pawn has a valid controller (possession completed).
if (!OwnerPawn->GetController())
{
return; // Try again next tick.
}
if (!OwnerPawn->IsLocallyControlled())
{
// This is a server-side replica of a remote player's pawn.
// InteractionComponent only makes sense on the locally controlled pawn.
// Disable tick to stop wasting CPU and prevent "No owning connection" errors
// (calling Server RPCs from a non-owning context).
PrimaryComponentTick.SetTickFunctionEnable(false);
return;
}
// ── This is the locally controlled pawn — create the mic component ──
MicComponent = NewObject<UPS_AI_ConvAgent_MicrophoneCaptureComponent>(
GetOwner(), TEXT("PS_AI_ConvAgent_Mic_Interaction"));
MicComponent->RegisterComponent();
MicComponent->OnAudioCaptured.AddUObject(this,
&UPS_AI_ConvAgent_InteractionComponent::OnMicAudioCaptured);
bInitialized = true;
if (bDebug)
{
UE_LOG(LogPS_AI_ConvAgent_Select, Log,
TEXT("InteractionComponent initialized on locally controlled pawn: %s"),
*OwnerPawn->GetName());
}
}
// ── Agent selection (runs every tick after init) ──────────────────────
UPS_AI_ConvAgent_ElevenLabsComponent* BestAgent = EvaluateBestAgent();
// Check if selection changed.
@ -157,7 +179,11 @@ UPS_AI_ConvAgent_ElevenLabsComponent* UPS_AI_ConvAgent_InteractionComponent::Eva
// Network: skip agents that are in conversation with a different player.
// Use NetConversatingPawn (replicated to all) instead of NetConversatingPlayer
// (NULL on remote clients because APlayerController has bOnlyRelevantToOwner=true).
if (Agent->bNetIsConversing && Agent->NetConversatingPawn != LocalPawn)
// Null-check NetConversatingPawn: it may not have replicated yet when
// bNetIsConversing arrives first (OnRep ordering is not guaranteed).
if (Agent->bNetIsConversing
&& Agent->NetConversatingPawn
&& Agent->NetConversatingPawn != LocalPawn)
{
continue;
}
@ -467,7 +493,9 @@ void UPS_AI_ConvAgent_InteractionComponent::AttachPostureTarget(
// so HandleAgentResponseStarted won't fire again until the player speaks.
// On first interaction the agent isn't connected yet, so we start with
// eyes+head only and let HandleAgentResponseStarted enable body later.
Posture->bEnableBodyTracking = AgentPtr->IsConnected();
// Network: on clients IsConnected() is always false (no local WebSocket),
// so also check the replicated bNetIsConversing flag.
Posture->bEnableBodyTracking = AgentPtr->IsConnected() || AgentPtr->bNetIsConversing;
if (bDebug)
{

View File

@ -115,13 +115,22 @@ void UPS_AI_ConvAgent_MicrophoneCaptureComponent::OnAudioGenerate(
// UE's FMulticastDelegate with AddUObject uses weak object pointer checks that
// are not thread-safe — broadcasting from the WASAPI thread causes the invocation
// to be silently skipped. The game thread dispatch adds ~8ms latency but is required.
//
// Capture a TWeakObjectPtr instead of raw `this` to prevent accessing a
// destroyed component. The WASAPI callback can fire after EndPlay/GC has
// collected the component — checking the weak pointer on the game thread
// avoids the "trivially relocated" FMRSWRecursiveAccessDetector ensure.
if (bCapturing)
{
AsyncTask(ENamedThreads::GameThread, [this, Captured = MoveTemp(Resampled)]()
TWeakObjectPtr<UPS_AI_ConvAgent_MicrophoneCaptureComponent> WeakThis(this);
AsyncTask(ENamedThreads::GameThread, [WeakThis, Captured = MoveTemp(Resampled)]()
{
if (bCapturing)
if (UPS_AI_ConvAgent_MicrophoneCaptureComponent* Self = WeakThis.Get())
{
OnAudioCaptured.Broadcast(Captured);
if (Self->bCapturing)
{
Self->OnAudioCaptured.Broadcast(Captured);
}
}
});
}

View File

@ -263,10 +263,15 @@ private:
/** Currently selected agent (weak pointer for safety). */
TWeakObjectPtr<UPS_AI_ConvAgent_ElevenLabsComponent> SelectedAgent;
/** Microphone capture component (created on the pawn in BeginPlay). */
/** Microphone capture component (created lazily in TickComponent). */
UPROPERTY()
UPS_AI_ConvAgent_MicrophoneCaptureComponent* MicComponent = nullptr;
/** True once the one-time lazy init in TickComponent has completed.
* Deferred from BeginPlay because IsLocallyControlled() may return false
* before the PlayerController has been replicated/possessed. */
bool bInitialized = false;
// ── Posture timers ───────────────────────────────────────────────────────
FTimerHandle PostureAttachTimerHandle;