Fix network saturation, lazy init, body tracking, and mic race condition
- Silence gate: skip sending silent mic audio over network RPCs on clients (~256 Kbits/s saved when not speaking, fixes chaotic teleporting)
- Lazy init: defer InteractionComponent mic creation from BeginPlay to TickComponent with IsLocallyControlled guard (fixes "No owning connection" from server-side replicas of remote pawns)
- Body tracking: use bNetIsConversing as fallback for IsConnected() on clients where WebSocket doesn't exist
- EvaluateBestAgent: null-check NetConversatingPawn before comparison
- MicCaptureComponent: use TWeakObjectPtr in AsyncTask lambda to prevent FMRSWRecursiveAccessDetector race on component destruction

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3c4389a43d
commit
215cb398fd
@ -592,6 +592,27 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::FeedExternalAudio(const TArray<float>
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Network silence gate ────────────────────────────────────────────────
|
||||
// On clients the mic audio is sent via unreliable RPCs (~3200 bytes every
|
||||
// 100ms = ~256 Kbits/s). Sending silence saturates the connection and
|
||||
// starves movement replication, causing chaotic teleporting.
|
||||
// Skip silent chunks on clients only — the server path uses a local
|
||||
// WebSocket and doesn't consume game replication bandwidth, so it keeps the full stream
|
||||
// for proper ElevenLabs VAD (voice-activity detection).
|
||||
if (GetOwnerRole() != ROLE_Authority)
|
||||
{
|
||||
float SumSq = 0.0f;
|
||||
for (float Sample : FloatPCM)
|
||||
{
|
||||
SumSq += Sample * Sample;
|
||||
}
|
||||
const float Rms = FMath::Sqrt(SumSq / FMath::Max(1, FloatPCM.Num()));
|
||||
if (Rms < 0.005f) // ~-46 dBFS — well below any speech level
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
TArray<uint8> PCMBytes = FloatPCMToInt16Bytes(FloatPCM);
|
||||
|
||||
FScopeLock Lock(&MicSendLock);
|
||||
@ -1237,6 +1258,21 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnMicrophoneDataCaptured(const TArray
|
||||
return;
|
||||
}
|
||||
|
||||
// Network silence gate — same logic as FeedExternalAudio (see that function for details).
|
||||
if (GetOwnerRole() != ROLE_Authority)
|
||||
{
|
||||
float SumSq = 0.0f;
|
||||
for (float Sample : FloatPCM)
|
||||
{
|
||||
SumSq += Sample * Sample;
|
||||
}
|
||||
const float Rms = FMath::Sqrt(SumSq / FMath::Max(1, FloatPCM.Num()));
|
||||
if (Rms < 0.005f)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert this callback's samples to int16 bytes and accumulate.
|
||||
// WASAPI fires every ~5ms (158 bytes at 16kHz). ElevenLabs needs ≥100ms
|
||||
// (3200 bytes) per chunk for reliable VAD and STT. We hold bytes here
|
||||
|
||||
@ -31,36 +31,10 @@ void UPS_AI_ConvAgent_InteractionComponent::BeginPlay()
|
||||
{
|
||||
Super::BeginPlay();
|
||||
|
||||
AActor* Owner = GetOwner();
|
||||
if (!Owner) return;
|
||||
|
||||
// Only run interaction logic on the locally controlled pawn.
|
||||
// In a listen server, the server-side copy of a remote client's pawn also has
|
||||
// this component, but it must NOT tick, evaluate agents, or create mic components.
|
||||
// The client handles all interaction locally and routes through relay RPCs.
|
||||
// Without this guard, the server-side tick would start conversations using
|
||||
// GetFirstPlayerController() = server's PC, setting NetConversatingPawn to the
|
||||
// wrong player (server instead of client).
|
||||
APawn* OwnerPawn = Cast<APawn>(Owner);
|
||||
if (OwnerPawn && !OwnerPawn->IsLocallyControlled())
|
||||
{
|
||||
SetComponentTickEnabled(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Create mic capture component on the pawn.
|
||||
MicComponent = Owner->FindComponentByClass<UPS_AI_ConvAgent_MicrophoneCaptureComponent>();
|
||||
if (!MicComponent)
|
||||
{
|
||||
MicComponent = NewObject<UPS_AI_ConvAgent_MicrophoneCaptureComponent>(
|
||||
Owner, TEXT("PS_AI_ConvAgent_InteractionMic"));
|
||||
MicComponent->RegisterComponent();
|
||||
}
|
||||
|
||||
// Bind mic audio callback.
|
||||
MicComponent->OnAudioCaptured.AddUObject(this,
|
||||
&UPS_AI_ConvAgent_InteractionComponent::OnMicAudioCaptured);
|
||||
|
||||
// Mic creation and agent evaluation are deferred to TickComponent.
|
||||
// IsLocallyControlled() may return false in BeginPlay when the PlayerController
|
||||
// hasn't been replicated/possessed yet (common on remote clients at join time).
|
||||
// TickComponent performs the check reliably and does lazy init.
|
||||
}
|
||||
|
||||
void UPS_AI_ConvAgent_InteractionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
|
||||
@ -99,13 +73,61 @@ void UPS_AI_ConvAgent_InteractionComponent::EndPlay(const EEndPlayReason::Type E
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tick — agent selection
|
||||
// Tick — lazy init + agent selection
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
void UPS_AI_ConvAgent_InteractionComponent::TickComponent(float DeltaTime, ELevelTick TickType,
|
||||
FActorComponentTickFunction* ThisTickFunction)
|
||||
{
|
||||
Super::TickComponent(DeltaTime, TickType, ThisTickFunction);
|
||||
|
||||
// ── Lazy init ─────────────────────────────────────────────────────────
|
||||
// Deferred from BeginPlay because IsLocallyControlled() is unreliable
|
||||
// before the PlayerController has been replicated and possessed the pawn
|
||||
// (common issue on remote clients at join time).
|
||||
if (!bInitialized)
|
||||
{
|
||||
APawn* OwnerPawn = Cast<APawn>(GetOwner());
|
||||
if (!OwnerPawn)
|
||||
{
|
||||
// Not on a pawn — disable tick entirely to avoid wasting CPU.
|
||||
PrimaryComponentTick.SetTickFunctionEnable(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait until the pawn has a valid controller (possession completed).
|
||||
if (!OwnerPawn->GetController())
|
||||
{
|
||||
return; // Try again next tick.
|
||||
}
|
||||
|
||||
if (!OwnerPawn->IsLocallyControlled())
|
||||
{
|
||||
// This is a server-side replica of a remote player's pawn.
|
||||
// InteractionComponent only makes sense on the locally controlled pawn.
|
||||
// Disable tick to stop wasting CPU and prevent "No owning connection" errors
|
||||
// (calling Server RPCs from a non-owning context).
|
||||
PrimaryComponentTick.SetTickFunctionEnable(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// ── This is the locally controlled pawn — create the mic component ──
|
||||
MicComponent = NewObject<UPS_AI_ConvAgent_MicrophoneCaptureComponent>(
|
||||
GetOwner(), TEXT("PS_AI_ConvAgent_Mic_Interaction"));
|
||||
MicComponent->RegisterComponent();
|
||||
MicComponent->OnAudioCaptured.AddUObject(this,
|
||||
&UPS_AI_ConvAgent_InteractionComponent::OnMicAudioCaptured);
|
||||
|
||||
bInitialized = true;
|
||||
|
||||
if (bDebug)
|
||||
{
|
||||
UE_LOG(LogPS_AI_ConvAgent_Select, Log,
|
||||
TEXT("InteractionComponent initialized on locally controlled pawn: %s"),
|
||||
*OwnerPawn->GetName());
|
||||
}
|
||||
}
|
||||
|
||||
// ── Agent selection (runs every tick after init) ──────────────────────
|
||||
UPS_AI_ConvAgent_ElevenLabsComponent* BestAgent = EvaluateBestAgent();
|
||||
|
||||
// Check if selection changed.
|
||||
@ -157,7 +179,11 @@ UPS_AI_ConvAgent_ElevenLabsComponent* UPS_AI_ConvAgent_InteractionComponent::Eva
|
||||
// Network: skip agents that are in conversation with a different player.
|
||||
// Use NetConversatingPawn (replicated to all) instead of NetConversatingPlayer
|
||||
// (NULL on remote clients because APlayerController has bOnlyRelevantToOwner=true).
|
||||
if (Agent->bNetIsConversing && Agent->NetConversatingPawn != LocalPawn)
|
||||
// Null-check NetConversatingPawn: it may not have replicated yet when
|
||||
// bNetIsConversing arrives first (OnRep ordering is not guaranteed).
|
||||
if (Agent->bNetIsConversing
|
||||
&& Agent->NetConversatingPawn
|
||||
&& Agent->NetConversatingPawn != LocalPawn)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@ -467,7 +493,9 @@ void UPS_AI_ConvAgent_InteractionComponent::AttachPostureTarget(
|
||||
// so HandleAgentResponseStarted won't fire again until the player speaks.
|
||||
// On first interaction the agent isn't connected yet, so we start with
|
||||
// eyes+head only and let HandleAgentResponseStarted enable body later.
|
||||
Posture->bEnableBodyTracking = AgentPtr->IsConnected();
|
||||
// Network: on clients IsConnected() is always false (no local WebSocket),
|
||||
// so also check the replicated bNetIsConversing flag.
|
||||
Posture->bEnableBodyTracking = AgentPtr->IsConnected() || AgentPtr->bNetIsConversing;
|
||||
|
||||
if (bDebug)
|
||||
{
|
||||
|
||||
@ -115,13 +115,22 @@ void UPS_AI_ConvAgent_MicrophoneCaptureComponent::OnAudioGenerate(
|
||||
// UE's FMulticastDelegate with AddUObject uses weak object pointer checks that
|
||||
// are not thread-safe — broadcasting from the WASAPI thread causes the invocation
|
||||
// to be silently skipped. The game thread dispatch adds ~8ms latency but is required.
|
||||
//
|
||||
// Capture a TWeakObjectPtr instead of raw `this` to prevent accessing a
|
||||
// destroyed component. The queued game-thread task can run after EndPlay/GC has
|
||||
// collected the component — checking the weak pointer on the game thread
|
||||
// avoids the "trivially relocated" FMRSWRecursiveAccessDetector ensure.
|
||||
if (bCapturing)
|
||||
{
|
||||
AsyncTask(ENamedThreads::GameThread, [this, Captured = MoveTemp(Resampled)]()
|
||||
TWeakObjectPtr<UPS_AI_ConvAgent_MicrophoneCaptureComponent> WeakThis(this);
|
||||
AsyncTask(ENamedThreads::GameThread, [WeakThis, Captured = MoveTemp(Resampled)]()
|
||||
{
|
||||
if (bCapturing)
|
||||
if (UPS_AI_ConvAgent_MicrophoneCaptureComponent* Self = WeakThis.Get())
|
||||
{
|
||||
OnAudioCaptured.Broadcast(Captured);
|
||||
if (Self->bCapturing)
|
||||
{
|
||||
Self->OnAudioCaptured.Broadcast(Captured);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -263,10 +263,15 @@ private:
|
||||
/** Currently selected agent (weak pointer for safety). */
|
||||
TWeakObjectPtr<UPS_AI_ConvAgent_ElevenLabsComponent> SelectedAgent;
|
||||
|
||||
/** Microphone capture component (created on the pawn in BeginPlay). */
|
||||
/** Microphone capture component (created lazily in TickComponent). */
|
||||
UPROPERTY()
|
||||
UPS_AI_ConvAgent_MicrophoneCaptureComponent* MicComponent = nullptr;
|
||||
|
||||
/** True once the one-time lazy init in TickComponent has completed.
|
||||
* Deferred from BeginPlay because IsLocallyControlled() may return false
|
||||
* before the PlayerController has been replicated/possessed. */
|
||||
bool bInitialized = false;
|
||||
|
||||
// ── Posture timers ───────────────────────────────────────────────────────
|
||||
|
||||
FTimerHandle PostureAttachTimerHandle;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user