Fix multi-player regressions: audio/text drops, thread safety

1. StartConversationWithSelectedAgent: remove early return when WebSocket
   is already connected (persistent mode). Always call ServerJoinConversation
   so the pawn is added to NetConnectedPawns and bNetIsConversing is set.

2. ServerSendMicAudioFromPlayer: bypass speaker arbitration when at most
   one pawn is connected (NetConnectedPawns.Num() <= 1, i.e. standalone or
   single-player). Send audio directly to avoid silent drops caused by the
   pawn not being in the NetConnectedPawns array. Add warning logs for
   multi-player drops to aid debugging.

3. OnMicrophoneDataCaptured: restore direct WebSocketProxy->SendAudioChunk
   on the server path. This callback runs on the WASAPI audio thread —
   accessing game-thread state (NetConnectedPawns, LastSpeakTime) was
   causing undefined behavior. The internal mic always belongs to the local
   player, so no speaker arbitration is needed.

4. StopListening flush: send directly to WebSocket (active speaker already
   established, no arbitration needed for the tail of the current turn).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-03-05 15:33:43 +01:00
parent ca10689bb6
commit c922fd304c
2 changed files with 59 additions and 30 deletions

View File

@ -543,14 +543,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopListening()
{ {
if (GetOwnerRole() == ROLE_Authority) if (GetOwnerRole() == ROLE_Authority)
{ {
// Route through speaker arbitration so the correct player's // Flush the final chunk directly to WebSocket.
// final chunk is attributed properly in multi-player. // This is the tail of the current turn — the active speaker
APawn* LocalPawn = nullptr; // is already established, no arbitration needed.
if (const APlayerController* PC = GetWorld()->GetFirstPlayerController()) if (WebSocketProxy && IsConnected())
{ {
LocalPawn = PC->GetPawn(); WebSocketProxy->SendAudioChunk(MicAccumulationBuffer);
} }
ServerSendMicAudioFromPlayer(LocalPawn, MicAccumulationBuffer);
} }
else else
{ {
@ -1523,16 +1522,14 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnMicrophoneDataCaptured(const TArray
{ {
if (GetOwnerRole() == ROLE_Authority) if (GetOwnerRole() == ROLE_Authority)
{ {
// Route through speaker arbitration (local mic = local player). // Internal mic = local player on the server. Send directly to WebSocket.
APawn* LocalPawn = nullptr; // This callback runs on the WASAPI audio thread — accessing game-thread
if (UWorld* World = GetWorld()) // state (NetConnectedPawns, LastSpeakTime, etc.) is NOT safe here.
// Speaker arbitration is only needed for multi-player external mic.
if (WebSocketProxy && WebSocketProxy->IsConnected())
{ {
if (APlayerController* PC = World->GetFirstPlayerController()) WebSocketProxy->SendAudioChunk(MicAccumulationBuffer);
{
LocalPawn = PC->GetPawn();
}
} }
ServerSendMicAudioFromPlayer(LocalPawn, MicAccumulationBuffer);
} }
else else
{ {
@ -1872,8 +1869,37 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::ServerSendMicAudio_Implementation(
void UPS_AI_ConvAgent_ElevenLabsComponent::ServerSendMicAudioFromPlayer( void UPS_AI_ConvAgent_ElevenLabsComponent::ServerSendMicAudioFromPlayer(
APawn* SpeakerPawn, const TArray<uint8>& PCMBytes) APawn* SpeakerPawn, const TArray<uint8>& PCMBytes)
{ {
if (!SpeakerPawn || !WebSocketProxy || !WebSocketProxy->IsConnected()) return; if (!WebSocketProxy || !WebSocketProxy->IsConnected()) return;
if (!NetConnectedPawns.Contains(SpeakerPawn)) return;
// Standalone / single-player: bypass speaker arbitration entirely.
// There's only one player — no need for Contains check or speaker switching.
if (NetConnectedPawns.Num() <= 1)
{
WebSocketProxy->SendAudioChunk(PCMBytes);
// Keep speaker state in sync for debug display.
if (SpeakerPawn && NetActiveSpeakerPawn != SpeakerPawn)
{
SetActiveSpeaker(SpeakerPawn);
}
return;
}
// Multi-player path: full speaker arbitration.
if (!SpeakerPawn)
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("ServerSendMicAudioFromPlayer: null SpeakerPawn — audio dropped."));
return;
}
if (!NetConnectedPawns.Contains(SpeakerPawn))
{
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("ServerSendMicAudioFromPlayer: pawn %s not in NetConnectedPawns (%d players) — audio dropped."),
*SpeakerPawn->GetName(), NetConnectedPawns.Num());
return;
}
const double Now = FPlatformTime::Seconds(); const double Now = FPlatformTime::Seconds();
LastSpeakTime.FindOrAdd(SpeakerPawn) = Now; LastSpeakTime.FindOrAdd(SpeakerPawn) = Now;

View File

@ -498,29 +498,32 @@ void UPS_AI_ConvAgent_InteractionComponent::StartConversationWithSelectedAgent()
return; return;
} }
if (Agent->IsConnected() || Agent->bNetIsConversing)
{
if (bDebug)
{
UE_LOG(LogPS_AI_ConvAgent_Select, Log, TEXT("StartConversationWithSelectedAgent: agent already connected/conversing."));
}
return;
}
if (bDebug) if (bDebug)
{ {
UE_LOG(LogPS_AI_ConvAgent_Select, Log, TEXT("StartConversationWithSelectedAgent: starting conversation with %s"), UE_LOG(LogPS_AI_ConvAgent_Select, Log, TEXT("StartConversationWithSelectedAgent: %s (connected=%s conversing=%s)"),
Agent->GetOwner() ? *Agent->GetOwner()->GetName() : TEXT("(null)")); Agent->GetOwner() ? *Agent->GetOwner()->GetName() : TEXT("(null)"),
Agent->IsConnected() ? TEXT("true") : TEXT("false"),
Agent->bNetIsConversing ? TEXT("true") : TEXT("false"));
} }
// Route through relay on clients (can't call Server RPCs on NPC actors). // Always call Join (idempotent) — even if the WebSocket is already connected
// (persistent session mode), we need to add the pawn to NetConnectedPawns
// and set bNetIsConversing to true.
if (GetOwnerRole() == ROLE_Authority || (GetWorld() && GetWorld()->GetNetMode() == NM_Standalone)) if (GetOwnerRole() == ROLE_Authority || (GetWorld() && GetWorld()->GetNetMode() == NM_Standalone))
{ {
Agent->StartConversation(); APlayerController* PC = nullptr;
if (APawn* Pawn = Cast<APawn>(GetOwner()))
{
PC = Cast<APlayerController>(Pawn->GetController());
}
if (PC)
{
Agent->ServerJoinConversation_Implementation(PC);
}
} }
else else
{ {
ServerRelayStartConversation(Agent->GetOwner()); ServerRelayJoinConversation(Agent->GetOwner());
} }
// Ensure mic is capturing so we can route audio to the agent. // Ensure mic is capturing so we can route audio to the agent.