Fix multi-player regressions: audio/text drops, thread safety

1. StartConversationWithSelectedAgent: remove the early return when the WebSocket is already connected (persistent mode). Always call ServerJoinConversation so the pawn is added to NetConnectedPawns and bNetIsConversing is set.
2. ServerSendMicAudioFromPlayer: bypass speaker arbitration in standalone mode (<=1 connected pawn). Send audio directly to avoid silent drops caused by the pawn not being in the NetConnectedPawns array. Add warning logs for multi-player drops to aid debugging.
3. OnMicrophoneDataCaptured: restore the direct WebSocketProxy->SendAudioChunk call on the server path. This callback runs on the WASAPI audio thread — accessing game-thread state (NetConnectedPawns, LastSpeakTime) from there was causing undefined behavior. The internal mic is always the local player, so no speaker arbitration is needed.
4. StopListening flush: send directly to the WebSocket (the active speaker is already established, so no arbitration is needed for the tail of the current turn).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 15:33:43 +01:00 · 2026-03-05 15:33:43 +01:00 · c922fd304c
commit c922fd304c
parent ca10689bb6
2 changed files with 59 additions and 30 deletions
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
@ -543,14 +543,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopListening()
 		{
 			if (GetOwnerRole() == ROLE_Authority)
 			{
-				// Route through speaker arbitration so the correct player's
+				// Flush the final chunk directly to WebSocket.
-				// final chunk is attributed properly in multi-player.
+				// This is the tail of the current turn — the active speaker
-				APawn* LocalPawn = nullptr;
+				// is already established, no arbitration needed.
-				if (const APlayerController* PC = GetWorld()->GetFirstPlayerController())
+				if (WebSocketProxy && IsConnected())
 				{
-					LocalPawn = PC->GetPawn();
+					WebSocketProxy->SendAudioChunk(MicAccumulationBuffer);
 				}
 				ServerSendMicAudioFromPlayer(LocalPawn, MicAccumulationBuffer);
 			}
 			else
 			{
@ -1523,16 +1522,14 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnMicrophoneDataCaptured(const TArray
 	{
 		if (GetOwnerRole() == ROLE_Authority)
 		{
-			// Route through speaker arbitration (local mic = local player).
+			// Internal mic = local player on the server. Send directly to WebSocket.
-			APawn* LocalPawn = nullptr;
+			// This callback runs on the WASAPI audio thread — accessing game-thread
-			if (UWorld* World = GetWorld())
+			// state (NetConnectedPawns, LastSpeakTime, etc.) is NOT safe here.
 			// Speaker arbitration is only needed for multi-player external mic.
 			if (WebSocketProxy && WebSocketProxy->IsConnected())
 			{
-				if (APlayerController* PC = World->GetFirstPlayerController())
+				WebSocketProxy->SendAudioChunk(MicAccumulationBuffer);
 				{
 					LocalPawn = PC->GetPawn();
 				}
 			}
 			ServerSendMicAudioFromPlayer(LocalPawn, MicAccumulationBuffer);
 		}
 		else
 		{
@ -1872,8 +1869,37 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::ServerSendMicAudio_Implementation(
 void UPS_AI_ConvAgent_ElevenLabsComponent::ServerSendMicAudioFromPlayer(
 	APawn* SpeakerPawn, const TArray<uint8>& PCMBytes)
 {
-	if (!SpeakerPawn || !WebSocketProxy || !WebSocketProxy->IsConnected()) return;
+	if (!WebSocketProxy || !WebSocketProxy->IsConnected()) return;
-	if (!NetConnectedPawns.Contains(SpeakerPawn)) return;
+
 	// Standalone / single-player: bypass speaker arbitration entirely.
 	// There's only one player — no need for Contains check or speaker switching.
 	if (NetConnectedPawns.Num() <= 1)
 	{
 		WebSocketProxy->SendAudioChunk(PCMBytes);
 		// Keep speaker state in sync for debug display.
 		if (SpeakerPawn && NetActiveSpeakerPawn != SpeakerPawn)
 		{
 			SetActiveSpeaker(SpeakerPawn);
 		}
 		return;
 	}
 	// Multi-player path: full speaker arbitration.
 	if (!SpeakerPawn)
 	{
 		UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
 			TEXT("ServerSendMicAudioFromPlayer: null SpeakerPawn — audio dropped."));
 		return;
 	}
 	if (!NetConnectedPawns.Contains(SpeakerPawn))
 	{
 		UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
 			TEXT("ServerSendMicAudioFromPlayer: pawn %s not in NetConnectedPawns (%d players) — audio dropped."),
 			*SpeakerPawn->GetName(), NetConnectedPawns.Num());
 		return;
 	}
 	const double Now = FPlatformTime::Seconds();
 	LastSpeakTime.FindOrAdd(SpeakerPawn) = Now;
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp
@ -498,29 +498,32 @@ void UPS_AI_ConvAgent_InteractionComponent::StartConversationWithSelectedAgent()
 		return;
 	}
 	if (Agent->IsConnected() || Agent->bNetIsConversing)
 	{
 		if (bDebug)
 		{
 			UE_LOG(LogPS_AI_ConvAgent_Select, Log, TEXT("StartConversationWithSelectedAgent: agent already connected/conversing."));
 		}
 		return;
 	}
 	if (bDebug)
 	{
-		UE_LOG(LogPS_AI_ConvAgent_Select, Log, TEXT("StartConversationWithSelectedAgent: starting conversation with %s"),
+		UE_LOG(LogPS_AI_ConvAgent_Select, Log, TEXT("StartConversationWithSelectedAgent: %s (connected=%s conversing=%s)"),
-			Agent->GetOwner() ? *Agent->GetOwner()->GetName() : TEXT("(null)"));
+			Agent->GetOwner() ? *Agent->GetOwner()->GetName() : TEXT("(null)"),
 			Agent->IsConnected() ? TEXT("true") : TEXT("false"),
 			Agent->bNetIsConversing ? TEXT("true") : TEXT("false"));
 	}
-	// Route through relay on clients (can't call Server RPCs on NPC actors).
+	// Always call Join (idempotent) — even if the WebSocket is already connected
 	// (persistent session mode), we need to add the pawn to NetConnectedPawns
 	// and set bNetIsConversing to true.
 	if (GetOwnerRole() == ROLE_Authority || (GetWorld() && GetWorld()->GetNetMode() == NM_Standalone))
 	{
-		Agent->StartConversation();
+		APlayerController* PC = nullptr;
 		if (APawn* Pawn = Cast<APawn>(GetOwner()))
 		{
 			PC = Cast<APlayerController>(Pawn->GetController());
 		}
 		if (PC)
 		{
 			Agent->ServerJoinConversation_Implementation(PC);
 		}
 	}
 	else
 	{
-		ServerRelayStartConversation(Agent->GetOwner());
+		ServerRelayJoinConversation(Agent->GetOwner());
 	}
 	// Ensure mic is capturing so we can route audio to the agent.