Fix body expression sync, conversation stability, and persistent session disconnect

- Sync body animation with actual audio playback via new OnAudioPlaybackStarted delegate instead of OnAgentStartedSpeaking (accounts for pre-buffer delay)
- Fix stale pre-buffer broadcasts by cancelling bPreBuffering on silence detection and guarding pre-buffer timeout with bAgentSpeaking check
- Smooth body crossfade using FInterpTo instead of linear interpolation
- Add conversation lock in EvaluateBestAgent: keep agent selected during active conversation regardless of view cone (distance-only check prevents deselect flicker on fast camera turns)
- Broadcast OnAgentDisconnected in persistent session EndConversation so all expression components (body, facial, lip sync, gaze) properly deactivate when the player leaves the interaction zone

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 12:18:35 +01:00
parent 2e96e3c766
commit fb641d5aa4
5 changed files with 93 additions and 61 deletions
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_BodyExpressionComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_BodyExpressionComponent.cpp
@@ -44,7 +44,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::BeginPlay()
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
 		Agent->OnAgentDisconnected.AddDynamic(
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
-		Agent->OnAgentStartedSpeaking.AddDynamic(
+		Agent->OnAudioPlaybackStarted.AddDynamic(
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
 		Agent->OnAgentStoppedSpeaking.AddDynamic(
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@@ -97,7 +97,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::EndPlay(const EEndPlayReason::Typ
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
 		AgentComponent->OnAgentDisconnected.RemoveDynamic(
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
-		AgentComponent->OnAgentStartedSpeaking.RemoveDynamic(
+		AgentComponent->OnAudioPlaybackStarted.RemoveDynamic(
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
 		AgentComponent->OnAgentStoppedSpeaking.RemoveDynamic(
 			this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@@ -185,49 +185,25 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::SwitchToNewAnim(UAnimSequence* Ne
 	if (!bForce && NewAnim == ActiveAnim) return;
 	if (!NewAnim) return;

-	if (CrossfadeAlpha < 1.0f && PrevAnim)
+	// Always start a fresh crossfade from whatever is currently active.
+	// If a crossfade was in progress, the old PrevAnim is lost, but the
+	// transition FROM the current ActiveAnim (at its current time) to the
+	// new anim will always be smooth and predictable.
+	PrevAnim = ActiveAnim;
+	PrevPlaybackTime = ActivePlaybackTime;
+
+	ActiveAnim = NewAnim;
+	ActivePlaybackTime = 0.0f;
+	CrossfadeAlpha = 0.0f;
+
+	if (bDebug && DebugVerbosity >= 1)
 	{
-		// Mid-crossfade: a crossfade is already in progress.
-		// DON'T reset CrossfadeAlpha — just swap the target animation.
-		// This preserves PrevAnim's contribution and avoids a visual pop.
-		//
-		//   Before: Blend(PrevAnim, OldActive, alpha)   e.g. 70% Prev + 30% Active
-		//   After:  Blend(PrevAnim, NewAnim,   alpha)   e.g. 70% Prev + 30% New
-		//
-		// The crossfade continues naturally — New fades in, Prev fades out.
-		// Pop is only 30% * (New@0 - OldActive@t) instead of 70% * (Prev - Active).
-		ActiveAnim = NewAnim;
-		ActivePlaybackTime = 0.0f;
-		// CrossfadeAlpha stays where it is — continuity
-
-		if (bDebug && DebugVerbosity >= 1)
-		{
-			UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
-				TEXT("Body anim switch (MID-CROSSFADE α=%.2f): target → %s (%s, %s)"),
-				CrossfadeAlpha, *NewAnim->GetName(),
-				bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
-				*UEnum::GetValueAsString(ActiveEmotion));
-		}
-	}
-	else
-	{
-		// No crossfade in progress — normal switch with full crossfade
-		PrevAnim = ActiveAnim;
-		PrevPlaybackTime = ActivePlaybackTime;
-
-		ActiveAnim = NewAnim;
-		ActivePlaybackTime = 0.0f;
-		CrossfadeAlpha = 0.0f;
-
-		if (bDebug && DebugVerbosity >= 1)
-		{
-			UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
-				TEXT("Body anim switch: %s → %s (%s, %s)"),
-				PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"),
-				*NewAnim->GetName(),
-				bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
-				*UEnum::GetValueAsString(ActiveEmotion));
-		}
+		UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
+			TEXT("Body anim switch: %s → %s (%s, %s)"),
+			PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"),
+			*NewAnim->GetName(),
+			bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
+			*UEnum::GetValueAsString(ActiveEmotion));
 	}
 }

@@ -387,7 +363,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
 					this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
 				Agent->OnAgentDisconnected.AddDynamic(
 					this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
-				Agent->OnAgentStartedSpeaking.AddDynamic(
+				Agent->OnAudioPlaybackStarted.AddDynamic(
 					this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
 				Agent->OnAgentStoppedSpeaking.AddDynamic(
 					this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@@ -480,12 +456,16 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(

 	if (CrossfadeAlpha < 1.0f)
 	{
-		const float BlendSpeed = 1.0f / FMath::Max(0.05f, EmotionBlendDuration);
-		CrossfadeAlpha = FMath::Min(1.0f, CrossfadeAlpha + DeltaTime * BlendSpeed);
+		// Exponential ease-out: fast start, gradual approach to 1.0.
+		// Factor of 3 compensates for FInterpTo's exponential decay
+		// reaching ~95% in EmotionBlendDuration seconds.
+		const float InterpSpeed = 3.0f / FMath::Max(0.05f, EmotionBlendDuration);
+		CrossfadeAlpha = FMath::FInterpTo(CrossfadeAlpha, 1.0f, DeltaTime, InterpSpeed);

-		// Crossfade complete — release previous anim
-		if (CrossfadeAlpha >= 1.0f)
+		// Snap to 1.0 when close enough, release previous anim
+		if (CrossfadeAlpha > 0.999f)
 		{
+			CrossfadeAlpha = 1.0f;
 			PrevAnim = nullptr;
 			PrevPlaybackTime = 0.0f;
 		}
@@ -498,8 +478,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
 		CurrentSnapshot.PrevAnim = PrevAnim;
 		CurrentSnapshot.ActiveTime = ActivePlaybackTime;
 		CurrentSnapshot.PrevTime = PrevPlaybackTime;
-		// Apply SmoothStep for ease-in-out crossfade (raw alpha is linear)
-		CurrentSnapshot.CrossfadeAlpha = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha);
+		// FInterpTo already provides exponential easing — pass alpha directly.
+		CurrentSnapshot.CrossfadeAlpha = CrossfadeAlpha;
 		CurrentSnapshot.ActivationAlpha = CurrentActiveAlpha;
 		CurrentSnapshot.BlendWeight = BlendWeight;
 	}
@@ -526,9 +506,6 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const
 	FString ActiveName = ActiveAnim ? ActiveAnim->GetName() : TEXT("(none)");
 	FString PrevName = PrevAnim ? PrevAnim->GetName() : TEXT("---");

-	// Smoothed crossfade for display
-	const float SmoothedCrossfade = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha);
-
 	// State label
 	FString StateStr;
 	if (!bActive)
@@ -562,8 +539,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const

 	GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime,
 		CrossfadeAlpha < 1.0f ? WarnColor : MainColor,
-		FString::Printf(TEXT("  Crossfade: %.3f (smooth: %.3f)  Prev: %s"),
-			CrossfadeAlpha, SmoothedCrossfade, *PrevName));
+		FString::Printf(TEXT("  Crossfade: %.3f  Prev: %s"),
+			CrossfadeAlpha, *PrevName));

 	GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, MainColor,
 		FString::Printf(TEXT("  Emotion: %s (%s)  Weight: %.2f"),
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
@@ -145,9 +145,15 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 					TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
 					Tpb, LastClosedTurnIndex, AudioPreBufferMs);
 			}
-			if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
+			// Only start playback if the agent is still speaking.
+			// If silence detection already set bAgentSpeaking=false, this is stale.
+			if (bAgentSpeaking)
 			{
-				AudioPlaybackComponent->Play();
+				if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
+				{
+					AudioPlaybackComponent->Play();
+				}
+				OnAudioPlaybackStarted.Broadcast();
 			}
 		}
 	}
@@ -223,6 +229,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 			{
 				bHardTimeoutFired = bHardTimeout && !bAgentResponseReceived;
 				bAgentSpeaking = false;
+				bPreBuffering = false;  // Cancel pending pre-buffer to prevent stale OnAudioPlaybackStarted.
 				bAgentResponseReceived = false;
 				SilentTickCount = 0;
 				bShouldBroadcastStopped = true;
@@ -367,9 +374,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EndConversation()
 			{
 				bIntentionalDisconnect = true;
 				WebSocketProxy->Disconnect();
+				// OnClosed callback will fire OnAgentDisconnected.
 				WebSocketProxy = nullptr;
 			}
 		}
+		else
+		{
+			// Persistent mode: WebSocket stays alive but the interaction is over.
+			// Broadcast OnAgentDisconnected so expression components deactivate
+			// (body, facial, etc.). The WebSocket OnClosed never fires here.
+			OnAgentDisconnected.Broadcast(1000, TEXT("EndConversation (persistent)"));
+		}

 		// Reset replicated state so other players can talk to this NPC.
 		bNetIsConversing = false;
@@ -1333,9 +1348,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 					Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
 			}
 		}
-		else if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
+		else
 		{
-			AudioPlaybackComponent->Play();
+			if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
+			{
+				AudioPlaybackComponent->Play();
+			}
+			OnAudioPlaybackStarted.Broadcast();
 		}
 	}
 	else if (bPreBuffering)
@@ -1361,6 +1380,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 			{
 				AudioPlaybackComponent->Play();
 			}
+			OnAudioPlaybackStarted.Broadcast();
 		}
 		SilentTickCount = 0;
 	}
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp
@@ -165,6 +165,26 @@ UPS_AI_ConvAgent_ElevenLabsComponent* UPS_AI_ConvAgent_InteractionComponent::Eva

 	UPS_AI_ConvAgent_ElevenLabsComponent* CurrentAgent = SelectedAgent.Get();

+	// ── Conversation lock ──────────────────────────────────────────────
+	// While we're actively conversing with an agent, keep it selected as
+	// long as it's within interaction distance — ignore the view cone.
+	// This prevents deselect/reselect flicker when the player turns quickly
+	// (which would cause spurious OnAgentConnected re-broadcasts in
+	// persistent session mode).
+	if (CurrentAgent && CurrentAgent->bNetIsConversing)
+	{
+		if (AActor* AgentActor = CurrentAgent->GetOwner())
+		{
+			const FVector AgentLoc = AgentActor->GetActorLocation()
+				+ FVector(0.0f, 0.0f, AgentEyeLevelOffset);
+			const float DistSq = (AgentLoc - ViewLocation).SizeSquared();
+			if (DistSq <= MaxDistSq)
+			{
+				return CurrentAgent; // Keep conversing agent selected.
+			}
+		}
+	}
+
 	// Get local player's pawn for occupied-NPC check.
 	// Use pawn (replicated to ALL clients) instead of PlayerController
 	// (only replicated to owning client due to bOnlyRelevantToOwner=true).
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_BodyExpressionComponent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_BodyExpressionComponent.h
@@ -86,7 +86,7 @@ public:
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",
 		meta = (ClampMin = "0.1", ClampMax = "3.0",
 		ToolTip = "How long (seconds) to crossfade between animations.\n0.5 = snappy, 1.5 = smooth."))
-	float EmotionBlendDuration = 0.5f;
+	float EmotionBlendDuration = 1.0f;

 	/** Overall blend weight for body expressions. 1.0 = full, 0.5 = subtle. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h
@@ -39,6 +39,14 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStartedSpeaking);
 DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStoppedSpeaking);
 DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted);

+/**
+ * Fired when audio playback actually starts — AFTER any pre-buffering delay.
+ * Unlike OnAgentStartedSpeaking (which fires at the first audio chunk arrival),
+ * this fires when the AudioComponent calls Play(), meaning the audio is now audible.
+ * Use this when you need animation/behaviour synced with audible speech.
+ */
+DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAudioPlaybackStarted);
+
 /**
 * Fired when the server sends its first agent_chat_response_part — i.e. the moment
 * the LLM starts generating, well before audio arrives.
@@ -253,6 +261,13 @@ public:
 		meta = (ToolTip = "Fires when the agent starts speaking (first audio chunk). Use for lip-sync or UI feedback."))
 	FOnAgentStartedSpeaking OnAgentStartedSpeaking;

+	/** Fired when audio playback actually starts — AFTER any pre-buffering delay.
+	 *  Unlike OnAgentStartedSpeaking (first chunk arrival), this fires when audio is audible.
+	 *  Use this for body/gesture animations that should be synced with audible speech. */
+	UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
+		meta = (ToolTip = "Fires when audio playback actually starts (after pre-buffering).\nSynced with audible speech. Use for body animations."))
+	FOnAudioPlaybackStarted OnAudioPlaybackStarted;
+
 	/** Fired when the agent finishes playing all audio. Use this to re-open the microphone (in Server VAD mode without interruption) or update UI. */
 	UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
 		meta = (ToolTip = "Fires when the agent finishes speaking. Use to re-open the mic or update UI."))