From fb641d5aa4dee46422c55fe520f85d30c35bfdaa Mon Sep 17 00:00:00 2001 From: "j.foucher" Date: Thu, 5 Mar 2026 12:18:35 +0100 Subject: [PATCH] Fix body expression sync, conversation stability, and persistent session disconnect - Sync body animation with actual audio playback via new OnAudioPlaybackStarted delegate instead of OnAgentStartedSpeaking (accounts for pre-buffer delay) - Fix stale pre-buffer broadcasts by cancelling bPreBuffering on silence detection and guarding pre-buffer timeout with bAgentSpeaking check - Smooth body crossfade using FInterpTo instead of linear interpolation - Add conversation lock in EvaluateBestAgent: keep agent selected during active conversation regardless of view cone (distance-only check prevents deselect flicker on fast camera turns) - Broadcast OnAgentDisconnected in persistent session EndConversation so all expression components (body, facial, lip sync, gaze) properly deactivate when the player leaves the interaction zone Co-Authored-By: Claude Opus 4.6 --- ...S_AI_ConvAgent_BodyExpressionComponent.cpp | 89 +++++++------------ .../PS_AI_ConvAgent_ElevenLabsComponent.cpp | 28 +++++- .../PS_AI_ConvAgent_InteractionComponent.cpp | 20 +++++ .../PS_AI_ConvAgent_BodyExpressionComponent.h | 2 +- .../PS_AI_ConvAgent_ElevenLabsComponent.h | 15 ++++ 5 files changed, 93 insertions(+), 61 deletions(-) diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_BodyExpressionComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_BodyExpressionComponent.cpp index 631247a..2a3ed1f 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_BodyExpressionComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_BodyExpressionComponent.cpp @@ -44,7 +44,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::BeginPlay() this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected); Agent->OnAgentDisconnected.AddDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected); - Agent->OnAgentStartedSpeaking.AddDynamic( + Agent->OnAudioPlaybackStarted.AddDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted); Agent->OnAgentStoppedSpeaking.AddDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped); @@ -97,7 +97,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::EndPlay(const EEndPlayReason::Typ this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected); AgentComponent->OnAgentDisconnected.RemoveDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected); - AgentComponent->OnAgentStartedSpeaking.RemoveDynamic( + AgentComponent->OnAudioPlaybackStarted.RemoveDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted); AgentComponent->OnAgentStoppedSpeaking.RemoveDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped); @@ -185,49 +185,25 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::SwitchToNewAnim(UAnimSequence* Ne if (!bForce && NewAnim == ActiveAnim) return; if (!NewAnim) return; - if (CrossfadeAlpha < 1.0f && PrevAnim) + // Always start a fresh crossfade from whatever is currently active. + // If a crossfade was in progress, the old PrevAnim is lost, but the + // transition FROM the current ActiveAnim (at its current time) to the + // new anim will always be smooth and predictable. + PrevAnim = ActiveAnim; + PrevPlaybackTime = ActivePlaybackTime; + + ActiveAnim = NewAnim; + ActivePlaybackTime = 0.0f; + CrossfadeAlpha = 0.0f; + + if (bDebug && DebugVerbosity >= 1) { - // Mid-crossfade: a crossfade is already in progress. - // DON'T reset CrossfadeAlpha — just swap the target animation. - // This preserves PrevAnim's contribution and avoids a visual pop. - // - // Before: Blend(PrevAnim, OldActive, alpha) e.g. 70% Prev + 30% Active - // After: Blend(PrevAnim, NewAnim, alpha) e.g. 70% Prev + 30% New - // - // The crossfade continues naturally — New fades in, Prev fades out. - // Pop is only 30% * (New@0 - OldActive@t) instead of 70% * (Prev - Active). - ActiveAnim = NewAnim; - ActivePlaybackTime = 0.0f; - // CrossfadeAlpha stays where it is — continuity - - if (bDebug && DebugVerbosity >= 1) - { - UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log, - TEXT("Body anim switch (MID-CROSSFADE α=%.2f): target → %s (%s, %s)"), - CrossfadeAlpha, *NewAnim->GetName(), - bIsSpeaking ? TEXT("speaking") : TEXT("idle"), - *UEnum::GetValueAsString(ActiveEmotion)); - } - } - else - { - // No crossfade in progress — normal switch with full crossfade - PrevAnim = ActiveAnim; - PrevPlaybackTime = ActivePlaybackTime; - - ActiveAnim = NewAnim; - ActivePlaybackTime = 0.0f; - CrossfadeAlpha = 0.0f; - - if (bDebug && DebugVerbosity >= 1) - { - UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log, - TEXT("Body anim switch: %s → %s (%s, %s)"), - PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"), - *NewAnim->GetName(), - bIsSpeaking ? TEXT("speaking") : TEXT("idle"), - *UEnum::GetValueAsString(ActiveEmotion)); - } + UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log, + TEXT("Body anim switch: %s → %s (%s, %s)"), + PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"), + *NewAnim->GetName(), + bIsSpeaking ? TEXT("speaking") : TEXT("idle"), + *UEnum::GetValueAsString(ActiveEmotion)); } } @@ -387,7 +363,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected); Agent->OnAgentDisconnected.AddDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected); - Agent->OnAgentStartedSpeaking.AddDynamic( + Agent->OnAudioPlaybackStarted.AddDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted); Agent->OnAgentStoppedSpeaking.AddDynamic( this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped); @@ -480,12 +456,16 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent( if (CrossfadeAlpha < 1.0f) { - const float BlendSpeed = 1.0f / FMath::Max(0.05f, EmotionBlendDuration); - CrossfadeAlpha = FMath::Min(1.0f, CrossfadeAlpha + DeltaTime * BlendSpeed); + // Exponential ease-out: fast start, gradual approach to 1.0. + // Factor of 3 compensates for FInterpTo's exponential decay + // reaching ~95% in EmotionBlendDuration seconds. + const float InterpSpeed = 3.0f / FMath::Max(0.05f, EmotionBlendDuration); + CrossfadeAlpha = FMath::FInterpTo(CrossfadeAlpha, 1.0f, DeltaTime, InterpSpeed); - // Crossfade complete — release previous anim - if (CrossfadeAlpha >= 1.0f) + // Snap to 1.0 when close enough, release previous anim + if (CrossfadeAlpha > 0.999f) { + CrossfadeAlpha = 1.0f; PrevAnim = nullptr; PrevPlaybackTime = 0.0f; } @@ -498,8 +478,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent( CurrentSnapshot.PrevAnim = PrevAnim; CurrentSnapshot.ActiveTime = ActivePlaybackTime; CurrentSnapshot.PrevTime = PrevPlaybackTime; - // Apply SmoothStep for ease-in-out crossfade (raw alpha is linear) - CurrentSnapshot.CrossfadeAlpha = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha); + // FInterpTo already provides exponential easing — pass alpha directly. + CurrentSnapshot.CrossfadeAlpha = CrossfadeAlpha; CurrentSnapshot.ActivationAlpha = CurrentActiveAlpha; CurrentSnapshot.BlendWeight = BlendWeight; } @@ -526,9 +506,6 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const FString ActiveName = ActiveAnim ? ActiveAnim->GetName() : TEXT("(none)"); FString PrevName = PrevAnim ? PrevAnim->GetName() : TEXT("---"); - // Smoothed crossfade for display - const float SmoothedCrossfade = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha); - // State label FString StateStr; if (!bActive) @@ -562,8 +539,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime, CrossfadeAlpha < 1.0f ? WarnColor : MainColor, - FString::Printf(TEXT(" Crossfade: %.3f (smooth: %.3f) Prev: %s"), - CrossfadeAlpha, SmoothedCrossfade, *PrevName)); + FString::Printf(TEXT(" Crossfade: %.3f Prev: %s"), + CrossfadeAlpha, *PrevName)); GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, MainColor, FString::Printf(TEXT(" Emotion: %s (%s) Weight: %.2f"), diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp index be507a5..dc4db4e 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp @@ -145,9 +145,15 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."), Tpb, LastClosedTurnIndex, AudioPreBufferMs); } - if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) + // Only start playback if the agent is still speaking. + // If silence detection already set bAgentSpeaking=false, this is stale. + if (bAgentSpeaking) { - AudioPlaybackComponent->Play(); + if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) + { + AudioPlaybackComponent->Play(); + } + OnAudioPlaybackStarted.Broadcast(); } } } @@ -223,6 +229,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel { bHardTimeoutFired = bHardTimeout && !bAgentResponseReceived; bAgentSpeaking = false; + bPreBuffering = false; // Cancel pending pre-buffer to prevent stale OnAudioPlaybackStarted. bAgentResponseReceived = false; SilentTickCount = 0; bShouldBroadcastStopped = true; @@ -367,9 +374,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EndConversation() { bIntentionalDisconnect = true; WebSocketProxy->Disconnect(); + // OnClosed callback will fire OnAgentDisconnected. WebSocketProxy = nullptr; } } + else + { + // Persistent mode: WebSocket stays alive but the interaction is over. + // Broadcast OnAgentDisconnected so expression components deactivate + // (body, facial, etc.). The WebSocket OnClosed never fires here. + OnAgentDisconnected.Broadcast(1000, TEXT("EndConversation (persistent)")); + } // Reset replicated state so other players can talk to this NPC. bNetIsConversing = false; @@ -1333,9 +1348,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray Tpb2, LastClosedTurnIndex, AudioPreBufferMs); } } - else if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) + else { - AudioPlaybackComponent->Play(); + if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) + { + AudioPlaybackComponent->Play(); + } + OnAudioPlaybackStarted.Broadcast(); } } else if (bPreBuffering) @@ -1361,6 +1380,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray { AudioPlaybackComponent->Play(); } + OnAudioPlaybackStarted.Broadcast(); } SilentTickCount = 0; } diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp index edff176..f56adbf 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_InteractionComponent.cpp @@ -165,6 +165,26 @@ UPS_AI_ConvAgent_ElevenLabsComponent* UPS_AI_ConvAgent_InteractionComponent::Eva UPS_AI_ConvAgent_ElevenLabsComponent* CurrentAgent = SelectedAgent.Get(); + // ── Conversation lock ────────────────────────────────────────────── + // While we're actively conversing with an agent, keep it selected as + // long as it's within interaction distance — ignore the view cone. + // This prevents deselect/reselect flicker when the player turns quickly + // (which would cause spurious OnAgentConnected re-broadcasts in + // persistent session mode). + if (CurrentAgent && CurrentAgent->bNetIsConversing) + { + if (AActor* AgentActor = CurrentAgent->GetOwner()) + { + const FVector AgentLoc = AgentActor->GetActorLocation() + + FVector(0.0f, 0.0f, AgentEyeLevelOffset); + const float DistSq = (AgentLoc - ViewLocation).SizeSquared(); + if (DistSq <= MaxDistSq) + { + return CurrentAgent; // Keep conversing agent selected. + } + } + } + // Get local player's pawn for occupied-NPC check. // Use pawn (replicated to ALL clients) instead of PlayerController // (only replicated to owning client due to bOnlyRelevantToOwner=true). diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_BodyExpressionComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_BodyExpressionComponent.h index 9c8962f..c59c367 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_BodyExpressionComponent.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_BodyExpressionComponent.h @@ -86,7 +86,7 @@ public: UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression", meta = (ClampMin = "0.1", ClampMax = "3.0", ToolTip = "How long (seconds) to crossfade between animations.\n0.5 = snappy, 1.5 = smooth.")) - float EmotionBlendDuration = 0.5f; + float EmotionBlendDuration = 1.0f; /** Overall blend weight for body expressions. 1.0 = full, 0.5 = subtle. */ UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression", diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h index c06e07f..1ab4887 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h @@ -39,6 +39,14 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStartedSpeaking); DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStoppedSpeaking); DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted); +/** + * Fired when audio playback actually starts — AFTER any pre-buffering delay. + * Unlike OnAgentStartedSpeaking (which fires at the first audio chunk arrival), + * this fires when the AudioComponent calls Play(), meaning the audio is now audible. + * Use this when you need animation/behaviour synced with audible speech. + */ +DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAudioPlaybackStarted); + /** * Fired when the server sends its first agent_chat_response_part — i.e. the moment * the LLM starts generating, well before audio arrives. @@ -253,6 +261,13 @@ public: meta = (ToolTip = "Fires when the agent starts speaking (first audio chunk). Use for lip-sync or UI feedback.")) FOnAgentStartedSpeaking OnAgentStartedSpeaking; + /** Fired when audio playback actually starts — AFTER any pre-buffering delay. + * Unlike OnAgentStartedSpeaking (first chunk arrival), this fires when audio is audible. + * Use this for body/gesture animations that should be synced with audible speech. */ + UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events", + meta = (ToolTip = "Fires when audio playback actually starts (after pre-buffering).\nSynced with audible speech. Use for body animations.")) + FOnAudioPlaybackStarted OnAudioPlaybackStarted; + /** Fired when the agent finishes playing all audio. Use this to re-open the microphone (in Server VAD mode without interruption) or update UI. */ UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events", meta = (ToolTip = "Fires when the agent finishes speaking. Use to re-open the mic or update UI."))