Fix body expression sync, conversation stability, and persistent session disconnect

- Sync body animation with actual audio playback via a new OnAudioPlaybackStarted
  delegate instead of OnAgentStartedSpeaking (accounts for pre-buffer delay)
- Fix stale pre-buffer broadcasts by cancelling bPreBuffering on silence detection
  and guarding the pre-buffer timeout with a bAgentSpeaking check
- Smooth body crossfade using FInterpTo instead of linear interpolation
- Add conversation lock in EvaluateBestAgent: keep agent selected during active
  conversation regardless of view cone (distance-only check prevents deselect
  flicker on fast camera turns)
- Broadcast OnAgentDisconnected in persistent session EndConversation so all
  expression components (body, facial, lip sync, gaze) properly deactivate
  when the player leaves the interaction zone

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-03-05 12:18:35 +01:00
parent 2e96e3c766
commit fb641d5aa4
5 changed files with 93 additions and 61 deletions

View File

@ -44,7 +44,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::BeginPlay()
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
Agent->OnAgentDisconnected.AddDynamic( Agent->OnAgentDisconnected.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
Agent->OnAgentStartedSpeaking.AddDynamic( Agent->OnAudioPlaybackStarted.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
Agent->OnAgentStoppedSpeaking.AddDynamic( Agent->OnAgentStoppedSpeaking.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@ -97,7 +97,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::EndPlay(const EEndPlayReason::Typ
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
AgentComponent->OnAgentDisconnected.RemoveDynamic( AgentComponent->OnAgentDisconnected.RemoveDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
AgentComponent->OnAgentStartedSpeaking.RemoveDynamic( AgentComponent->OnAudioPlaybackStarted.RemoveDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
AgentComponent->OnAgentStoppedSpeaking.RemoveDynamic( AgentComponent->OnAgentStoppedSpeaking.RemoveDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@ -185,33 +185,10 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::SwitchToNewAnim(UAnimSequence* Ne
if (!bForce && NewAnim == ActiveAnim) return; if (!bForce && NewAnim == ActiveAnim) return;
if (!NewAnim) return; if (!NewAnim) return;
if (CrossfadeAlpha < 1.0f && PrevAnim) // Always start a fresh crossfade from whatever is currently active.
{ // If a crossfade was in progress, the old PrevAnim is lost, but the
// Mid-crossfade: a crossfade is already in progress. // transition FROM the current ActiveAnim (at its current time) to the
// DON'T reset CrossfadeAlpha — just swap the target animation. // new anim will always be smooth and predictable.
// This preserves PrevAnim's contribution and avoids a visual pop.
//
// Before: Blend(PrevAnim, OldActive, alpha) e.g. 70% Prev + 30% Active
// After: Blend(PrevAnim, NewAnim, alpha) e.g. 70% Prev + 30% New
//
// The crossfade continues naturally — New fades in, Prev fades out.
// Pop is only 30% * (New@0 - OldActive@t) instead of 70% * (Prev - Active).
ActiveAnim = NewAnim;
ActivePlaybackTime = 0.0f;
// CrossfadeAlpha stays where it is — continuity
if (bDebug && DebugVerbosity >= 1)
{
UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
TEXT("Body anim switch (MID-CROSSFADE α=%.2f): target → %s (%s, %s)"),
CrossfadeAlpha, *NewAnim->GetName(),
bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
*UEnum::GetValueAsString(ActiveEmotion));
}
}
else
{
// No crossfade in progress — normal switch with full crossfade
PrevAnim = ActiveAnim; PrevAnim = ActiveAnim;
PrevPlaybackTime = ActivePlaybackTime; PrevPlaybackTime = ActivePlaybackTime;
@ -228,7 +205,6 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::SwitchToNewAnim(UAnimSequence* Ne
bIsSpeaking ? TEXT("speaking") : TEXT("idle"), bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
*UEnum::GetValueAsString(ActiveEmotion)); *UEnum::GetValueAsString(ActiveEmotion));
} }
}
} }
void UPS_AI_ConvAgent_BodyExpressionComponent::PickAndSwitchAnim() void UPS_AI_ConvAgent_BodyExpressionComponent::PickAndSwitchAnim()
@ -387,7 +363,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
Agent->OnAgentDisconnected.AddDynamic( Agent->OnAgentDisconnected.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
Agent->OnAgentStartedSpeaking.AddDynamic( Agent->OnAudioPlaybackStarted.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
Agent->OnAgentStoppedSpeaking.AddDynamic( Agent->OnAgentStoppedSpeaking.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped); this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@ -480,12 +456,16 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
if (CrossfadeAlpha < 1.0f) if (CrossfadeAlpha < 1.0f)
{ {
const float BlendSpeed = 1.0f / FMath::Max(0.05f, EmotionBlendDuration); // Exponential ease-out: fast start, gradual approach to 1.0.
CrossfadeAlpha = FMath::Min(1.0f, CrossfadeAlpha + DeltaTime * BlendSpeed); // Factor of 3 compensates for FInterpTo's exponential decay
// reaching ~95% in EmotionBlendDuration seconds.
const float InterpSpeed = 3.0f / FMath::Max(0.05f, EmotionBlendDuration);
CrossfadeAlpha = FMath::FInterpTo(CrossfadeAlpha, 1.0f, DeltaTime, InterpSpeed);
// Crossfade complete — release previous anim // Snap to 1.0 when close enough, release previous anim
if (CrossfadeAlpha >= 1.0f) if (CrossfadeAlpha > 0.999f)
{ {
CrossfadeAlpha = 1.0f;
PrevAnim = nullptr; PrevAnim = nullptr;
PrevPlaybackTime = 0.0f; PrevPlaybackTime = 0.0f;
} }
@ -498,8 +478,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
CurrentSnapshot.PrevAnim = PrevAnim; CurrentSnapshot.PrevAnim = PrevAnim;
CurrentSnapshot.ActiveTime = ActivePlaybackTime; CurrentSnapshot.ActiveTime = ActivePlaybackTime;
CurrentSnapshot.PrevTime = PrevPlaybackTime; CurrentSnapshot.PrevTime = PrevPlaybackTime;
// Apply SmoothStep for ease-in-out crossfade (raw alpha is linear) // FInterpTo already provides exponential easing — pass alpha directly.
CurrentSnapshot.CrossfadeAlpha = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha); CurrentSnapshot.CrossfadeAlpha = CrossfadeAlpha;
CurrentSnapshot.ActivationAlpha = CurrentActiveAlpha; CurrentSnapshot.ActivationAlpha = CurrentActiveAlpha;
CurrentSnapshot.BlendWeight = BlendWeight; CurrentSnapshot.BlendWeight = BlendWeight;
} }
@ -526,9 +506,6 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const
FString ActiveName = ActiveAnim ? ActiveAnim->GetName() : TEXT("(none)"); FString ActiveName = ActiveAnim ? ActiveAnim->GetName() : TEXT("(none)");
FString PrevName = PrevAnim ? PrevAnim->GetName() : TEXT("---"); FString PrevName = PrevAnim ? PrevAnim->GetName() : TEXT("---");
// Smoothed crossfade for display
const float SmoothedCrossfade = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha);
// State label // State label
FString StateStr; FString StateStr;
if (!bActive) if (!bActive)
@ -562,8 +539,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const
GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime, GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime,
CrossfadeAlpha < 1.0f ? WarnColor : MainColor, CrossfadeAlpha < 1.0f ? WarnColor : MainColor,
FString::Printf(TEXT(" Crossfade: %.3f (smooth: %.3f) Prev: %s"), FString::Printf(TEXT(" Crossfade: %.3f Prev: %s"),
CrossfadeAlpha, SmoothedCrossfade, *PrevName)); CrossfadeAlpha, *PrevName));
GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, MainColor, GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, MainColor,
FString::Printf(TEXT(" Emotion: %s (%s) Weight: %.2f"), FString::Printf(TEXT(" Emotion: %s (%s) Weight: %.2f"),

View File

@ -145,10 +145,16 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."), TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
Tpb, LastClosedTurnIndex, AudioPreBufferMs); Tpb, LastClosedTurnIndex, AudioPreBufferMs);
} }
// Only start playback if the agent is still speaking.
// If silence detection already set bAgentSpeaking=false, this is stale.
if (bAgentSpeaking)
{
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{ {
AudioPlaybackComponent->Play(); AudioPlaybackComponent->Play();
} }
OnAudioPlaybackStarted.Broadcast();
}
} }
} }
@ -223,6 +229,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
{ {
bHardTimeoutFired = bHardTimeout && !bAgentResponseReceived; bHardTimeoutFired = bHardTimeout && !bAgentResponseReceived;
bAgentSpeaking = false; bAgentSpeaking = false;
bPreBuffering = false; // Cancel pending pre-buffer to prevent stale OnAudioPlaybackStarted.
bAgentResponseReceived = false; bAgentResponseReceived = false;
SilentTickCount = 0; SilentTickCount = 0;
bShouldBroadcastStopped = true; bShouldBroadcastStopped = true;
@ -367,9 +374,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EndConversation()
{ {
bIntentionalDisconnect = true; bIntentionalDisconnect = true;
WebSocketProxy->Disconnect(); WebSocketProxy->Disconnect();
// OnClosed callback will fire OnAgentDisconnected.
WebSocketProxy = nullptr; WebSocketProxy = nullptr;
} }
} }
else
{
// Persistent mode: WebSocket stays alive but the interaction is over.
// Broadcast OnAgentDisconnected so expression components deactivate
// (body, facial, etc.). The WebSocket OnClosed never fires here.
OnAgentDisconnected.Broadcast(1000, TEXT("EndConversation (persistent)"));
}
// Reset replicated state so other players can talk to this NPC. // Reset replicated state so other players can talk to this NPC.
bNetIsConversing = false; bNetIsConversing = false;
@ -1333,10 +1348,14 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
Tpb2, LastClosedTurnIndex, AudioPreBufferMs); Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
} }
} }
else if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) else
{
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{ {
AudioPlaybackComponent->Play(); AudioPlaybackComponent->Play();
} }
OnAudioPlaybackStarted.Broadcast();
}
} }
else if (bPreBuffering) else if (bPreBuffering)
{ {
@ -1361,6 +1380,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
{ {
AudioPlaybackComponent->Play(); AudioPlaybackComponent->Play();
} }
OnAudioPlaybackStarted.Broadcast();
} }
SilentTickCount = 0; SilentTickCount = 0;
} }

View File

@ -165,6 +165,26 @@ UPS_AI_ConvAgent_ElevenLabsComponent* UPS_AI_ConvAgent_InteractionComponent::Eva
UPS_AI_ConvAgent_ElevenLabsComponent* CurrentAgent = SelectedAgent.Get(); UPS_AI_ConvAgent_ElevenLabsComponent* CurrentAgent = SelectedAgent.Get();
// ── Conversation lock ──────────────────────────────────────────────
// While we're actively conversing with an agent, keep it selected as
// long as it's within interaction distance — ignore the view cone.
// This prevents deselect/reselect flicker when the player turns quickly
// (which would cause spurious OnAgentConnected re-broadcasts in
// persistent session mode).
if (CurrentAgent && CurrentAgent->bNetIsConversing)
{
if (AActor* AgentActor = CurrentAgent->GetOwner())
{
const FVector AgentLoc = AgentActor->GetActorLocation()
+ FVector(0.0f, 0.0f, AgentEyeLevelOffset);
const float DistSq = (AgentLoc - ViewLocation).SizeSquared();
if (DistSq <= MaxDistSq)
{
return CurrentAgent; // Keep conversing agent selected.
}
}
}
// Get local player's pawn for occupied-NPC check. // Get local player's pawn for occupied-NPC check.
// Use pawn (replicated to ALL clients) instead of PlayerController // Use pawn (replicated to ALL clients) instead of PlayerController
// (only replicated to owning client due to bOnlyRelevantToOwner=true). // (only replicated to owning client due to bOnlyRelevantToOwner=true).

View File

@ -86,7 +86,7 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression", UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",
meta = (ClampMin = "0.1", ClampMax = "3.0", meta = (ClampMin = "0.1", ClampMax = "3.0",
ToolTip = "How long (seconds) to crossfade between animations.\n0.5 = snappy, 1.5 = smooth.")) ToolTip = "How long (seconds) to crossfade between animations.\n0.5 = snappy, 1.5 = smooth."))
float EmotionBlendDuration = 0.5f; float EmotionBlendDuration = 1.0f;
/** Overall blend weight for body expressions. 1.0 = full, 0.5 = subtle. */ /** Overall blend weight for body expressions. 1.0 = full, 0.5 = subtle. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression", UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",

View File

@ -39,6 +39,14 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStartedSpeaking);
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStoppedSpeaking); DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStoppedSpeaking);
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted); DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted);
/**
* Fired when audio playback actually starts AFTER any pre-buffering delay.
* Unlike OnAgentStartedSpeaking (which fires at the first audio chunk arrival),
* this fires when the AudioComponent calls Play(), meaning the audio is now audible.
* Use this when you need animation/behaviour synced with audible speech.
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAudioPlaybackStarted);
/** /**
* Fired when the server sends its first agent_chat_response_part i.e. the moment * Fired when the server sends its first agent_chat_response_part i.e. the moment
* the LLM starts generating, well before audio arrives. * the LLM starts generating, well before audio arrives.
@ -253,6 +261,13 @@ public:
meta = (ToolTip = "Fires when the agent starts speaking (first audio chunk). Use for lip-sync or UI feedback.")) meta = (ToolTip = "Fires when the agent starts speaking (first audio chunk). Use for lip-sync or UI feedback."))
FOnAgentStartedSpeaking OnAgentStartedSpeaking; FOnAgentStartedSpeaking OnAgentStartedSpeaking;
/** Fired when audio playback actually starts — AFTER any pre-buffering delay.
* Unlike OnAgentStartedSpeaking (first chunk arrival), this fires when audio is audible.
* Use this for body/gesture animations that should be synced with audible speech. */
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
meta = (ToolTip = "Fires when audio playback actually starts (after pre-buffering).\nSynced with audible speech. Use for body animations."))
FOnAudioPlaybackStarted OnAudioPlaybackStarted;
/** Fired when the agent finishes playing all audio. Use this to re-open the microphone (in Server VAD mode without interruption) or update UI. */ /** Fired when the agent finishes playing all audio. Use this to re-open the microphone (in Server VAD mode without interruption) or update UI. */
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events", UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
meta = (ToolTip = "Fires when the agent finishes speaking. Use to re-open the mic or update UI.")) meta = (ToolTip = "Fires when the agent finishes speaking. Use to re-open the mic or update UI."))