Fix body expression sync, conversation stability, and persistent session disconnect

- Sync body animation with actual audio playback via new OnAudioPlaybackStarted
  delegate instead of OnAgentStartedSpeaking (accounts for pre-buffer delay)
- Fix stale pre-buffer broadcasts by cancelling bPreBuffering on silence detection
  and guarding pre-buffer timeout with bAgentSpeaking check
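- Smooth body crossfade using FInterpTo (exponential ease-out) instead of linear
  interpolation + SmoothStep; always restart the crossfade from the current
  active anim, and raise the default EmotionBlendDuration from 0.5s to 1.0s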
- Add conversation lock in EvaluateBestAgent: keep agent selected during active
  conversation regardless of view cone (distance-only check prevents deselect
  flicker on fast camera turns)
- Broadcast OnAgentDisconnected in persistent session EndConversation so all
  expression components (body, facial, lip sync, gaze) properly deactivate
  when the player leaves the interaction zone

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-03-05 12:18:35 +01:00
parent 2e96e3c766
commit fb641d5aa4
5 changed files with 93 additions and 61 deletions

View File

@ -44,7 +44,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::BeginPlay()
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
Agent->OnAgentDisconnected.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
Agent->OnAgentStartedSpeaking.AddDynamic(
Agent->OnAudioPlaybackStarted.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
Agent->OnAgentStoppedSpeaking.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@ -97,7 +97,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::EndPlay(const EEndPlayReason::Typ
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
AgentComponent->OnAgentDisconnected.RemoveDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
AgentComponent->OnAgentStartedSpeaking.RemoveDynamic(
AgentComponent->OnAudioPlaybackStarted.RemoveDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
AgentComponent->OnAgentStoppedSpeaking.RemoveDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@ -185,49 +185,25 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::SwitchToNewAnim(UAnimSequence* Ne
if (!bForce && NewAnim == ActiveAnim) return;
if (!NewAnim) return;
if (CrossfadeAlpha < 1.0f && PrevAnim)
// Always start a fresh crossfade from whatever is currently active.
// If a crossfade was in progress, the old PrevAnim is lost, but the
// transition FROM the current ActiveAnim (at its current time) to the
// new anim will always be smooth and predictable.
PrevAnim = ActiveAnim;
PrevPlaybackTime = ActivePlaybackTime;
ActiveAnim = NewAnim;
ActivePlaybackTime = 0.0f;
CrossfadeAlpha = 0.0f;
if (bDebug && DebugVerbosity >= 1)
{
// Mid-crossfade: a crossfade is already in progress.
// DON'T reset CrossfadeAlpha — just swap the target animation.
// This preserves PrevAnim's contribution and avoids a visual pop.
//
// Before: Blend(PrevAnim, OldActive, alpha) e.g. 70% Prev + 30% Active
// After: Blend(PrevAnim, NewAnim, alpha) e.g. 70% Prev + 30% New
//
// The crossfade continues naturally — New fades in, Prev fades out.
// Pop is only 30% * (New@0 - OldActive@t) instead of 70% * (Prev - Active).
ActiveAnim = NewAnim;
ActivePlaybackTime = 0.0f;
// CrossfadeAlpha stays where it is — continuity
if (bDebug && DebugVerbosity >= 1)
{
UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
TEXT("Body anim switch (MID-CROSSFADE α=%.2f): target → %s (%s, %s)"),
CrossfadeAlpha, *NewAnim->GetName(),
bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
*UEnum::GetValueAsString(ActiveEmotion));
}
}
else
{
// No crossfade in progress — normal switch with full crossfade
PrevAnim = ActiveAnim;
PrevPlaybackTime = ActivePlaybackTime;
ActiveAnim = NewAnim;
ActivePlaybackTime = 0.0f;
CrossfadeAlpha = 0.0f;
if (bDebug && DebugVerbosity >= 1)
{
UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
TEXT("Body anim switch: %s → %s (%s, %s)"),
PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"),
*NewAnim->GetName(),
bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
*UEnum::GetValueAsString(ActiveEmotion));
}
UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
TEXT("Body anim switch: %s → %s (%s, %s)"),
PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"),
*NewAnim->GetName(),
bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
*UEnum::GetValueAsString(ActiveEmotion));
}
}
@ -387,7 +363,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
Agent->OnAgentDisconnected.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
Agent->OnAgentStartedSpeaking.AddDynamic(
Agent->OnAudioPlaybackStarted.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
Agent->OnAgentStoppedSpeaking.AddDynamic(
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
@ -480,12 +456,16 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
if (CrossfadeAlpha < 1.0f)
{
const float BlendSpeed = 1.0f / FMath::Max(0.05f, EmotionBlendDuration);
CrossfadeAlpha = FMath::Min(1.0f, CrossfadeAlpha + DeltaTime * BlendSpeed);
// Exponential ease-out: fast start, gradual approach to 1.0.
// The factor of 3 is chosen so that FInterpTo's exponential decay covers
// ~95% of the blend (1 - e^-3) within EmotionBlendDuration seconds.
const float InterpSpeed = 3.0f / FMath::Max(0.05f, EmotionBlendDuration);
CrossfadeAlpha = FMath::FInterpTo(CrossfadeAlpha, 1.0f, DeltaTime, InterpSpeed);
// Crossfade complete — release previous anim
if (CrossfadeAlpha >= 1.0f)
// Snap to 1.0 when close enough, release previous anim
if (CrossfadeAlpha > 0.999f)
{
CrossfadeAlpha = 1.0f;
PrevAnim = nullptr;
PrevPlaybackTime = 0.0f;
}
@ -498,8 +478,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
CurrentSnapshot.PrevAnim = PrevAnim;
CurrentSnapshot.ActiveTime = ActivePlaybackTime;
CurrentSnapshot.PrevTime = PrevPlaybackTime;
// Apply SmoothStep for ease-in-out crossfade (raw alpha is linear)
CurrentSnapshot.CrossfadeAlpha = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha);
// FInterpTo already provides exponential easing — pass alpha directly.
CurrentSnapshot.CrossfadeAlpha = CrossfadeAlpha;
CurrentSnapshot.ActivationAlpha = CurrentActiveAlpha;
CurrentSnapshot.BlendWeight = BlendWeight;
}
@ -526,9 +506,6 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const
FString ActiveName = ActiveAnim ? ActiveAnim->GetName() : TEXT("(none)");
FString PrevName = PrevAnim ? PrevAnim->GetName() : TEXT("---");
// Smoothed crossfade for display
const float SmoothedCrossfade = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha);
// State label
FString StateStr;
if (!bActive)
@ -562,8 +539,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const
GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime,
CrossfadeAlpha < 1.0f ? WarnColor : MainColor,
FString::Printf(TEXT(" Crossfade: %.3f (smooth: %.3f) Prev: %s"),
CrossfadeAlpha, SmoothedCrossfade, *PrevName));
FString::Printf(TEXT(" Crossfade: %.3f Prev: %s"),
CrossfadeAlpha, *PrevName));
GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, MainColor,
FString::Printf(TEXT(" Emotion: %s (%s) Weight: %.2f"),

View File

@ -145,9 +145,15 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
Tpb, LastClosedTurnIndex, AudioPreBufferMs);
}
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
// Only start playback if the agent is still speaking.
// If silence detection already set bAgentSpeaking=false, this is stale.
if (bAgentSpeaking)
{
AudioPlaybackComponent->Play();
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{
AudioPlaybackComponent->Play();
}
OnAudioPlaybackStarted.Broadcast();
}
}
}
@ -223,6 +229,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
{
bHardTimeoutFired = bHardTimeout && !bAgentResponseReceived;
bAgentSpeaking = false;
bPreBuffering = false; // Cancel pending pre-buffer to prevent stale OnAudioPlaybackStarted.
bAgentResponseReceived = false;
SilentTickCount = 0;
bShouldBroadcastStopped = true;
@ -367,9 +374,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EndConversation()
{
bIntentionalDisconnect = true;
WebSocketProxy->Disconnect();
// OnClosed callback will fire OnAgentDisconnected.
WebSocketProxy = nullptr;
}
}
else
{
// Persistent mode: WebSocket stays alive but the interaction is over.
// Broadcast OnAgentDisconnected so expression components deactivate
// (body, facial, etc.). The WebSocket OnClosed never fires here.
OnAgentDisconnected.Broadcast(1000, TEXT("EndConversation (persistent)"));
}
// Reset replicated state so other players can talk to this NPC.
bNetIsConversing = false;
@ -1333,9 +1348,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
}
}
else if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
else
{
AudioPlaybackComponent->Play();
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{
AudioPlaybackComponent->Play();
}
OnAudioPlaybackStarted.Broadcast();
}
}
else if (bPreBuffering)
@ -1361,6 +1380,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
{
AudioPlaybackComponent->Play();
}
OnAudioPlaybackStarted.Broadcast();
}
SilentTickCount = 0;
}

View File

@ -165,6 +165,26 @@ UPS_AI_ConvAgent_ElevenLabsComponent* UPS_AI_ConvAgent_InteractionComponent::Eva
UPS_AI_ConvAgent_ElevenLabsComponent* CurrentAgent = SelectedAgent.Get();
// ── Conversation lock ──────────────────────────────────────────────
// While we're actively conversing with an agent, keep it selected as
// long as it's within interaction distance — ignore the view cone.
// This prevents deselect/reselect flicker when the player turns quickly
// (which would cause spurious OnAgentConnected re-broadcasts in
// persistent session mode).
if (CurrentAgent && CurrentAgent->bNetIsConversing)
{
if (AActor* AgentActor = CurrentAgent->GetOwner())
{
const FVector AgentLoc = AgentActor->GetActorLocation()
+ FVector(0.0f, 0.0f, AgentEyeLevelOffset);
const float DistSq = (AgentLoc - ViewLocation).SizeSquared();
if (DistSq <= MaxDistSq)
{
return CurrentAgent; // Keep conversing agent selected.
}
}
}
// Get local player's pawn for occupied-NPC check.
// Use pawn (replicated to ALL clients) instead of PlayerController
// (only replicated to owning client due to bOnlyRelevantToOwner=true).

View File

@ -86,7 +86,7 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",
meta = (ClampMin = "0.1", ClampMax = "3.0",
ToolTip = "How long (seconds) to crossfade between animations.\n0.5 = snappy, 1.5 = smooth."))
float EmotionBlendDuration = 0.5f;
float EmotionBlendDuration = 1.0f;
/** Overall blend weight for body expressions. 1.0 = full, 0.5 = subtle. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",

View File

@ -39,6 +39,14 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStartedSpeaking);
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStoppedSpeaking);
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted);
/**
* Fired when audio playback actually starts — AFTER any pre-buffering delay.
* Unlike OnAgentStartedSpeaking (which fires at the first audio chunk arrival),
* this fires at the point where Play() is called on the AudioComponent (if it is
* not already playing), meaning the audio is now audible.
* Use this when you need animation/behaviour synced with audible speech.
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAudioPlaybackStarted);
/**
* Fired when the server sends its first agent_chat_response_part — i.e. the moment
* the LLM starts generating, well before audio arrives.
@ -253,6 +261,13 @@ public:
meta = (ToolTip = "Fires when the agent starts speaking (first audio chunk). Use for lip-sync or UI feedback."))
FOnAgentStartedSpeaking OnAgentStartedSpeaking;
/** Fired when audio playback actually starts — AFTER any pre-buffering delay.
* Unlike OnAgentStartedSpeaking (first chunk arrival), this fires when audio is audible.
* Use this for body/gesture animations that should be synced with audible speech. */
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
meta = (ToolTip = "Fires when audio playback actually starts (after pre-buffering).\nSynced with audible speech. Use for body animations."))
FOnAudioPlaybackStarted OnAudioPlaybackStarted;
/** Fired when the agent finishes playing all audio. Use this to re-open the microphone (in Server VAD mode without interruption) or update UI. */
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
meta = (ToolTip = "Fires when the agent finishes speaking. Use to re-open the mic or update UI."))