Fix body expression sync, conversation stability, and persistent session disconnect
- Sync body animation with actual audio playback via the new OnAudioPlaybackStarted delegate instead of OnAgentStartedSpeaking (accounts for the pre-buffer delay).
- Fix stale pre-buffer broadcasts by cancelling bPreBuffering on silence detection and guarding the pre-buffer timeout with a bAgentSpeaking check.
- Smooth the body crossfade using FInterpTo instead of linear interpolation.
- Add a conversation lock in EvaluateBestAgent: keep the agent selected during an active conversation regardless of the view cone (a distance-only check prevents deselect flicker on fast camera turns).
- Broadcast OnAgentDisconnected in persistent-session EndConversation so all expression components (body, facial, lip sync, gaze) properly deactivate when the player leaves the interaction zone.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
2e96e3c766
commit
fb641d5aa4
@ -44,7 +44,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::BeginPlay()
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
|
||||
Agent->OnAgentDisconnected.AddDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
|
||||
Agent->OnAgentStartedSpeaking.AddDynamic(
|
||||
Agent->OnAudioPlaybackStarted.AddDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
|
||||
Agent->OnAgentStoppedSpeaking.AddDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
|
||||
@ -97,7 +97,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::EndPlay(const EEndPlayReason::Typ
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
|
||||
AgentComponent->OnAgentDisconnected.RemoveDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
|
||||
AgentComponent->OnAgentStartedSpeaking.RemoveDynamic(
|
||||
AgentComponent->OnAudioPlaybackStarted.RemoveDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
|
||||
AgentComponent->OnAgentStoppedSpeaking.RemoveDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
|
||||
@ -185,49 +185,25 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::SwitchToNewAnim(UAnimSequence* Ne
|
||||
if (!bForce && NewAnim == ActiveAnim) return;
|
||||
if (!NewAnim) return;
|
||||
|
||||
if (CrossfadeAlpha < 1.0f && PrevAnim)
|
||||
// Always start a fresh crossfade from whatever is currently active.
|
||||
// If a crossfade was in progress, the old PrevAnim is lost, but the
|
||||
// transition FROM the current ActiveAnim (at its current time) to the
|
||||
// new anim will always be smooth and predictable.
|
||||
PrevAnim = ActiveAnim;
|
||||
PrevPlaybackTime = ActivePlaybackTime;
|
||||
|
||||
ActiveAnim = NewAnim;
|
||||
ActivePlaybackTime = 0.0f;
|
||||
CrossfadeAlpha = 0.0f;
|
||||
|
||||
if (bDebug && DebugVerbosity >= 1)
|
||||
{
|
||||
// Mid-crossfade: a crossfade is already in progress.
|
||||
// DON'T reset CrossfadeAlpha — just swap the target animation.
|
||||
// This preserves PrevAnim's contribution and avoids a visual pop.
|
||||
//
|
||||
// Before: Blend(PrevAnim, OldActive, alpha) e.g. 70% Prev + 30% Active
|
||||
// After: Blend(PrevAnim, NewAnim, alpha) e.g. 70% Prev + 30% New
|
||||
//
|
||||
// The crossfade continues naturally — New fades in, Prev fades out.
|
||||
// Pop is only 30% * (New@0 - OldActive@t) instead of 70% * (Prev - Active).
|
||||
ActiveAnim = NewAnim;
|
||||
ActivePlaybackTime = 0.0f;
|
||||
// CrossfadeAlpha stays where it is — continuity
|
||||
|
||||
if (bDebug && DebugVerbosity >= 1)
|
||||
{
|
||||
UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
|
||||
TEXT("Body anim switch (MID-CROSSFADE α=%.2f): target → %s (%s, %s)"),
|
||||
CrossfadeAlpha, *NewAnim->GetName(),
|
||||
bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
|
||||
*UEnum::GetValueAsString(ActiveEmotion));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// No crossfade in progress — normal switch with full crossfade
|
||||
PrevAnim = ActiveAnim;
|
||||
PrevPlaybackTime = ActivePlaybackTime;
|
||||
|
||||
ActiveAnim = NewAnim;
|
||||
ActivePlaybackTime = 0.0f;
|
||||
CrossfadeAlpha = 0.0f;
|
||||
|
||||
if (bDebug && DebugVerbosity >= 1)
|
||||
{
|
||||
UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
|
||||
TEXT("Body anim switch: %s → %s (%s, %s)"),
|
||||
PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"),
|
||||
*NewAnim->GetName(),
|
||||
bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
|
||||
*UEnum::GetValueAsString(ActiveEmotion));
|
||||
}
|
||||
UE_LOG(LogPS_AI_ConvAgent_BodyExpr, Log,
|
||||
TEXT("Body anim switch: %s → %s (%s, %s)"),
|
||||
PrevAnim ? *PrevAnim->GetName() : TEXT("(none)"),
|
||||
*NewAnim->GetName(),
|
||||
bIsSpeaking ? TEXT("speaking") : TEXT("idle"),
|
||||
*UEnum::GetValueAsString(ActiveEmotion));
|
||||
}
|
||||
}
|
||||
|
||||
@ -387,7 +363,7 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationConnected);
|
||||
Agent->OnAgentDisconnected.AddDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnConversationDisconnected);
|
||||
Agent->OnAgentStartedSpeaking.AddDynamic(
|
||||
Agent->OnAudioPlaybackStarted.AddDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStarted);
|
||||
Agent->OnAgentStoppedSpeaking.AddDynamic(
|
||||
this, &UPS_AI_ConvAgent_BodyExpressionComponent::OnSpeakingStopped);
|
||||
@ -480,12 +456,16 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
|
||||
|
||||
if (CrossfadeAlpha < 1.0f)
|
||||
{
|
||||
const float BlendSpeed = 1.0f / FMath::Max(0.05f, EmotionBlendDuration);
|
||||
CrossfadeAlpha = FMath::Min(1.0f, CrossfadeAlpha + DeltaTime * BlendSpeed);
|
||||
// Exponential ease-out: fast start, gradual approach to 1.0.
|
||||
// Factor of 3 compensates for FInterpTo's exponential decay
|
||||
// reaching ~95% in EmotionBlendDuration seconds.
|
||||
const float InterpSpeed = 3.0f / FMath::Max(0.05f, EmotionBlendDuration);
|
||||
CrossfadeAlpha = FMath::FInterpTo(CrossfadeAlpha, 1.0f, DeltaTime, InterpSpeed);
|
||||
|
||||
// Crossfade complete — release previous anim
|
||||
if (CrossfadeAlpha >= 1.0f)
|
||||
// Snap to 1.0 when close enough, release previous anim
|
||||
if (CrossfadeAlpha > 0.999f)
|
||||
{
|
||||
CrossfadeAlpha = 1.0f;
|
||||
PrevAnim = nullptr;
|
||||
PrevPlaybackTime = 0.0f;
|
||||
}
|
||||
@ -498,8 +478,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::TickComponent(
|
||||
CurrentSnapshot.PrevAnim = PrevAnim;
|
||||
CurrentSnapshot.ActiveTime = ActivePlaybackTime;
|
||||
CurrentSnapshot.PrevTime = PrevPlaybackTime;
|
||||
// Apply SmoothStep for ease-in-out crossfade (raw alpha is linear)
|
||||
CurrentSnapshot.CrossfadeAlpha = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha);
|
||||
// FInterpTo already provides exponential easing — pass alpha directly.
|
||||
CurrentSnapshot.CrossfadeAlpha = CrossfadeAlpha;
|
||||
CurrentSnapshot.ActivationAlpha = CurrentActiveAlpha;
|
||||
CurrentSnapshot.BlendWeight = BlendWeight;
|
||||
}
|
||||
@ -526,9 +506,6 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const
|
||||
FString ActiveName = ActiveAnim ? ActiveAnim->GetName() : TEXT("(none)");
|
||||
FString PrevName = PrevAnim ? PrevAnim->GetName() : TEXT("---");
|
||||
|
||||
// Smoothed crossfade for display
|
||||
const float SmoothedCrossfade = FMath::SmoothStep(0.0f, 1.0f, CrossfadeAlpha);
|
||||
|
||||
// State label
|
||||
FString StateStr;
|
||||
if (!bActive)
|
||||
@ -562,8 +539,8 @@ void UPS_AI_ConvAgent_BodyExpressionComponent::DrawDebugHUD() const
|
||||
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime,
|
||||
CrossfadeAlpha < 1.0f ? WarnColor : MainColor,
|
||||
FString::Printf(TEXT(" Crossfade: %.3f (smooth: %.3f) Prev: %s"),
|
||||
CrossfadeAlpha, SmoothedCrossfade, *PrevName));
|
||||
FString::Printf(TEXT(" Crossfade: %.3f Prev: %s"),
|
||||
CrossfadeAlpha, *PrevName));
|
||||
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, MainColor,
|
||||
FString::Printf(TEXT(" Emotion: %s (%s) Weight: %.2f"),
|
||||
|
||||
@ -145,9 +145,15 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
||||
TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
|
||||
Tpb, LastClosedTurnIndex, AudioPreBufferMs);
|
||||
}
|
||||
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
// Only start playback if the agent is still speaking.
|
||||
// If silence detection already set bAgentSpeaking=false, this is stale.
|
||||
if (bAgentSpeaking)
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -223,6 +229,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
||||
{
|
||||
bHardTimeoutFired = bHardTimeout && !bAgentResponseReceived;
|
||||
bAgentSpeaking = false;
|
||||
bPreBuffering = false; // Cancel pending pre-buffer to prevent stale OnAudioPlaybackStarted.
|
||||
bAgentResponseReceived = false;
|
||||
SilentTickCount = 0;
|
||||
bShouldBroadcastStopped = true;
|
||||
@ -367,9 +374,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EndConversation()
|
||||
{
|
||||
bIntentionalDisconnect = true;
|
||||
WebSocketProxy->Disconnect();
|
||||
// OnClosed callback will fire OnAgentDisconnected.
|
||||
WebSocketProxy = nullptr;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Persistent mode: WebSocket stays alive but the interaction is over.
|
||||
// Broadcast OnAgentDisconnected so expression components deactivate
|
||||
// (body, facial, etc.). The WebSocket OnClosed never fires here.
|
||||
OnAgentDisconnected.Broadcast(1000, TEXT("EndConversation (persistent)"));
|
||||
}
|
||||
|
||||
// Reset replicated state so other players can talk to this NPC.
|
||||
bNetIsConversing = false;
|
||||
@ -1333,9 +1348,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
|
||||
}
|
||||
}
|
||||
else if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
else
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
}
|
||||
else if (bPreBuffering)
|
||||
@ -1361,6 +1380,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
SilentTickCount = 0;
|
||||
}
|
||||
|
||||
@ -165,6 +165,26 @@ UPS_AI_ConvAgent_ElevenLabsComponent* UPS_AI_ConvAgent_InteractionComponent::Eva
|
||||
|
||||
UPS_AI_ConvAgent_ElevenLabsComponent* CurrentAgent = SelectedAgent.Get();
|
||||
|
||||
// ── Conversation lock ──────────────────────────────────────────────
|
||||
// While we're actively conversing with an agent, keep it selected as
|
||||
// long as it's within interaction distance — ignore the view cone.
|
||||
// This prevents deselect/reselect flicker when the player turns quickly
|
||||
// (which would cause spurious OnAgentConnected re-broadcasts in
|
||||
// persistent session mode).
|
||||
if (CurrentAgent && CurrentAgent->bNetIsConversing)
|
||||
{
|
||||
if (AActor* AgentActor = CurrentAgent->GetOwner())
|
||||
{
|
||||
const FVector AgentLoc = AgentActor->GetActorLocation()
|
||||
+ FVector(0.0f, 0.0f, AgentEyeLevelOffset);
|
||||
const float DistSq = (AgentLoc - ViewLocation).SizeSquared();
|
||||
if (DistSq <= MaxDistSq)
|
||||
{
|
||||
return CurrentAgent; // Keep conversing agent selected.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get local player's pawn for occupied-NPC check.
|
||||
// Use pawn (replicated to ALL clients) instead of PlayerController
|
||||
// (only replicated to owning client due to bOnlyRelevantToOwner=true).
|
||||
|
||||
@ -86,7 +86,7 @@ public:
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",
|
||||
meta = (ClampMin = "0.1", ClampMax = "3.0",
|
||||
ToolTip = "How long (seconds) to crossfade between animations.\n0.5 = snappy, 1.5 = smooth."))
|
||||
float EmotionBlendDuration = 0.5f;
|
||||
float EmotionBlendDuration = 1.0f;
|
||||
|
||||
/** Overall blend weight for body expressions. 1.0 = full, 0.5 = subtle. */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|BodyExpression",
|
||||
|
||||
@ -39,6 +39,14 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStartedSpeaking);
|
||||
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentStoppedSpeaking);
|
||||
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentInterrupted);
|
||||
|
||||
/**
|
||||
* Fired when audio playback actually starts — AFTER any pre-buffering delay.
|
||||
* Unlike OnAgentStartedSpeaking (which fires at the first audio chunk arrival),
|
||||
* this fires when the AudioComponent calls Play(), meaning the audio is now audible.
|
||||
* Use this when you need animation/behaviour synced with audible speech.
|
||||
*/
|
||||
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAudioPlaybackStarted);
|
||||
|
||||
/**
|
||||
* Fired when the server sends its first agent_chat_response_part — i.e. the moment
|
||||
* the LLM starts generating, well before audio arrives.
|
||||
@ -253,6 +261,13 @@ public:
|
||||
meta = (ToolTip = "Fires when the agent starts speaking (first audio chunk). Use for lip-sync or UI feedback."))
|
||||
FOnAgentStartedSpeaking OnAgentStartedSpeaking;
|
||||
|
||||
/** Fired when audio playback actually starts — AFTER any pre-buffering delay.
|
||||
* Unlike OnAgentStartedSpeaking (first chunk arrival), this fires when audio is audible.
|
||||
* Use this for body/gesture animations that should be synced with audible speech. */
|
||||
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
|
||||
meta = (ToolTip = "Fires when audio playback actually starts (after pre-buffering).\nSynced with audible speech. Use for body animations."))
|
||||
FOnAudioPlaybackStarted OnAudioPlaybackStarted;
|
||||
|
||||
/** Fired when the agent finishes playing all audio. Use this to re-open the microphone (in Server VAD mode without interruption) or update UI. */
|
||||
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|ElevenLabs|Events",
|
||||
meta = (ToolTip = "Fires when the agent finishes speaking. Use to re-open the mic or update UI."))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user