Fix audio cutoff and lip sync activation bugs during agent switching
- Fix A→B→A audio cutoff: when switching back to a pending-leave agent, cancel the deferred leave instead of force-completing it (was calling StopAgentAudio on the agent we're returning to)
- Fix deferred leave firing during TTS gaps: use IsAgentSpeakingOrPending() instead of IsAgentSpeaking() — checks bAgentGenerating and bAgentResponseReceived to avoid premature leave during inter-batch silence
- Convert silence detection from tick-based to time-based: SilentTickCount → SilentTime (float seconds), GeneratingTickCount → GeneratingTime. Consistent behavior regardless of frame rate (was 5s@120fps vs 20s@30fps)
- Fix lazy binding: add OnAgentConnected/OnAgentDisconnected in LipSync and FacialExpression TickComponent lazy-bind path (bActive stayed false forever in packaged builds when component init order differed)
- Fix reconnection: reset bWaitingForAgentResponse and GeneratingTime before entering reconnect mode to avoid stale state on new session
- Fix event_ID audio filtering: reset LastInterruptEventId in HandleAgentResponse and SendUserTurnStart so first audio chunks of a new turn are not silently discarded by stale interrupt filter
- Preserve retained gaze when switching back to same agent (don't CleanupRetainedGaze if PrevRetained == NewAgent)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
aea02abe89
commit
e5a32f5997
@ -177,27 +177,27 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
|||||||
|
|
||||||
// Generating timeout (ISSUE-1): if the server sent agent_chat_response_part
|
// Generating timeout (ISSUE-1): if the server sent agent_chat_response_part
|
||||||
// (bAgentGenerating=true) but no audio ever arrived (bAgentSpeaking=false),
|
// (bAgentGenerating=true) but no audio ever arrived (bAgentSpeaking=false),
|
||||||
// force-clear bAgentGenerating after 10s so StartListening() is no longer blocked.
|
// force-clear bAgentGenerating after GeneratingTimeoutSeconds so StartListening()
|
||||||
// Normal path: first audio chunk → EnqueueAgentAudio → bAgentGenerating=false.
|
// is no longer blocked. Time-based to behave consistently across frame rates.
|
||||||
// This fallback covers the rare case where TTS produces nothing (e.g. empty response).
|
|
||||||
if (bAgentGenerating && !bAgentSpeaking)
|
if (bAgentGenerating && !bAgentSpeaking)
|
||||||
{
|
{
|
||||||
if (++GeneratingTickCount >= HardSilenceTimeoutTicks)
|
GeneratingTime += DeltaTime;
|
||||||
|
if (GeneratingTime >= GeneratingTimeoutSeconds)
|
||||||
{
|
{
|
||||||
bAgentGenerating = false;
|
bAgentGenerating = false;
|
||||||
GeneratingTickCount = 0;
|
GeneratingTime = 0.0f;
|
||||||
if (bDebug)
|
if (bDebug)
|
||||||
{
|
{
|
||||||
const double T = FPlatformTime::Seconds() - SessionStartTime;
|
const double T = FPlatformTime::Seconds() - SessionStartTime;
|
||||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
|
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
|
||||||
TEXT("[T+%.2fs] [Turn %d] Generating timeout (10s) — no audio arrived. Clearing bAgentGenerating."),
|
TEXT("[T+%.2fs] [Turn %d] Generating timeout (%.0fs) — no audio arrived. Clearing bAgentGenerating."),
|
||||||
T, LastClosedTurnIndex);
|
T, LastClosedTurnIndex, GeneratingTimeoutSeconds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
GeneratingTickCount = 0;
|
GeneratingTime = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pre-buffer timer: start playback after the pre-buffer period expires.
|
// Pre-buffer timer: start playback after the pre-buffer period expires.
|
||||||
@ -321,19 +321,19 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
|||||||
FScopeLock Lock(&AudioQueueLock);
|
FScopeLock Lock(&AudioQueueLock);
|
||||||
if (AudioQueue.Num() - AudioQueueReadOffset == 0)
|
if (AudioQueue.Num() - AudioQueueReadOffset == 0)
|
||||||
{
|
{
|
||||||
SilentTickCount++;
|
SilentTime += DeltaTime;
|
||||||
|
|
||||||
// Wait for agent_response (confirms the full response is done) before
|
// Wait for agent_response (confirms the full response is done) before
|
||||||
// declaring the agent stopped. This prevents premature OnAgentStoppedSpeaking
|
// declaring the agent stopped. This prevents premature OnAgentStoppedSpeaking
|
||||||
// events when ElevenLabs TTS streams audio in multiple batches with gaps
|
// events when ElevenLabs TTS streams audio in multiple batches with gaps
|
||||||
// (e.g. for long responses) — without this guard, the Blueprint's
|
// (e.g. for long responses) — without this guard, the Blueprint's
|
||||||
// OnAgentStoppedSpeaking handler reopens the mic mid-response.
|
// OnAgentStoppedSpeaking handler reopens the mic mid-response.
|
||||||
const bool bResponseConfirmed = bAgentResponseReceived && SilentTickCount >= SilenceThresholdTicks;
|
const bool bResponseConfirmed = bAgentResponseReceived && SilentTime >= SilenceThresholdSeconds;
|
||||||
|
|
||||||
// Hard-timeout fallback: if agent_response never arrives (or is very late),
|
// Hard-timeout fallback: if agent_response never arrives (or is very late),
|
||||||
// stop after 10s of silence to avoid leaving the state machine stuck.
|
// stop after 10s of silence to avoid leaving the state machine stuck.
|
||||||
// 10s was chosen to bridge observed inter-batch TTS gaps of up to ~5s.
|
// Time-based to behave consistently regardless of frame rate.
|
||||||
const bool bHardTimeout = SilentTickCount >= HardSilenceTimeoutTicks;
|
const bool bHardTimeout = SilentTime >= HardSilenceTimeoutSeconds;
|
||||||
|
|
||||||
if (bResponseConfirmed || bHardTimeout)
|
if (bResponseConfirmed || bHardTimeout)
|
||||||
{
|
{
|
||||||
@ -341,13 +341,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
|||||||
bAgentSpeaking = false;
|
bAgentSpeaking = false;
|
||||||
bPreBuffering = false; // Cancel pending pre-buffer to prevent stale OnAudioPlaybackStarted.
|
bPreBuffering = false; // Cancel pending pre-buffer to prevent stale OnAudioPlaybackStarted.
|
||||||
bAgentResponseReceived = false;
|
bAgentResponseReceived = false;
|
||||||
SilentTickCount = 0;
|
SilentTime = 0.0f;
|
||||||
bShouldBroadcastStopped = true;
|
bShouldBroadcastStopped = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
SilentTickCount = 0;
|
SilentTime = 0.0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Broadcast OUTSIDE the lock — Blueprint handlers can execute for arbitrary time.
|
// Broadcast OUTSIDE the lock — Blueprint handlers can execute for arbitrary time.
|
||||||
@ -1096,7 +1096,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleDisconnected(int32 StatusCode,
|
|||||||
bIsListening = false;
|
bIsListening = false;
|
||||||
// bAgentSpeaking / bAgentGenerating / bAgentResponseReceived already cleared by StopAgentAudio.
|
// bAgentSpeaking / bAgentGenerating / bAgentResponseReceived already cleared by StopAgentAudio.
|
||||||
bWaitingForAgentResponse = false;
|
bWaitingForAgentResponse = false;
|
||||||
GeneratingTickCount = 0;
|
GeneratingTime = 0.0f;
|
||||||
TurnIndex = 0;
|
TurnIndex = 0;
|
||||||
LastClosedTurnIndex = 0;
|
LastClosedTurnIndex = 0;
|
||||||
CurrentEmotion = EPS_AI_ConvAgent_Emotion::Neutral;
|
CurrentEmotion = EPS_AI_ConvAgent_Emotion::Neutral;
|
||||||
@ -1112,6 +1112,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleDisconnected(int32 StatusCode,
|
|||||||
if (!bIntentionalDisconnect && StatusCode != 1000
|
if (!bIntentionalDisconnect && StatusCode != 1000
|
||||||
&& MaxReconnectAttempts > 0 && GetOwnerRole() == ROLE_Authority)
|
&& MaxReconnectAttempts > 0 && GetOwnerRole() == ROLE_Authority)
|
||||||
{
|
{
|
||||||
|
// Clean up stale turn state so the reconnected session starts fresh.
|
||||||
|
// StopAgentAudio() already ran above, but these fields are not reset by it:
|
||||||
|
bWaitingForAgentResponse = false;
|
||||||
|
GeneratingTime = 0.0f;
|
||||||
|
|
||||||
bWantsReconnect = true;
|
bWantsReconnect = true;
|
||||||
ReconnectAttemptCount = 0;
|
ReconnectAttemptCount = 0;
|
||||||
const double Delay = 1.0; // First attempt after 1 second.
|
const double Delay = 1.0; // First attempt after 1 second.
|
||||||
@ -1612,7 +1617,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
|||||||
bAgentGenerating = false; // Agent is now speaking — generation phase is over.
|
bAgentGenerating = false; // Agent is now speaking — generation phase is over.
|
||||||
bAgentResponseReceived = false; // Reset: wait for agent_response before allowing StopSpeaking.
|
bAgentResponseReceived = false; // Reset: wait for agent_response before allowing StopSpeaking.
|
||||||
bQueueWasDry = false;
|
bQueueWasDry = false;
|
||||||
SilentTickCount = 0;
|
SilentTime = 0.0f;
|
||||||
// Adaptive pre-buffer: record first chunk timing for inter-chunk gap measurement.
|
// Adaptive pre-buffer: record first chunk timing for inter-chunk gap measurement.
|
||||||
TurnFirstChunkTime = FPlatformTime::Seconds();
|
TurnFirstChunkTime = FPlatformTime::Seconds();
|
||||||
TurnFirstChunkBytes = PCMData.Num();
|
TurnFirstChunkBytes = PCMData.Num();
|
||||||
@ -1717,7 +1722,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
|||||||
}
|
}
|
||||||
OnAudioPlaybackStarted.Broadcast();
|
OnAudioPlaybackStarted.Broadcast();
|
||||||
}
|
}
|
||||||
SilentTickCount = 0;
|
SilentTime = 0.0f;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1745,7 +1750,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Reset silence counter — new audio arrived, we're not in a gap anymore
|
// Reset silence counter — new audio arrived, we're not in a gap anymore
|
||||||
SilentTickCount = 0;
|
SilentTime = 0.0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1784,7 +1789,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopAgentAudio()
|
|||||||
if (bAgentSpeaking)
|
if (bAgentSpeaking)
|
||||||
{
|
{
|
||||||
bAgentSpeaking = false;
|
bAgentSpeaking = false;
|
||||||
SilentTickCount = 0;
|
SilentTime = 0.0f;
|
||||||
bWasSpeaking = true;
|
bWasSpeaking = true;
|
||||||
Now = FPlatformTime::Seconds();
|
Now = FPlatformTime::Seconds();
|
||||||
}
|
}
|
||||||
@ -2433,7 +2438,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::MulticastAgentStoppedSpeaking_Impleme
|
|||||||
{
|
{
|
||||||
if (GetOwnerRole() == ROLE_Authority) return;
|
if (GetOwnerRole() == ROLE_Authority) return;
|
||||||
bAgentSpeaking = false;
|
bAgentSpeaking = false;
|
||||||
SilentTickCount = 0;
|
SilentTime = 0.0f;
|
||||||
OnAgentStoppedSpeaking.Broadcast();
|
OnAgentStoppedSpeaking.Broadcast();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2711,8 +2716,8 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawDebugHUD() const
|
|||||||
// Audio queue (read without lock for debug display — minor race is acceptable)
|
// Audio queue (read without lock for debug display — minor race is acceptable)
|
||||||
const int32 QueueBytes = FMath::Max(0, AudioQueue.Num() - AudioQueueReadOffset);
|
const int32 QueueBytes = FMath::Max(0, AudioQueue.Num() - AudioQueueReadOffset);
|
||||||
GEngine->AddOnScreenDebugMessage(BaseKey + 5, DisplayTime, MainColor,
|
GEngine->AddOnScreenDebugMessage(BaseKey + 5, DisplayTime, MainColor,
|
||||||
FString::Printf(TEXT(" AudioQueue: %d bytes SilentTicks: %d"),
|
FString::Printf(TEXT(" AudioQueue: %d bytes SilentTime: %.2fs"),
|
||||||
QueueBytes, SilentTickCount));
|
QueueBytes, SilentTime));
|
||||||
|
|
||||||
// Timing
|
// Timing
|
||||||
const double Now = FPlatformTime::Seconds();
|
const double Now = FPlatformTime::Seconds();
|
||||||
|
|||||||
@ -286,6 +286,15 @@ void UPS_AI_ConvAgent_FacialExpressionComponent::TickComponent(
|
|||||||
AgentComponent = Agent;
|
AgentComponent = Agent;
|
||||||
Agent->OnAgentEmotionChanged.AddDynamic(
|
Agent->OnAgentEmotionChanged.AddDynamic(
|
||||||
this, &UPS_AI_ConvAgent_FacialExpressionComponent::OnEmotionChanged);
|
this, &UPS_AI_ConvAgent_FacialExpressionComponent::OnEmotionChanged);
|
||||||
|
|
||||||
|
// Bind conversation lifecycle — same as BeginPlay path.
|
||||||
|
// Without these, bActive stays false forever when lazy-bound.
|
||||||
|
Agent->OnAgentConnected.AddDynamic(
|
||||||
|
this, &UPS_AI_ConvAgent_FacialExpressionComponent::OnConversationConnected);
|
||||||
|
Agent->OnAgentDisconnected.AddDynamic(
|
||||||
|
this, &UPS_AI_ConvAgent_FacialExpressionComponent::OnConversationDisconnected);
|
||||||
|
bActive = false;
|
||||||
|
CurrentActiveAlpha = 0.0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -182,9 +182,9 @@ void UPS_AI_ConvAgent_InteractionComponent::TickComponent(float DeltaTime, ELeve
|
|||||||
CleanupRetainedGaze(Pending);
|
CleanupRetainedGaze(Pending);
|
||||||
PendingLeaveAgent.Reset();
|
PendingLeaveAgent.Reset();
|
||||||
}
|
}
|
||||||
else if (!Pending->IsAgentSpeaking())
|
else if (!Pending->IsAgentSpeakingOrPending())
|
||||||
{
|
{
|
||||||
// Agent finished speaking — leave conversation, retain gaze.
|
// Agent truly finished speaking (not just a TTS inter-batch gap) — leave conversation, retain gaze.
|
||||||
ExecuteLeave(Pending);
|
ExecuteLeave(Pending);
|
||||||
GazeRetainedAgent = Pending;
|
GazeRetainedAgent = Pending;
|
||||||
PendingLeaveAgent.Reset();
|
PendingLeaveAgent.Reset();
|
||||||
@ -539,23 +539,45 @@ void UPS_AI_ConvAgent_InteractionComponent::SetSelectedAgent(UPS_AI_ConvAgent_El
|
|||||||
// player until they walk out of interaction range.
|
// player until they walk out of interaction range.
|
||||||
if (bAutoStartConversation && (OldAgent->IsConnected() || OldAgent->bNetIsConversing))
|
if (bAutoStartConversation && (OldAgent->IsConnected() || OldAgent->bNetIsConversing))
|
||||||
{
|
{
|
||||||
// If a previous pending leave exists, force-complete it now.
|
// If a previous pending leave exists, handle it.
|
||||||
if (UPS_AI_ConvAgent_ElevenLabsComponent* PrevPending = PendingLeaveAgent.Get())
|
if (UPS_AI_ConvAgent_ElevenLabsComponent* PrevPending = PendingLeaveAgent.Get())
|
||||||
{
|
{
|
||||||
|
if (PrevPending == NewAgent)
|
||||||
|
{
|
||||||
|
// Player is switching back to the pending agent (A→B→A).
|
||||||
|
// Cancel the deferred leave — don't execute it, the player
|
||||||
|
// is coming back to this agent and the conversation is still alive.
|
||||||
|
PendingLeaveAgent.Reset();
|
||||||
|
|
||||||
|
if (bDebug)
|
||||||
|
{
|
||||||
|
UE_LOG(LogPS_AI_ConvAgent_Select, Log,
|
||||||
|
TEXT(" Cancelled pending leave (switching back to same agent): %s"),
|
||||||
|
PrevPending->GetOwner() ? *PrevPending->GetOwner()->GetName() : TEXT("(null)"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Different agent — force-complete the old pending leave.
|
||||||
ExecuteLeave(PrevPending);
|
ExecuteLeave(PrevPending);
|
||||||
CleanupRetainedGaze(PrevPending);
|
CleanupRetainedGaze(PrevPending);
|
||||||
PendingLeaveAgent.Reset();
|
PendingLeaveAgent.Reset();
|
||||||
}
|
}
|
||||||
// Similarly, clean up any existing retained gaze.
|
}
|
||||||
|
// Similarly, clean up any existing retained gaze (unless it's the agent
|
||||||
|
// we're about to re-select — keep gaze alive during the transition).
|
||||||
if (UPS_AI_ConvAgent_ElevenLabsComponent* PrevRetained = GazeRetainedAgent.Get())
|
if (UPS_AI_ConvAgent_ElevenLabsComponent* PrevRetained = GazeRetainedAgent.Get())
|
||||||
|
{
|
||||||
|
if (PrevRetained != NewAgent)
|
||||||
{
|
{
|
||||||
CleanupRetainedGaze(PrevRetained);
|
CleanupRetainedGaze(PrevRetained);
|
||||||
GazeRetainedAgent.Reset();
|
GazeRetainedAgent.Reset();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (OldAgent->IsAgentSpeaking())
|
if (OldAgent->IsAgentSpeakingOrPending())
|
||||||
{
|
{
|
||||||
// Agent is still speaking — defer the Leave.
|
// Agent is still speaking (or generating, waiting for more audio) — defer the Leave.
|
||||||
// Gaze and body tracking stay active so the agent keeps
|
// Gaze and body tracking stay active so the agent keeps
|
||||||
// looking at the player while finishing its sentence.
|
// looking at the player while finishing its sentence.
|
||||||
PendingLeaveAgent = OldAgent;
|
PendingLeaveAgent = OldAgent;
|
||||||
|
|||||||
@ -758,6 +758,16 @@ void UPS_AI_ConvAgent_LipSyncComponent::TickComponent(float DeltaTime, ELevelTic
|
|||||||
this, &UPS_AI_ConvAgent_LipSyncComponent::OnAgentInterrupted);
|
this, &UPS_AI_ConvAgent_LipSyncComponent::OnAgentInterrupted);
|
||||||
Agent->OnAgentStoppedSpeaking.AddDynamic(
|
Agent->OnAgentStoppedSpeaking.AddDynamic(
|
||||||
this, &UPS_AI_ConvAgent_LipSyncComponent::OnAgentStopped);
|
this, &UPS_AI_ConvAgent_LipSyncComponent::OnAgentStopped);
|
||||||
|
|
||||||
|
// Bind conversation lifecycle — same as BeginPlay path.
|
||||||
|
// Without these, bActive stays false forever when lazy-bound.
|
||||||
|
Agent->OnAgentConnected.AddDynamic(
|
||||||
|
this, &UPS_AI_ConvAgent_LipSyncComponent::OnConversationConnected);
|
||||||
|
Agent->OnAgentDisconnected.AddDynamic(
|
||||||
|
this, &UPS_AI_ConvAgent_LipSyncComponent::OnConversationDisconnected);
|
||||||
|
bActive = false;
|
||||||
|
CurrentActiveAlpha = 0.0f;
|
||||||
|
|
||||||
Agent->bEnableAgentPartialResponse = true;
|
Agent->bEnableAgentPartialResponse = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -127,6 +127,7 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendUserTurnStart()
|
|||||||
bWaitingForResponse = false;
|
bWaitingForResponse = false;
|
||||||
bFirstAudioResponseLogged = false;
|
bFirstAudioResponseLogged = false;
|
||||||
bAgentResponseStartedFired = false;
|
bAgentResponseStartedFired = false;
|
||||||
|
LastInterruptEventId = 0; // New user turn — stale interrupt filter no longer valid.
|
||||||
|
|
||||||
// No log here — turn start is implicit from audio chunks following.
|
// No log here — turn start is implicit from audio chunks following.
|
||||||
}
|
}
|
||||||
@ -563,6 +564,13 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandleAgentResponse(const TShar
|
|||||||
// subsequent agent_chat_response_part is guaranteed to belong to a new turn.
|
// subsequent agent_chat_response_part is guaranteed to belong to a new turn.
|
||||||
bAgentResponseStartedFired = false;
|
bAgentResponseStartedFired = false;
|
||||||
|
|
||||||
|
// Also reset the interrupt audio filter here. agent_response is the last message
|
||||||
|
// of the current turn — any audio arriving after this belongs to a new generation
|
||||||
|
// and must not be filtered by a stale interrupt event_id from this turn.
|
||||||
|
// This covers the edge case where audio for the next turn arrives before
|
||||||
|
// agent_chat_response_part (which also resets the filter).
|
||||||
|
LastInterruptEventId = 0;
|
||||||
|
|
||||||
// { "type": "agent_response",
|
// { "type": "agent_response",
|
||||||
// "agent_response_event": { "agent_response": "..." }
|
// "agent_response_event": { "agent_response": "..." }
|
||||||
// }
|
// }
|
||||||
|
|||||||
@ -601,6 +601,11 @@ public:
|
|||||||
UFUNCTION(BlueprintPure, Category = "PS AI ConvAgent|ElevenLabs")
|
UFUNCTION(BlueprintPure, Category = "PS AI ConvAgent|ElevenLabs")
|
||||||
bool IsAgentSpeaking() const { return bAgentSpeaking; }
|
bool IsAgentSpeaking() const { return bAgentSpeaking; }
|
||||||
|
|
||||||
|
/** True when the agent is speaking, OR it is still generating (bAgentGenerating) and the server hasn't confirmed the full response yet (bAgentResponseReceived is false).
|
||||||
|
* Use this instead of IsAgentSpeaking() when you need to know if the agent MIGHT still
|
||||||
|
* produce more audio (e.g. during TTS inter-batch gaps). */
|
||||||
|
bool IsAgentSpeakingOrPending() const { return bAgentSpeaking || (bAgentGenerating && !bAgentResponseReceived); }
|
||||||
|
|
||||||
UFUNCTION(BlueprintPure, Category = "PS AI ConvAgent|ElevenLabs")
|
UFUNCTION(BlueprintPure, Category = "PS AI ConvAgent|ElevenLabs")
|
||||||
const FPS_AI_ConvAgent_ConversationInfo_ElevenLabs& GetConversationInfo() const;
|
const FPS_AI_ConvAgent_ConversationInfo_ElevenLabs& GetConversationInfo() const;
|
||||||
|
|
||||||
@ -795,25 +800,27 @@ private:
|
|||||||
// Debug: track when the AudioQueue runs dry during speech (one-shot log).
|
// Debug: track when the AudioQueue runs dry during speech (one-shot log).
|
||||||
bool bQueueWasDry = false;
|
bool bQueueWasDry = false;
|
||||||
|
|
||||||
// Silence detection: how many consecutive ticks with an empty audio queue.
|
// Silence detection: accumulated seconds of empty audio queue.
|
||||||
int32 SilentTickCount = 0;
|
// Time-based (not tick-based) to behave consistently across frame rates.
|
||||||
|
float SilentTime = 0.0f;
|
||||||
|
|
||||||
// Generating timeout: how many consecutive ticks bAgentGenerating has been true
|
// Generating timeout: accumulated seconds with bAgentGenerating=true but no audio.
|
||||||
// without any audio arriving. If this reaches HardSilenceTimeoutTicks, bAgentGenerating
|
// If this exceeds GeneratingTimeoutSeconds, bAgentGenerating is force-cleared
|
||||||
// is force-cleared so StartListening() is no longer blocked. This covers the edge case
|
// so StartListening() is no longer blocked.
|
||||||
// where the server sends agent_chat_response_part but the TTS pipeline produces no audio.
|
float GeneratingTime = 0.0f;
|
||||||
int32 GeneratingTickCount = 0;
|
|
||||||
|
|
||||||
// Primary threshold: fire OnAgentStoppedSpeaking after this many silent ticks
|
// Primary threshold: fire OnAgentStoppedSpeaking after this many seconds of silence
|
||||||
// once the server has confirmed the full response (bAgentResponseReceived=true).
|
// once the server has confirmed the full response (bAgentResponseReceived=true).
|
||||||
// 30 ticks ≈ 0.5s at 60fps — enough to bridge brief inter-chunk gaps in the TTS stream.
|
// 0.5s is enough to bridge brief inter-chunk gaps in the TTS stream.
|
||||||
static constexpr int32 SilenceThresholdTicks = 30;
|
static constexpr float SilenceThresholdSeconds = 0.5f;
|
||||||
|
|
||||||
// Hard-timeout fallback: fire even without agent_response confirmation after 10s
|
// Hard-timeout fallback: fire even without agent_response confirmation after 10s
|
||||||
// of silence. This covers edge cases where agent_response is very late or missing,
|
// of silence. This covers edge cases where agent_response is very late or missing,
|
||||||
// while being long enough to bridge inter-batch TTS gaps (observed up to ~5s).
|
// while being long enough to bridge inter-batch TTS gaps (observed up to ~5s).
|
||||||
// Previously 2s — raised after logs showed premature firing during multi-batch responses.
|
static constexpr float HardSilenceTimeoutSeconds = 10.0f;
|
||||||
static constexpr int32 HardSilenceTimeoutTicks = 600; // 10s at 60fps
|
|
||||||
|
// Generating timeout: same as hard silence timeout.
|
||||||
|
static constexpr float GeneratingTimeoutSeconds = 10.0f;
|
||||||
|
|
||||||
// True once the server sends agent_response for the current turn.
|
// True once the server sends agent_response for the current turn.
|
||||||
// The server sends the full text when generation is complete — this is the
|
// The server sends the full text when generation is complete — this is the
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user