Add response latency metrics to ElevenLabs debug HUD
Track 4 latencies per conversation turn (computed only when bDebug is active):
- STT→Gen: user stops talking → server starts generating
- Gen→Audio: server starts generating → first audio chunk received
- Total: user stops talking → first audio chunk received (end-to-end)
- End-to-Ear: user stops talking → audio playback starts (includes pre-buffer)

New timestamps: GenerationStartTime (set in HandleAgentResponseStarted) and PlaybackStartTime (set at the 3 OnAudioPlaybackStarted sites). Values persist on the HUD between turns and are reset when a new turn starts.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6d4ef21269
commit
d60f8d8484
@ -159,6 +159,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
PlaybackStartTime = FPlatformTime::Seconds();
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
}
|
||||
@ -449,6 +454,8 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StartListening()
|
||||
}
|
||||
}
|
||||
bWaitingForAgentResponse = false; // New user turn — cancel any pending response timeout.
|
||||
GenerationStartTime = 0.0;
|
||||
PlaybackStartTime = 0.0;
|
||||
++TurnIndex;
|
||||
bIsListening = true;
|
||||
TurnStartTime = FPlatformTime::Seconds();
|
||||
@ -1050,6 +1057,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleAgentResponseStarted()
|
||||
}
|
||||
|
||||
const double Now = FPlatformTime::Seconds();
|
||||
GenerationStartTime = Now;
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.STTToGenMs = static_cast<float>((Now - TurnEndTime) * 1000.0);
|
||||
}
|
||||
|
||||
const double T = Now - SessionStartTime;
|
||||
const double LatencyFromTurnEnd = TurnEndTime > 0.0 ? Now - TurnEndTime : 0.0;
|
||||
if (bIsListening)
|
||||
@ -1335,6 +1348,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
|
||||
TEXT("[T+%.2fs] [Turn %d] Agent speaking — first audio chunk. (%.2fs after turn end)"),
|
||||
T, LastClosedTurnIndex, LatencyFromTurnEnd);
|
||||
|
||||
// Update latency snapshot for HUD display.
|
||||
if (TurnEndTime > 0.0)
|
||||
LastLatencies.TotalMs = static_cast<float>((AgentSpeakStart - TurnEndTime) * 1000.0);
|
||||
if (GenerationStartTime > 0.0)
|
||||
LastLatencies.GenToAudioMs = static_cast<float>((AgentSpeakStart - GenerationStartTime) * 1000.0);
|
||||
}
|
||||
|
||||
OnAgentStartedSpeaking.Broadcast();
|
||||
@ -1366,6 +1385,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
PlaybackStartTime = FPlatformTime::Seconds();
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
}
|
||||
@ -1392,6 +1416,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
PlaybackStartTime = FPlatformTime::Seconds();
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
SilentTickCount = 0;
|
||||
@ -2333,8 +2362,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawDebugHUD() const
|
||||
NetConnectedPawns.Num(), *SpeakerName));
|
||||
}
|
||||
|
||||
// Latencies (from last completed turn)
|
||||
if (LastLatencies.TotalMs > 0.0f)
|
||||
{
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime, MainColor,
|
||||
FString::Printf(TEXT(" Latency: total=%.0fms (stt>gen=%.0fms gen>audio=%.0fms) ear=%.0fms"),
|
||||
LastLatencies.TotalMs, LastLatencies.STTToGenMs,
|
||||
LastLatencies.GenToAudioMs, LastLatencies.EndToEarMs));
|
||||
}
|
||||
|
||||
// Reconnection
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime,
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 9, DisplayTime,
|
||||
bWantsReconnect ? FColor::Red : MainColor,
|
||||
FString::Printf(TEXT(" Reconnect: %d/%d attempts%s"),
|
||||
ReconnectAttemptCount, MaxReconnectAttempts,
|
||||
|
||||
@ -632,6 +632,19 @@ private:
|
||||
double TurnStartTime = 0.0; // Set in StartListening — when mic opens.
|
||||
double TurnEndTime = 0.0; // Set in StopListening — when mic closes.
|
||||
double AgentSpeakStart = 0.0; // Set in EnqueueAgentAudio (first chunk) — when audio begins.
|
||||
double GenerationStartTime = 0.0; // Set in HandleAgentResponseStarted — server starts generating.
|
||||
double PlaybackStartTime = 0.0; // Set when audio playback actually starts (post pre-buffer).
|
||||
|
||||
// Last-turn latency snapshot — every field is in milliseconds and starts at 0. Shown on the debug HUD.
|
||||
// Persists between turns so the HUD always shows the most recent measurement; DrawDebugHUD prints the latency line only while TotalMs > 0.
|
||||
struct FDebugLatencies
|
||||
{
|
||||
float STTToGenMs = 0.0f; // (Now - TurnEndTime) * 1000, written in HandleAgentResponseStarted — turn end → server starts generating.
|
||||
float GenToAudioMs = 0.0f; // (AgentSpeakStart - GenerationStartTime) * 1000, written in EnqueueAgentAudio — generation start → first audio chunk.
|
||||
float TotalMs = 0.0f; // (AgentSpeakStart - TurnEndTime) * 1000, written in EnqueueAgentAudio — turn end → first audio chunk.
|
||||
float EndToEarMs = 0.0f; // (PlaybackStartTime - TurnEndTime) * 1000, written where playback starts — turn end → audio playback (user-perceived).
|
||||
};
|
||||
FDebugLatencies LastLatencies;
|
||||
|
||||
// Accumulates incoming PCM bytes until the audio component needs data.
|
||||
// Uses a read offset instead of RemoveAt(0,N) to avoid O(n) memmove every
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user