Improve latency debug HUD: separate toggle, per-turn reset, multi-line display
- Add bDebugLatency property + CVar (ps.ai.ConvAgent.Debug.Latency) independent from bDebug to save HUD space - Reset latencies to zero each turn (StopListening) instead of persisting - Add UserSpeechMs and PreBufferMs to the latency struct - Move latency captures outside bDebug guard (always measured) - Replace single-line latency in DrawDebugHUD with dedicated DrawLatencyHUD() showing 6 metrics on separate lines with color coding Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d60f8d8484
commit
955c97e0dd
@ -26,6 +26,12 @@ static TAutoConsoleVariable<int32> CVarDebugElevenLabs(
|
|||||||
TEXT("Debug HUD for ElevenLabs. -1=use property, 0=off, 1-3=verbosity."),
|
TEXT("Debug HUD for ElevenLabs. -1=use property, 0=off, 1-3=verbosity."),
|
||||||
ECVF_Default);
|
ECVF_Default);
|
||||||
|
|
||||||
|
static TAutoConsoleVariable<int32> CVarDebugLatency(
|
||||||
|
TEXT("ps.ai.ConvAgent.Debug.Latency"),
|
||||||
|
-1,
|
||||||
|
TEXT("Latency debug HUD. -1=use property, 0=off, 1=on."),
|
||||||
|
ECVF_Default);
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
// Constructor
|
// Constructor
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
@ -160,9 +166,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
|||||||
AudioPlaybackComponent->Play();
|
AudioPlaybackComponent->Play();
|
||||||
}
|
}
|
||||||
PlaybackStartTime = FPlatformTime::Seconds();
|
PlaybackStartTime = FPlatformTime::Seconds();
|
||||||
if (bDebug && TurnEndTime > 0.0)
|
if (TurnEndTime > 0.0)
|
||||||
{
|
{
|
||||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
CurrentLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||||
|
}
|
||||||
|
if (PreBufferStartTime > 0.0)
|
||||||
|
{
|
||||||
|
CurrentLatencies.PreBufferMs = static_cast<float>((PlaybackStartTime - PreBufferStartTime) * 1000.0);
|
||||||
}
|
}
|
||||||
OnAudioPlaybackStarted.Broadcast();
|
OnAudioPlaybackStarted.Broadcast();
|
||||||
}
|
}
|
||||||
@ -308,6 +318,14 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
|||||||
DrawDebugHUD();
|
DrawDebugHUD();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
const int32 CVarVal = CVarDebugLatency.GetValueOnGameThread();
|
||||||
|
const bool bShowLatency = (CVarVal >= 0) ? (CVarVal > 0) : bDebugLatency;
|
||||||
|
if (bShowLatency)
|
||||||
|
{
|
||||||
|
DrawLatencyHUD();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
@ -576,6 +594,14 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopListening()
|
|||||||
}
|
}
|
||||||
|
|
||||||
TurnEndTime = FPlatformTime::Seconds();
|
TurnEndTime = FPlatformTime::Seconds();
|
||||||
|
|
||||||
|
// Reset latency measurements for this new response cycle.
|
||||||
|
CurrentLatencies = FDebugLatencies();
|
||||||
|
if (TurnStartTime > 0.0)
|
||||||
|
{
|
||||||
|
CurrentLatencies.UserSpeechMs = static_cast<float>((TurnEndTime - TurnStartTime) * 1000.0);
|
||||||
|
}
|
||||||
|
|
||||||
// Start the response timeout clock — but only when the server hasn't already started
|
// Start the response timeout clock — but only when the server hasn't already started
|
||||||
// generating. When StopListening() is called from HandleAgentResponseStarted() as part
|
// generating. When StopListening() is called from HandleAgentResponseStarted() as part
|
||||||
// of collision avoidance, bAgentGenerating is already true, meaning the server IS already
|
// of collision avoidance, bAgentGenerating is already true, meaning the server IS already
|
||||||
@ -1058,9 +1084,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleAgentResponseStarted()
|
|||||||
|
|
||||||
const double Now = FPlatformTime::Seconds();
|
const double Now = FPlatformTime::Seconds();
|
||||||
GenerationStartTime = Now;
|
GenerationStartTime = Now;
|
||||||
if (bDebug && TurnEndTime > 0.0)
|
if (TurnEndTime > 0.0)
|
||||||
{
|
{
|
||||||
LastLatencies.STTToGenMs = static_cast<float>((Now - TurnEndTime) * 1000.0);
|
CurrentLatencies.STTToGenMs = static_cast<float>((Now - TurnEndTime) * 1000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
const double T = Now - SessionStartTime;
|
const double T = Now - SessionStartTime;
|
||||||
@ -1341,6 +1367,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
|||||||
bQueueWasDry = false;
|
bQueueWasDry = false;
|
||||||
SilentTickCount = 0;
|
SilentTickCount = 0;
|
||||||
|
|
||||||
|
// Latency capture (always, for HUD display).
|
||||||
|
if (TurnEndTime > 0.0)
|
||||||
|
CurrentLatencies.TotalMs = static_cast<float>((AgentSpeakStart - TurnEndTime) * 1000.0);
|
||||||
|
if (GenerationStartTime > 0.0)
|
||||||
|
CurrentLatencies.GenToAudioMs = static_cast<float>((AgentSpeakStart - GenerationStartTime) * 1000.0);
|
||||||
|
|
||||||
if (bDebug)
|
if (bDebug)
|
||||||
{
|
{
|
||||||
const double T = AgentSpeakStart - SessionStartTime;
|
const double T = AgentSpeakStart - SessionStartTime;
|
||||||
@ -1348,12 +1380,6 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
|||||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
|
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
|
||||||
TEXT("[T+%.2fs] [Turn %d] Agent speaking — first audio chunk. (%.2fs after turn end)"),
|
TEXT("[T+%.2fs] [Turn %d] Agent speaking — first audio chunk. (%.2fs after turn end)"),
|
||||||
T, LastClosedTurnIndex, LatencyFromTurnEnd);
|
T, LastClosedTurnIndex, LatencyFromTurnEnd);
|
||||||
|
|
||||||
// Update latency snapshot for HUD display.
|
|
||||||
if (TurnEndTime > 0.0)
|
|
||||||
LastLatencies.TotalMs = static_cast<float>((AgentSpeakStart - TurnEndTime) * 1000.0);
|
|
||||||
if (GenerationStartTime > 0.0)
|
|
||||||
LastLatencies.GenToAudioMs = static_cast<float>((AgentSpeakStart - GenerationStartTime) * 1000.0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OnAgentStartedSpeaking.Broadcast();
|
OnAgentStartedSpeaking.Broadcast();
|
||||||
@ -1386,10 +1412,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
|||||||
AudioPlaybackComponent->Play();
|
AudioPlaybackComponent->Play();
|
||||||
}
|
}
|
||||||
PlaybackStartTime = FPlatformTime::Seconds();
|
PlaybackStartTime = FPlatformTime::Seconds();
|
||||||
if (bDebug && TurnEndTime > 0.0)
|
if (TurnEndTime > 0.0)
|
||||||
{
|
{
|
||||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
CurrentLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||||
}
|
}
|
||||||
|
// No pre-buffer in this path (immediate playback).
|
||||||
OnAudioPlaybackStarted.Broadcast();
|
OnAudioPlaybackStarted.Broadcast();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1417,9 +1444,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
|||||||
AudioPlaybackComponent->Play();
|
AudioPlaybackComponent->Play();
|
||||||
}
|
}
|
||||||
PlaybackStartTime = FPlatformTime::Seconds();
|
PlaybackStartTime = FPlatformTime::Seconds();
|
||||||
if (bDebug && TurnEndTime > 0.0)
|
if (TurnEndTime > 0.0)
|
||||||
{
|
{
|
||||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
CurrentLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||||
|
}
|
||||||
|
if (PreBufferStartTime > 0.0)
|
||||||
|
{
|
||||||
|
CurrentLatencies.PreBufferMs = static_cast<float>((PlaybackStartTime - PreBufferStartTime) * 1000.0);
|
||||||
}
|
}
|
||||||
OnAudioPlaybackStarted.Broadcast();
|
OnAudioPlaybackStarted.Broadcast();
|
||||||
}
|
}
|
||||||
@ -2362,19 +2393,57 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawDebugHUD() const
|
|||||||
NetConnectedPawns.Num(), *SpeakerName));
|
NetConnectedPawns.Num(), *SpeakerName));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Latencies (from last completed turn)
|
|
||||||
if (LastLatencies.TotalMs > 0.0f)
|
|
||||||
{
|
|
||||||
GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime, MainColor,
|
|
||||||
FString::Printf(TEXT(" Latency: total=%.0fms (stt>gen=%.0fms gen>audio=%.0fms) ear=%.0fms"),
|
|
||||||
LastLatencies.TotalMs, LastLatencies.STTToGenMs,
|
|
||||||
LastLatencies.GenToAudioMs, LastLatencies.EndToEarMs));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reconnection
|
// Reconnection
|
||||||
GEngine->AddOnScreenDebugMessage(BaseKey + 9, DisplayTime,
|
GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime,
|
||||||
bWantsReconnect ? FColor::Red : MainColor,
|
bWantsReconnect ? FColor::Red : MainColor,
|
||||||
FString::Printf(TEXT(" Reconnect: %d/%d attempts%s"),
|
FString::Printf(TEXT(" Reconnect: %d/%d attempts%s"),
|
||||||
ReconnectAttemptCount, MaxReconnectAttempts,
|
ReconnectAttemptCount, MaxReconnectAttempts,
|
||||||
bWantsReconnect ? TEXT(" (ACTIVE)") : TEXT("")));
|
bWantsReconnect ? TEXT(" (ACTIVE)") : TEXT("")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void UPS_AI_ConvAgent_ElevenLabsComponent::DrawLatencyHUD() const
|
||||||
|
{
|
||||||
|
if (!GEngine) return;
|
||||||
|
|
||||||
|
// Separate BaseKey range so it never collides with DrawDebugHUD
|
||||||
|
const int32 BaseKey = 93700;
|
||||||
|
const float DisplayTime = 0.0f; // refreshed every tick
|
||||||
|
|
||||||
|
const FColor TitleColor = FColor::Cyan;
|
||||||
|
const FColor ValueColor = FColor::White;
|
||||||
|
const FColor HighlightColor = FColor::Yellow;
|
||||||
|
|
||||||
|
// Helper: format a single metric — shows "---" when not yet captured this turn
|
||||||
|
auto Fmt = [](float Ms) -> FString
|
||||||
|
{
|
||||||
|
return (Ms > 0.0f) ? FString::Printf(TEXT("%.0f ms"), Ms) : FString(TEXT("---"));
|
||||||
|
};
|
||||||
|
|
||||||
|
// Title
|
||||||
|
GEngine->AddOnScreenDebugMessage(BaseKey, DisplayTime, TitleColor,
|
||||||
|
TEXT("=== Latency HUD ==="));
|
||||||
|
|
||||||
|
// 1. User speech duration
|
||||||
|
GEngine->AddOnScreenDebugMessage(BaseKey + 1, DisplayTime, ValueColor,
|
||||||
|
FString::Printf(TEXT(" User speech: %s"), *Fmt(CurrentLatencies.UserSpeechMs)));
|
||||||
|
|
||||||
|
// 2. STT + Network: end of speech → server starts generating
|
||||||
|
GEngine->AddOnScreenDebugMessage(BaseKey + 2, DisplayTime, ValueColor,
|
||||||
|
FString::Printf(TEXT(" STT + Network: %s"), *Fmt(CurrentLatencies.STTToGenMs)));
|
||||||
|
|
||||||
|
// 3. LLM + TTS: server generating → first audio chunk received
|
||||||
|
GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime, ValueColor,
|
||||||
|
FString::Printf(TEXT(" LLM + TTS: %s"), *Fmt(CurrentLatencies.GenToAudioMs)));
|
||||||
|
|
||||||
|
// 4. Total round-trip: end of speech → first audio chunk
|
||||||
|
GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, HighlightColor,
|
||||||
|
FString::Printf(TEXT(" Total: %s"), *Fmt(CurrentLatencies.TotalMs)));
|
||||||
|
|
||||||
|
// 5. Pre-buffer wait before playback
|
||||||
|
GEngine->AddOnScreenDebugMessage(BaseKey + 5, DisplayTime, ValueColor,
|
||||||
|
FString::Printf(TEXT(" Pre-buffer: %s"), *Fmt(CurrentLatencies.PreBufferMs)));
|
||||||
|
|
||||||
|
// 6. End-to-ear: end of speech → audio playback starts (user-perceived latency)
|
||||||
|
GEngine->AddOnScreenDebugMessage(BaseKey + 6, DisplayTime, HighlightColor,
|
||||||
|
FString::Printf(TEXT(" End-to-Ear: %s"), *Fmt(CurrentLatencies.EndToEarMs)));
|
||||||
|
}
|
||||||
|
|||||||
@ -231,6 +231,11 @@ public:
|
|||||||
meta = (ClampMin = "0", ClampMax = "3", EditCondition = "bDebug"))
|
meta = (ClampMin = "0", ClampMax = "3", EditCondition = "bDebug"))
|
||||||
int32 DebugVerbosity = 1;
|
int32 DebugVerbosity = 1;
|
||||||
|
|
||||||
|
/** Show a separate latency debug HUD with detailed per-turn timing breakdown.
|
||||||
|
* Independent from bDebug — can be enabled alone via CVar ps.ai.ConvAgent.Debug.Latency. */
|
||||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Debug")
|
||||||
|
bool bDebugLatency = false;
|
||||||
|
|
||||||
// ── Events ────────────────────────────────────────────────────────────────
|
// ── Events ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/** Fired when the WebSocket connection is established and the conversation session is ready. Provides the ConversationID and AgentID. */
|
/** Fired when the WebSocket connection is established and the conversation session is ready. Provides the ConversationID and AgentID. */
|
||||||
@ -635,16 +640,19 @@ private:
|
|||||||
double GenerationStartTime = 0.0; // Set in HandleAgentResponseStarted — server starts generating.
|
double GenerationStartTime = 0.0; // Set in HandleAgentResponseStarted — server starts generating.
|
||||||
double PlaybackStartTime = 0.0; // Set when audio playback actually starts (post pre-buffer).
|
double PlaybackStartTime = 0.0; // Set when audio playback actually starts (post pre-buffer).
|
||||||
|
|
||||||
// Last-turn latency snapshot (ms) — updated per turn, displayed on debug HUD.
|
// Current-turn latency measurements (ms). Reset when the user starts a new turn
|
||||||
// Persists between turns so the HUD always shows the most recent measurement.
|
// (StopListening). Each field is populated as the corresponding event fires.
|
||||||
|
// Zero means "not yet measured this turn".
|
||||||
struct FDebugLatencies
|
struct FDebugLatencies
|
||||||
{
|
{
|
||||||
float STTToGenMs = 0.0f; // TurnEnd → server starts generating
|
float UserSpeechMs = 0.0f; // How long the user spoke (TurnStart → TurnEnd)
|
||||||
float GenToAudioMs = 0.0f; // Server generating → first audio chunk
|
float STTToGenMs = 0.0f; // TurnEnd → server starts generating (network + STT)
|
||||||
float TotalMs = 0.0f; // TurnEnd → first audio chunk
|
float GenToAudioMs = 0.0f; // Server generating → first audio chunk (LLM + TTS)
|
||||||
float EndToEarMs = 0.0f; // TurnEnd → audio playback starts (user-perceived)
|
float TotalMs = 0.0f; // TurnEnd → first audio chunk (full round-trip)
|
||||||
|
float PreBufferMs = 0.0f; // Pre-buffer wait before playback
|
||||||
|
float EndToEarMs = 0.0f; // TurnEnd → audio playback starts (user-perceived)
|
||||||
};
|
};
|
||||||
FDebugLatencies LastLatencies;
|
FDebugLatencies CurrentLatencies;
|
||||||
|
|
||||||
// Accumulates incoming PCM bytes until the audio component needs data.
|
// Accumulates incoming PCM bytes until the audio component needs data.
|
||||||
// Uses a read offset instead of RemoveAt(0,N) to avoid O(n) memmove every
|
// Uses a read offset instead of RemoveAt(0,N) to avoid O(n) memmove every
|
||||||
@ -747,4 +755,5 @@ private:
|
|||||||
|
|
||||||
/** Draw on-screen debug info (called from TickComponent when bDebug). */
|
/** Draw on-screen debug info (called from TickComponent when bDebug). */
|
||||||
void DrawDebugHUD() const;
|
void DrawDebugHUD() const;
|
||||||
|
void DrawLatencyHUD() const;
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user