Improve latency debug HUD: separate toggle, per-turn reset, multi-line display
- Add bDebugLatency property + CVar (ps.ai.ConvAgent.Debug.Latency) independent from bDebug to save HUD space - Reset latencies to zero each turn (StopListening) instead of persisting - Add UserSpeechMs and PreBufferMs to the latency struct - Move latency captures outside bDebug guard (always measured) - Replace single-line latency in DrawDebugHUD with dedicated DrawLatencyHUD() showing 6 metrics on separate lines with color coding Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d60f8d8484
commit
955c97e0dd
@ -26,6 +26,12 @@ static TAutoConsoleVariable<int32> CVarDebugElevenLabs(
|
||||
TEXT("Debug HUD for ElevenLabs. -1=use property, 0=off, 1-3=verbosity."),
|
||||
ECVF_Default);
|
||||
|
||||
static TAutoConsoleVariable<int32> CVarDebugLatency(
|
||||
TEXT("ps.ai.ConvAgent.Debug.Latency"),
|
||||
-1,
|
||||
TEXT("Latency debug HUD. -1=use property, 0=off, 1=on."),
|
||||
ECVF_Default);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Constructor
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@ -160,9 +166,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
PlaybackStartTime = FPlatformTime::Seconds();
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
if (TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
CurrentLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
}
|
||||
if (PreBufferStartTime > 0.0)
|
||||
{
|
||||
CurrentLatencies.PreBufferMs = static_cast<float>((PlaybackStartTime - PreBufferStartTime) * 1000.0);
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
@ -308,6 +318,14 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
|
||||
DrawDebugHUD();
|
||||
}
|
||||
}
|
||||
{
|
||||
const int32 CVarVal = CVarDebugLatency.GetValueOnGameThread();
|
||||
const bool bShowLatency = (CVarVal >= 0) ? (CVarVal > 0) : bDebugLatency;
|
||||
if (bShowLatency)
|
||||
{
|
||||
DrawLatencyHUD();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@ -576,6 +594,14 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopListening()
|
||||
}
|
||||
|
||||
TurnEndTime = FPlatformTime::Seconds();
|
||||
|
||||
// Reset latency measurements for this new response cycle.
|
||||
CurrentLatencies = FDebugLatencies();
|
||||
if (TurnStartTime > 0.0)
|
||||
{
|
||||
CurrentLatencies.UserSpeechMs = static_cast<float>((TurnEndTime - TurnStartTime) * 1000.0);
|
||||
}
|
||||
|
||||
// Start the response timeout clock — but only when the server hasn't already started
|
||||
// generating. When StopListening() is called from HandleAgentResponseStarted() as part
|
||||
// of collision avoidance, bAgentGenerating is already true, meaning the server IS already
|
||||
@ -1058,9 +1084,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleAgentResponseStarted()
|
||||
|
||||
const double Now = FPlatformTime::Seconds();
|
||||
GenerationStartTime = Now;
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
if (TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.STTToGenMs = static_cast<float>((Now - TurnEndTime) * 1000.0);
|
||||
CurrentLatencies.STTToGenMs = static_cast<float>((Now - TurnEndTime) * 1000.0);
|
||||
}
|
||||
|
||||
const double T = Now - SessionStartTime;
|
||||
@ -1341,6 +1367,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
bQueueWasDry = false;
|
||||
SilentTickCount = 0;
|
||||
|
||||
// Latency capture (always, for HUD display).
|
||||
if (TurnEndTime > 0.0)
|
||||
CurrentLatencies.TotalMs = static_cast<float>((AgentSpeakStart - TurnEndTime) * 1000.0);
|
||||
if (GenerationStartTime > 0.0)
|
||||
CurrentLatencies.GenToAudioMs = static_cast<float>((AgentSpeakStart - GenerationStartTime) * 1000.0);
|
||||
|
||||
if (bDebug)
|
||||
{
|
||||
const double T = AgentSpeakStart - SessionStartTime;
|
||||
@ -1348,12 +1380,6 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
|
||||
TEXT("[T+%.2fs] [Turn %d] Agent speaking — first audio chunk. (%.2fs after turn end)"),
|
||||
T, LastClosedTurnIndex, LatencyFromTurnEnd);
|
||||
|
||||
// Update latency snapshot for HUD display.
|
||||
if (TurnEndTime > 0.0)
|
||||
LastLatencies.TotalMs = static_cast<float>((AgentSpeakStart - TurnEndTime) * 1000.0);
|
||||
if (GenerationStartTime > 0.0)
|
||||
LastLatencies.GenToAudioMs = static_cast<float>((AgentSpeakStart - GenerationStartTime) * 1000.0);
|
||||
}
|
||||
|
||||
OnAgentStartedSpeaking.Broadcast();
|
||||
@ -1386,10 +1412,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
PlaybackStartTime = FPlatformTime::Seconds();
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
if (TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
CurrentLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
}
|
||||
// No pre-buffer in this path (immediate playback).
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
}
|
||||
@ -1417,9 +1444,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
PlaybackStartTime = FPlatformTime::Seconds();
|
||||
if (bDebug && TurnEndTime > 0.0)
|
||||
if (TurnEndTime > 0.0)
|
||||
{
|
||||
LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
CurrentLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0);
|
||||
}
|
||||
if (PreBufferStartTime > 0.0)
|
||||
{
|
||||
CurrentLatencies.PreBufferMs = static_cast<float>((PlaybackStartTime - PreBufferStartTime) * 1000.0);
|
||||
}
|
||||
OnAudioPlaybackStarted.Broadcast();
|
||||
}
|
||||
@ -2362,19 +2393,57 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawDebugHUD() const
|
||||
NetConnectedPawns.Num(), *SpeakerName));
|
||||
}
|
||||
|
||||
// Latencies (from last completed turn)
|
||||
if (LastLatencies.TotalMs > 0.0f)
|
||||
{
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime, MainColor,
|
||||
FString::Printf(TEXT(" Latency: total=%.0fms (stt>gen=%.0fms gen>audio=%.0fms) ear=%.0fms"),
|
||||
LastLatencies.TotalMs, LastLatencies.STTToGenMs,
|
||||
LastLatencies.GenToAudioMs, LastLatencies.EndToEarMs));
|
||||
}
|
||||
|
||||
// Reconnection
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 9, DisplayTime,
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime,
|
||||
bWantsReconnect ? FColor::Red : MainColor,
|
||||
FString::Printf(TEXT(" Reconnect: %d/%d attempts%s"),
|
||||
ReconnectAttemptCount, MaxReconnectAttempts,
|
||||
bWantsReconnect ? TEXT(" (ACTIVE)") : TEXT("")));
|
||||
}
|
||||
|
||||
void UPS_AI_ConvAgent_ElevenLabsComponent::DrawLatencyHUD() const
|
||||
{
|
||||
if (!GEngine) return;
|
||||
|
||||
// Separate BaseKey range so it never collides with DrawDebugHUD
|
||||
const int32 BaseKey = 93700;
|
||||
const float DisplayTime = 0.0f; // refreshed every tick
|
||||
|
||||
const FColor TitleColor = FColor::Cyan;
|
||||
const FColor ValueColor = FColor::White;
|
||||
const FColor HighlightColor = FColor::Yellow;
|
||||
|
||||
// Helper: format a single metric — shows "---" when not yet captured this turn
|
||||
auto Fmt = [](float Ms) -> FString
|
||||
{
|
||||
return (Ms > 0.0f) ? FString::Printf(TEXT("%.0f ms"), Ms) : FString(TEXT("---"));
|
||||
};
|
||||
|
||||
// Title
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey, DisplayTime, TitleColor,
|
||||
TEXT("=== Latency HUD ==="));
|
||||
|
||||
// 1. User speech duration
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 1, DisplayTime, ValueColor,
|
||||
FString::Printf(TEXT(" User speech: %s"), *Fmt(CurrentLatencies.UserSpeechMs)));
|
||||
|
||||
// 2. STT + Network: end of speech → server starts generating
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 2, DisplayTime, ValueColor,
|
||||
FString::Printf(TEXT(" STT + Network: %s"), *Fmt(CurrentLatencies.STTToGenMs)));
|
||||
|
||||
// 3. LLM + TTS: server generating → first audio chunk received
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime, ValueColor,
|
||||
FString::Printf(TEXT(" LLM + TTS: %s"), *Fmt(CurrentLatencies.GenToAudioMs)));
|
||||
|
||||
// 4. Total round-trip: end of speech → first audio chunk
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 4, DisplayTime, HighlightColor,
|
||||
FString::Printf(TEXT(" Total: %s"), *Fmt(CurrentLatencies.TotalMs)));
|
||||
|
||||
// 5. Pre-buffer wait before playback
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 5, DisplayTime, ValueColor,
|
||||
FString::Printf(TEXT(" Pre-buffer: %s"), *Fmt(CurrentLatencies.PreBufferMs)));
|
||||
|
||||
// 6. End-to-ear: end of speech → audio playback starts (user-perceived latency)
|
||||
GEngine->AddOnScreenDebugMessage(BaseKey + 6, DisplayTime, HighlightColor,
|
||||
FString::Printf(TEXT(" End-to-Ear: %s"), *Fmt(CurrentLatencies.EndToEarMs)));
|
||||
}
|
||||
|
||||
@ -231,6 +231,11 @@ public:
|
||||
meta = (ClampMin = "0", ClampMax = "3", EditCondition = "bDebug"))
|
||||
int32 DebugVerbosity = 1;
|
||||
|
||||
/** Show a separate latency debug HUD with detailed per-turn timing breakdown.
|
||||
* Independent from bDebug — can be enabled alone via CVar ps.ai.ConvAgent.Debug.Latency. */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Debug")
|
||||
bool bDebugLatency = false;
|
||||
|
||||
// ── Events ────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Fired when the WebSocket connection is established and the conversation session is ready. Provides the ConversationID and AgentID. */
|
||||
@ -635,16 +640,19 @@ private:
|
||||
double GenerationStartTime = 0.0; // Set in HandleAgentResponseStarted — server starts generating.
|
||||
double PlaybackStartTime = 0.0; // Set when audio playback actually starts (post pre-buffer).
|
||||
|
||||
// Last-turn latency snapshot (ms) — updated per turn, displayed on debug HUD.
|
||||
// Persists between turns so the HUD always shows the most recent measurement.
|
||||
// Current-turn latency measurements (ms). Reset when the user starts a new turn
|
||||
// (StopListening). Each field is populated as the corresponding event fires.
|
||||
// Zero means "not yet measured this turn".
|
||||
struct FDebugLatencies
|
||||
{
|
||||
float STTToGenMs = 0.0f; // TurnEnd → server starts generating
|
||||
float GenToAudioMs = 0.0f; // Server generating → first audio chunk
|
||||
float TotalMs = 0.0f; // TurnEnd → first audio chunk
|
||||
float EndToEarMs = 0.0f; // TurnEnd → audio playback starts (user-perceived)
|
||||
float UserSpeechMs = 0.0f; // How long the user spoke (TurnStart → TurnEnd)
|
||||
float STTToGenMs = 0.0f; // TurnEnd → server starts generating (network + STT)
|
||||
float GenToAudioMs = 0.0f; // Server generating → first audio chunk (LLM + TTS)
|
||||
float TotalMs = 0.0f; // TurnEnd → first audio chunk (full round-trip)
|
||||
float PreBufferMs = 0.0f; // Pre-buffer wait before playback
|
||||
float EndToEarMs = 0.0f; // TurnEnd → audio playback starts (user-perceived)
|
||||
};
|
||||
FDebugLatencies LastLatencies;
|
||||
FDebugLatencies CurrentLatencies;
|
||||
|
||||
// Accumulates incoming PCM bytes until the audio component needs data.
|
||||
// Uses a read offset instead of RemoveAt(0,N) to avoid O(n) memmove every
|
||||
@ -747,4 +755,5 @@ private:
|
||||
|
||||
/** Draw on-screen debug info (called from TickComponent when bDebug). */
|
||||
void DrawDebugHUD() const;
|
||||
void DrawLatencyHUD() const;
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user