diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp index b8d9694..895bf07 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp @@ -159,6 +159,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel { AudioPlaybackComponent->Play(); } + PlaybackStartTime = FPlatformTime::Seconds(); + if (bDebug && TurnEndTime > 0.0) + { + LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0); + } OnAudioPlaybackStarted.Broadcast(); } } @@ -449,6 +454,8 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StartListening() } } bWaitingForAgentResponse = false; // New user turn — cancel any pending response timeout. + GenerationStartTime = 0.0; + PlaybackStartTime = 0.0; ++TurnIndex; bIsListening = true; TurnStartTime = FPlatformTime::Seconds(); @@ -1050,6 +1057,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleAgentResponseStarted() } const double Now = FPlatformTime::Seconds(); + GenerationStartTime = Now; + if (bDebug && TurnEndTime > 0.0) + { + LastLatencies.STTToGenMs = static_cast<float>((Now - TurnEndTime) * 1000.0); + } + const double T = Now - SessionStartTime; const double LatencyFromTurnEnd = TurnEndTime > 0.0 ? Now - TurnEndTime : 0.0; if (bIsListening) @@ -1335,6 +1348,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("[T+%.2fs] [Turn %d] Agent speaking — first audio chunk. (%.2fs after turn end)"), T, LastClosedTurnIndex, LatencyFromTurnEnd); + + // Update latency snapshot for HUD display. 
+ if (TurnEndTime > 0.0) + LastLatencies.TotalMs = static_cast<float>((AgentSpeakStart - TurnEndTime) * 1000.0); + if (GenerationStartTime > 0.0) + LastLatencies.GenToAudioMs = static_cast<float>((AgentSpeakStart - GenerationStartTime) * 1000.0); } OnAgentStartedSpeaking.Broadcast(); @@ -1366,6 +1385,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray { AudioPlaybackComponent->Play(); } + PlaybackStartTime = FPlatformTime::Seconds(); + if (bDebug && TurnEndTime > 0.0) + { + LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0); + } OnAudioPlaybackStarted.Broadcast(); } } @@ -1392,6 +1416,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray { AudioPlaybackComponent->Play(); } + PlaybackStartTime = FPlatformTime::Seconds(); + if (bDebug && TurnEndTime > 0.0) + { + LastLatencies.EndToEarMs = static_cast<float>((PlaybackStartTime - TurnEndTime) * 1000.0); + } OnAudioPlaybackStarted.Broadcast(); } SilentTickCount = 0; @@ -2333,8 +2362,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawDebugHUD() const NetConnectedPawns.Num(), *SpeakerName)); } + // Latencies (from last completed turn) + if (LastLatencies.TotalMs > 0.0f) + { + GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime, MainColor, + FString::Printf(TEXT(" Latency: total=%.0fms (stt>gen=%.0fms gen>audio=%.0fms) ear=%.0fms"), + LastLatencies.TotalMs, LastLatencies.STTToGenMs, + LastLatencies.GenToAudioMs, LastLatencies.EndToEarMs)); + } + // Reconnection - GEngine->AddOnScreenDebugMessage(BaseKey + 8, DisplayTime, + GEngine->AddOnScreenDebugMessage(BaseKey + 9, DisplayTime, bWantsReconnect ? 
FColor::Red : MainColor, FString::Printf(TEXT(" Reconnect: %d/%d attempts%s"), ReconnectAttemptCount, MaxReconnectAttempts, diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h index 4bafd2a..9bff26b 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h @@ -632,6 +632,19 @@ private: double TurnStartTime = 0.0; // Set in StartListening — when mic opens. double TurnEndTime = 0.0; // Set in StopListening — when mic closes. double AgentSpeakStart = 0.0; // Set in EnqueueAgentAudio (first chunk) — when audio begins. + double GenerationStartTime = 0.0; // Set in HandleAgentResponseStarted — server starts generating. + double PlaybackStartTime = 0.0; // Set when audio playback actually starts (post pre-buffer). + + // Last-turn latency snapshot (ms) — updated per turn, displayed on debug HUD. + // Persists between turns so the HUD always shows the most recent measurement. + struct FDebugLatencies + { + float STTToGenMs = 0.0f; // TurnEnd → server starts generating + float GenToAudioMs = 0.0f; // Server generating → first audio chunk + float TotalMs = 0.0f; // TurnEnd → first audio chunk + float EndToEarMs = 0.0f; // TurnEnd → audio playback starts (user-perceived) + }; + FDebugLatencies LastLatencies; // Accumulates incoming PCM bytes until the audio component needs data. // Uses a read offset instead of RemoveAt(0,N) to avoid O(n) memmove every