Add turn eagerness, speculative turn, adaptive pre-buffer, and latency HUD improvements

- Add TurnEagerness (Eager/Normal/Patient) and bSpeculativeTurn to agent config data asset, sent as conversation_config_override at WebSocket connection time - Add adaptive pre-buffer system: measures inter-chunk TTS timing and decreases pre-buffer when chunks arrive fast enough (decrease-only, resets each conversation) - New UPROPERTY: bAdaptivePreBuffer toggle, AudioPreBufferMs as starting/worst-case value - Rework latency HUD: TTS+Net, PreBuf actual/target with trend indicator, Gen>Ear, WS Ping, server region display - Fetch ElevenLabs server region from REST API x-region header - Add editor Detail Customization: TurnEagerness dropdown + SpeculativeTurn checkbox in AgentConfig with LLM picker and Language picker Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 16:43:20 +01:00
parent 2169c58cd7
commit 4456dfa9dc
9 changed files with 540 additions and 105 deletions
--- a/Unreal/PS_AI_Agent/Config/DefaultEngine.ini
+++ b/Unreal/PS_AI_Agent/Config/DefaultEngine.ini
@@ -1,8 +1,8 @@
 [/Script/EngineSettings.GameMapsSettings]
-GameDefaultMap=/Game/voidMap.voidMap
+GameDefaultMap=/PS_AI_ConvAgent/Demo_Metahuman.Demo_Metahuman
-EditorStartupMap=/Game/voidMap.voidMap
+EditorStartupMap=/PS_AI_ConvAgent/Demo_Metahuman.Demo_Metahuman
 [/Script/Engine.RendererSettings]
 r.AllowStaticLighting=False
@@ -182,4 +182,5 @@ ManualIPAddress=
 [/Script/PS_AI_ConvAgent.PS_AI_ConvAgent_Settings_ElevenLabs]
 API_Key=7b73c4244ccbec394cc010aaab01b0ec59ce0b11fc636ce4828354f675ca14a5
 ServerRegion=Global
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
@@ -17,6 +17,9 @@
 #include "GameFramework/PlayerController.h"
 #include "Net/UnrealNetwork.h"
 #include "VoiceModule.h"
 #include "HttpModule.h"
 #include "Interfaces/IHttpRequest.h"
 #include "Interfaces/IHttpResponse.h"
 DEFINE_LOG_CATEGORY_STATIC(LogPS_AI_ConvAgent_ElevenLabs, Log, All);
@@ -147,15 +150,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 	if (bPreBuffering)
 	{
 		const double Elapsed = (FPlatformTime::Seconds() - PreBufferStartTime) * 1000.0;
-		if (Elapsed >= static_cast<double>(AudioPreBufferMs))
+		const int32 EffPreBuf = (AudioPreBufferMs > 0)
 			? (bAdaptivePreBuffer ? AdaptivePreBufferMs : AudioPreBufferMs) : 0;
 		if (Elapsed >= static_cast<double>(EffPreBuf))
 		{
 			bPreBuffering = false;
 			if (bDebug)
 			{
 				const double Tpb = FPlatformTime::Seconds() - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
+					TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms adaptive). Starting playback."),
-					Tpb, LastClosedTurnIndex, AudioPreBufferMs);
+					Tpb, LastClosedTurnIndex, EffPreBuf);
 			}
 			// Only start playback if the agent is still speaking.
 			// If silence detection already set bAgentSpeaking=false, this is stale.
@@ -292,6 +297,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 	// Broadcast OUTSIDE the lock — Blueprint handlers can execute for arbitrary time.
 	if (bShouldBroadcastStopped)
 	{
 		// Adapt pre-buffer for next turn based on this turn's signals.
 		ApplyPreBufferAdaptation();
 		if (bHardTimeoutFired && bDebug)
 		{
 			const double Tht = FPlatformTime::Seconds() - SessionStartTime;
@@ -321,7 +329,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 	{
 		const int32 CVarVal = CVarDebugLatency.GetValueOnGameThread();
 		const bool bShowLatency = (CVarVal >= 0) ? (CVarVal > 0) : bDebugLatency;
-		if (bShowLatency)
+		// Only draw on the active (connected) Authority component.
 		// Multiple agents in the scene would overwrite each other's HUD at the same
 		// BaseKey, causing visible blinking between their values.
 		if (bShowLatency && IsConnected() && GetOwnerRole() == ROLE_Authority)
 		{
 			DrawLatencyHUD();
 		}
@@ -388,6 +399,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StartConversation_Internal()
 	// Pass configuration to the proxy before connecting.
 	WebSocketProxy->TurnMode = TurnMode;
 	if (AgentConfig)
 	{
 		WebSocketProxy->TurnEagerness = AgentConfig->TurnEagerness;
 		WebSocketProxy->bSpeculativeTurn = AgentConfig->bSpeculativeTurn;
 	}
 	// Resolve AgentID by priority: AgentConfig > component string > project default.
 	FString ResolvedAgentID = AgentID;
@@ -834,6 +850,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleConnected(const FPS_AI_ConvAgen
 	SessionStartTime = FPlatformTime::Seconds();
 	TurnIndex = 0;
 	LastClosedTurnIndex = 0;
 	// Initialize adaptive pre-buffer from designer settings.
 	AdaptivePreBufferMs = AudioPreBufferMs;  // Start at the designer's value.
 	PreBufferTrend = 0;
 	TurnIdealPreBufferMs = -1;
 	bTurnGapMeasured = false;
 	UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("[T+0.00s] Agent connected. ConversationID=%s"), *Info.ConversationID);
 	OnAgentConnected.Broadcast(Info);
@@ -852,6 +875,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleConnected(const FPS_AI_ConvAgen
 		}
 	}
 	// Probe server region once per session (only when latency HUD is enabled).
 	if (ServerRegion.IsEmpty() && GetOwnerRole() == ROLE_Authority)
 	{
 		const int32 CVarVal = CVarDebugLatency.GetValueOnGameThread();
 		const bool bShowLatency = (CVarVal >= 0) ? (CVarVal > 0) : bDebugLatency;
 		if (bShowLatency)
 		{
 			FetchServerRegion();
 		}
 	}
 	// In Client turn mode (push-to-talk), the user controls listening manually via
 	// StartListening()/StopListening(). Auto-starting would leave the mic open
 	// permanently and interfere with push-to-talk — the T-release StopListening()
@@ -1081,21 +1115,28 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleAgentResponseStarted()
 	// In Server VAD mode, StopListening() is not called — the server detects
 	// end of user speech and immediately starts generating. If TurnEndTime was
 	// not set by StopListening since the last generation (i.e. it's stale or 0),
-	// use Now as the best client-side approximation.
+	// use the proxy's LastUserTranscriptTime as the best approximation:
 	// user_transcript arrives after server VAD + ASR, just before LLM starts.
 	const bool bFreshTurnEnd = (TurnEndTime > GenerationStartTime) && (GenerationStartTime > 0.0);
 	if (!bFreshTurnEnd)
 	{
-		TurnEndTime = Now;
+		const double TranscriptTime = WebSocketProxy ? WebSocketProxy->GetLastUserTranscriptTime() : 0.0;
 		TurnEndTime = (TranscriptTime > 0.0) ? TranscriptTime : Now;
 	}
-	// Reset all latency measurements — new response cycle starts here.
+	// New response cycle starts here. All client-side metrics are anchored to
-	// All metrics are anchored to GenerationStartTime (= now), which is the closest
+	// GenerationStartTime (= now). Do NOT zero CurrentLatencies — the per-field
-	// client-side proxy for "user stopped speaking" in Server VAD mode.
+	// assignments in EnqueueAgentAudio() overwrite naturally, so the HUD shows the
-	CurrentLatencies = FDebugLatencies();
+	// previous turn's values until the new turn's measurements arrive (no "---" blink).
 	GenerationStartTime = Now;
 	const double T = Now - SessionStartTime;
 	const double LatencyFromTurnEnd = Now - TurnEndTime;
 	// LLM latency: time from user_transcript received to first text token arriving.
 	// In Server VAD mode, this approximates LLM TTFT + network (post-ASR).
 	// In Client turn mode, this is the full ASR + LLM latency.
 	CurrentLatencies.TurnEndToTextMs = static_cast<float>(LatencyFromTurnEnd * 1000.0);
 	if (bIsListening)
 	{
 		// In Server VAD + interruption mode, keep the mic open so the server can
@@ -1321,7 +1362,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnProceduralUnderflow(
 			AudioQueueReadOffset = 0;
 		}
-		// Log when queue recovers (new data arrived after being dry)
+		// Queue recovered: was dry, now has data again.
 		// Only flag as underrun if the gap was long enough to be audible.
 		// Short gaps (<200ms) are handled seamlessly by USoundWaveProcedural's
 		// internal silence — no need to increase the pre-buffer for those.
 		if (bQueueWasDry)
 		{
 			bQueueWasDry = false;
@@ -1329,7 +1373,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnProceduralUnderflow(
 			{
 				const double T = FPlatformTime::Seconds() - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] AudioQueue recovered — feeding real data again (%d bytes remaining)."),
+					TEXT("[T+%.2fs] [Turn %d] AudioQueue recovered (%d bytes remaining)."),
 					T, LastClosedTurnIndex, AudioQueue.Num() - AudioQueueReadOffset);
 			}
 		}
@@ -1371,6 +1415,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 		bAgentResponseReceived = false; // Reset: wait for agent_response before allowing StopSpeaking.
 		bQueueWasDry = false;
 		SilentTickCount = 0;
 		// Adaptive pre-buffer: record first chunk timing for inter-chunk gap measurement.
 		TurnFirstChunkTime = FPlatformTime::Seconds();
 		TurnFirstChunkBytes = PCMData.Num();
 		TurnIdealPreBufferMs = -1;
 		bTurnGapMeasured = false;
 		// Latency capture (always, for HUD display).
 		if (GenerationStartTime > 0.0)
@@ -1393,7 +1442,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 			MulticastAgentStartedSpeaking();
 		}
-		if (AudioPreBufferMs > 0)
+		const int32 EffectivePreBufferMs = (AudioPreBufferMs > 0)
 			? (bAdaptivePreBuffer ? AdaptivePreBufferMs : AudioPreBufferMs) : 0;
 		if (EffectivePreBufferMs > 0)
 		{
 			// Pre-buffer: accumulate audio before starting playback.
 			// This absorbs TTS inter-chunk gaps so chunk 2 arrives before
@@ -1404,8 +1455,8 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 			{
 				const double Tpb2 = FPlatformTime::Seconds() - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] Pre-buffering %dms before starting playback."),
+					TEXT("[T+%.2fs] [Turn %d] Pre-buffering %dms (adaptive) before starting playback."),
-					Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
+					Tpb2, LastClosedTurnIndex, EffectivePreBufferMs);
 			}
 		}
 		else
@@ -1433,14 +1484,25 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 		if (GetOwnerRole() == ROLE_Authority)
 		{
 			bPreBuffering = false;
 			// Measure inter-chunk gap for adaptive pre-buffer.
 			if (!bTurnGapMeasured && TurnFirstChunkTime > 0.0)
 			{
 				const double NowGap = FPlatformTime::Seconds();
 				const double InterChunkGapMs = (NowGap - TurnFirstChunkTime) * 1000.0;
 				// Chunk 1 audio duration: 16kHz 16-bit mono = 32000 bytes/sec.
 				const double Chunk1AudioMs = (TurnFirstChunkBytes > 0)
 					? (static_cast<double>(TurnFirstChunkBytes) / 32.0) : 0.0;
 				TurnIdealPreBufferMs = FMath::Max(0, FMath::RoundToInt32(InterChunkGapMs - Chunk1AudioMs));
 				bTurnGapMeasured = true;
 			}
 			if (bDebug)
 			{
 				const double NowPb = FPlatformTime::Seconds();
 				const double BufferedMs = (NowPb - PreBufferStartTime) * 1000.0;
 				const double Tpb3 = NowPb - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered). Starting playback."),
+					TEXT("[T+%.2fs] [Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered, ideal=%dms). Starting playback."),
-					Tpb3, LastClosedTurnIndex, BufferedMs);
+					Tpb3, LastClosedTurnIndex, BufferedMs, TurnIdealPreBufferMs);
 			}
 			if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
 			{
@@ -1467,6 +1529,23 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 		{
 			AudioPlaybackComponent->Play();
 		}
 		// Measure inter-chunk gap for adaptive pre-buffer (first gap only).
 		if (!bTurnGapMeasured && TurnFirstChunkTime > 0.0 && GetOwnerRole() == ROLE_Authority)
 		{
 			const double NowGap = FPlatformTime::Seconds();
 			const double InterChunkGapMs = (NowGap - TurnFirstChunkTime) * 1000.0;
 			const double Chunk1AudioMs = (TurnFirstChunkBytes > 0)
 				? (static_cast<double>(TurnFirstChunkBytes) / 32.0) : 0.0;
 			TurnIdealPreBufferMs = FMath::Max(0, FMath::RoundToInt32(InterChunkGapMs - Chunk1AudioMs));
 			bTurnGapMeasured = true;
 			if (bDebug)
 			{
 				const double T = NowGap - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
 					TEXT("[T+%.2fs] [Turn %d] Inter-chunk gap: %.0fms, chunk1 audio: %.0fms → ideal pre-buffer: %dms"),
 					T, LastClosedTurnIndex, InterChunkGapMs, Chunk1AudioMs, TurnIdealPreBufferMs);
 			}
 		}
 		// Reset silence counter — new audio arrived, we're not in a gap anymore
 		SilentTickCount = 0;
 	}
@@ -1516,6 +1595,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopAgentAudio()
 	// Broadcast outside the lock.
 	if (bWasSpeaking)
 	{
 		// Adapt pre-buffer for next turn based on this turn's signals.
 		ApplyPreBufferAdaptation();
 		if (bDebug)
 		{
 			const double T = Now - SessionStartTime;
@@ -1536,6 +1618,52 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopAgentAudio()
 	}
 }
 void UPS_AI_ConvAgent_ElevenLabsComponent::ApplyPreBufferAdaptation()
 {
 	// Only adapt on Authority (where the WebSocket lives and measurements are taken).
 	if (GetOwnerRole() != ROLE_Authority) return;
 	// Adaptive mode must be enabled, and pre-buffering must be active.
 	if (!bAdaptivePreBuffer || AudioPreBufferMs == 0) return;
 	// No measurement this turn (single-chunk response or no second chunk arrived).
 	if (TurnIdealPreBufferMs < 0) { PreBufferTrend = 0; return; }
 	const int32 Prev = AdaptivePreBufferMs;
 	// DECREASE-ONLY: the measured ideal tells us the minimum pre-buffer needed.
 	// If the ideal is lower than our current value, the connection is fast enough
 	// that we can reduce the pre-buffer and save latency.
 	// If the ideal is higher (e.g. natural speech pause, slow network), we do NOT
 	// increase — USoundWaveProcedural handles gaps seamlessly in most cases.
 	// The user sets AudioPreBufferMs as the "worst case" starting value;
 	// the system only optimizes downward from there. Resets each conversation.
 	if (TurnIdealPreBufferMs < AdaptivePreBufferMs)
 	{
 		// Ideal is lower — decrease toward it (EMA 30% per turn, with 50ms margin).
 		const int32 TargetMs = FMath::Max(AdaptivePreBufferMinMs, TurnIdealPreBufferMs + 50);
 		AdaptivePreBufferMs = FMath::Max(AdaptivePreBufferMinMs,
 			FMath::RoundToInt32(AdaptivePreBufferMs * 0.7f + TargetMs * 0.3f));
 		PreBufferTrend = (AdaptivePreBufferMs < Prev) ? -1 : 0;
 	}
 	else
 	{
 		// Ideal >= current — connection is same or worse, keep current value.
 		PreBufferTrend = 0;
 	}
 	// Reset measurement for next turn.
 	const int32 IdealForLog = TurnIdealPreBufferMs;
 	TurnIdealPreBufferMs = -1;
 	bTurnGapMeasured = false;
 	if (bDebug && Prev != AdaptivePreBufferMs)
 	{
 		const double T = FPlatformTime::Seconds() - SessionStartTime;
 		UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
 			TEXT("[T+%.2fs] [Turn %d] Adaptive pre-buffer: %d ms -> %d ms (ideal=%dms)"),
 			T, LastClosedTurnIndex, Prev, AdaptivePreBufferMs, IdealForLog);
 	}
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // Microphone → WebSocket
 // ─────────────────────────────────────────────────────────────────────────────
@@ -2404,6 +2532,42 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawDebugHUD() const
 			bWantsReconnect ? TEXT(" (ACTIVE)") : TEXT("")));
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // Server region detection (one-shot HTTP probe)
 // ─────────────────────────────────────────────────────────────────────────────
 void UPS_AI_ConvAgent_ElevenLabsComponent::FetchServerRegion()
 {
 	const UPS_AI_ConvAgent_Settings_ElevenLabs* Settings = FPS_AI_ConvAgentModule::Get().GetSettings();
 	if (!Settings || Settings->API_Key.IsEmpty()) return;
 	auto Request = FHttpModule::Get().CreateRequest();
 	Request->SetURL(Settings->GetAPIBaseURL() + TEXT("/v1/models"));
 	Request->SetVerb(TEXT("GET"));
 	Request->SetHeader(TEXT("xi-api-key"), Settings->API_Key);
 	TWeakObjectPtr<UPS_AI_ConvAgent_ElevenLabsComponent> WeakThis(this);
 	Request->OnProcessRequestComplete().BindLambda(
 		[WeakThis](FHttpRequestPtr /*Req*/, FHttpResponsePtr Resp, bool bSuccess)
 		{
 			if (!bSuccess || !Resp.IsValid()) return;
 			const FString Region = Resp->GetHeader(TEXT("x-region"));
 			if (Region.IsEmpty()) return;
 			AsyncTask(ENamedThreads::GameThread, [WeakThis, Region]()
 			{
 				if (WeakThis.IsValid())
 				{
 					WeakThis->ServerRegion = Region;
 					UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("ElevenLabs server region: %s"), *Region);
 				}
 			});
 		});
 	Request->ProcessRequest();
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // Latency debug HUD
 // ─────────────────────────────────────────────────────────────────────────────
 void UPS_AI_ConvAgent_ElevenLabsComponent::DrawLatencyHUD() const
 {
 	if (!GEngine) return;
@@ -2412,29 +2576,62 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawLatencyHUD() const
 	const int32 BaseKey = 93700;
 	const float DisplayTime = 1.0f; // long enough to avoid flicker between ticks
-	const FColor TitleColor  = FColor::Cyan;
+	const FColor TitleColor     = FColor::Cyan;
-	const FColor ValueColor  = FColor::White;
+	const FColor ValueColor     = FColor::White;
 	const FColor HighlightColor = FColor::Yellow;
 	// Helper: format a single metric — shows "---" when not yet captured this turn
 	auto Fmt = [](float Ms) -> FString
 	{
 		return (Ms > 0.0f) ? FString::Printf(TEXT("%.0f ms"), Ms) : FString(TEXT("---"));
 	};
-	// Title — all times measured from agent_response_started
+	int32 Row = 0;
 	GEngine->AddOnScreenDebugMessage(BaseKey, DisplayTime, TitleColor,
 		TEXT("=== Latency (from gen start) ==="));
-	// 1. Gen → Audio: generation start → first audio chunk (LLM + TTS)
+	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, TitleColor,
-	GEngine->AddOnScreenDebugMessage(BaseKey + 1, DisplayTime, ValueColor,
+		TEXT("=== Voice-to-Voice Latency ==="));
 		FString::Printf(TEXT("  Gen>Audio:      %s"), *Fmt(CurrentLatencies.GenToAudioMs)));
-	// 2. Pre-buffer wait before playback
+	// Client-side breakdown: TTS+Net + Pre-buffer = Gen>Ear
-	GEngine->AddOnScreenDebugMessage(BaseKey + 2, DisplayTime, ValueColor,
+	// Note: LLM latency is only visible on ElevenLabs dashboard (server-side).
-		FString::Printf(TEXT("  Pre-buffer:     %s"), *Fmt(CurrentLatencies.PreBufferMs)));
+	// In Server VAD mode, no reliable client-side "end of user speech" marker exists.
 	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
 		FString::Printf(TEXT("  TTS+Net:        %s"), *Fmt(CurrentLatencies.GenToAudioMs)));
-	// 3. Gen → Ear: generation start → playback starts (user-perceived total)
+	// Pre-buffer display depends on adaptive mode.
-	GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime, HighlightColor,
+	if (bAdaptivePreBuffer && AudioPreBufferMs > 0)
 	{
 		// Adaptive ON: show actual wait + adaptive target with trend arrow.
 		GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
 			FString::Printf(TEXT("  PreBuf actual:  %s"), *Fmt(CurrentLatencies.PreBufferMs)));
 		const TCHAR* TrendArrow = (PreBufferTrend > 0) ? TEXT(" ^")
 		                         : (PreBufferTrend < 0) ? TEXT(" v")
 		                         : TEXT("");
 		const FColor AdaptiveColor = (PreBufferTrend > 0) ? FColor::Red
 		                            : (PreBufferTrend < 0) ? FColor::Green
 		                            : ValueColor;
 		GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, AdaptiveColor,
 			FString::Printf(TEXT("  PreBuf target:  %d ms%s"), AdaptivePreBufferMs, TrendArrow));
 	}
 	else
 	{
 		// Adaptive OFF (or pre-buffer disabled): show fixed pre-buffer value.
 		GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
 			FString::Printf(TEXT("  Pre-buffer:     %s"), *Fmt(CurrentLatencies.PreBufferMs)));
 	}
 	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, HighlightColor,
 		FString::Printf(TEXT("  Gen>Ear:        %s"), *Fmt(CurrentLatencies.GenToEarMs)));
 	// Connection section
 	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, TitleColor,
 		TEXT("--- Connection ---"));
 	const int32 PingMs = WebSocketProxy ? WebSocketProxy->GetLastPingMs() : -1;
 	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
 		FString::Printf(TEXT("  WS Ping:        %s"),
 			(PingMs >= 0) ? *FString::Printf(TEXT("%d ms"), PingMs) : TEXT("---")));
 	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
 		FString::Printf(TEXT("  Region:         %s"),
 			ServerRegion.IsEmpty() ? TEXT("---") : *ServerRegion));
 }
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.cpp
@@ -207,41 +207,58 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsConnected()
 	// This produces smooth continuous audio chunks without the fragmentation caused by
 	// explicit optimize_streaming_latency or enable_intermediate_response overrides.
 	//
-	// In Client (push-to-talk) mode only, we override turn_timeout to reduce latency.
+	// Build turn configuration based on mode + latency settings.
 	// In Server VAD mode, the config override is empty (matches C++ sample exactly).
 	TSharedPtr<FJsonObject> ConversationConfigOverride = MakeShareable(new FJsonObject());
 	if (TurnMode == EPS_AI_ConvAgent_TurnMode_ElevenLabs::Client)
 	{
 		// turn_timeout: how long the server waits after VAD detects silence before
 		// processing the user's turn. Default is ~3s. In push-to-talk mode this
 		// directly adds latency — the server waits after the user releases T.
 		// 1s is safe without speculative_turn (which was removed — see history below).
 		//
 		// History:
 		//   turn_timeout=1 was problematic when combined with speculative_turn=true
 		//   (server silently dropped turns 3+). Without speculative_turn, 1s is safe
 		//   and halves the per-turn latency.
 		TSharedPtr<FJsonObject> TurnObj = MakeShareable(new FJsonObject());
-		TurnObj->SetNumberField(TEXT("turn_timeout"), 1);
+		bool bHasTurnOverrides = false;
-		TSharedPtr<FJsonObject> AgentObj = MakeShareable(new FJsonObject());
+		// In Client (push-to-talk) mode, reduce turn_timeout to minimize latency.
-		AgentObj->SetObjectField(TEXT("turn"), TurnObj);
+		if (TurnMode == EPS_AI_ConvAgent_TurnMode_ElevenLabs::Client)
 		{
 			TurnObj->SetNumberField(TEXT("turn_timeout"), 1);
 			bHasTurnOverrides = true;
 		}
-		ConversationConfigOverride->SetObjectField(TEXT("agent"), AgentObj);
+		// turn_eagerness: controls how quickly the server interprets pauses as end-of-speech.
 		// "eager" = fastest (may cut user off), "normal" = balanced, "patient" = waits longer.
 		if (TurnEagerness != EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Normal)
 		{
 			FString EagernessStr;
 			switch (TurnEagerness)
 			{
 			case EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Eager:   EagernessStr = TEXT("eager");   break;
 			case EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Patient: EagernessStr = TEXT("patient"); break;
 			default:                                                  EagernessStr = TEXT("normal");  break;
 			}
 			TurnObj->SetStringField(TEXT("turn_eagerness"), EagernessStr);
 			bHasTurnOverrides = true;
 		}
 		// speculative_turn: start generating a response before confirming end-of-speech.
 		// Reduces latency but may cause occasional false starts (discarded if user continues).
 		if (bSpeculativeTurn)
 		{
 			TurnObj->SetBoolField(TEXT("speculative_turn"), true);
 			bHasTurnOverrides = true;
 		}
 		if (bHasTurnOverrides)
 		{
 			TSharedPtr<FJsonObject> AgentObj = MakeShareable(new FJsonObject());
 			AgentObj->SetObjectField(TEXT("turn"), TurnObj);
 			ConversationConfigOverride->SetObjectField(TEXT("agent"), AgentObj);
 		}
 	}
 	// NOTE: We intentionally do NOT send these overrides (matching C++ sample):
 	//
-	// - tts.optimize_streaming_latency: Explicitly sending ANY value (even 0) changes
+	// - tts.optimize_streaming_latency: deprecated by ElevenLabs. Sending any value
-	//   the TTS chunking behaviour vs server defaults. The C++ sample omits this entirely.
+	//   changes TTS chunking behaviour. Server default (omitted) is optimal.
 	//   With value 3: many tiny chunks with 500ms-2s gaps (requires heavy buffering).
 	//   With value 0: fewer larger chunks but ~3s inter-chunk gaps (still causes gaps).
 	//   Server default (omitted): produces smooth continuous audio (no gaps in C++ sample).
 	//
 	// - custom_llm_extra_body.enable_intermediate_response: When true, the LLM speaks
-	//   before finishing generation → fragmented audio. When omitted (C++ sample), the
+	//   before finishing generation → fragmented audio. Omitted = server default.
 	//   LLM completes its response first → continuous TTS chunks.
 	//
 	// - custom_llm_extra_body (empty object): Even an empty object might override the
 	//   agent's configured custom_llm_extra_body with nothing. Omit entirely.
@@ -259,12 +276,15 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsConnected()
 	FJsonSerializer::Serialize(InitMsg.ToSharedRef(), InitWriter);
 	{
 		const UPS_AI_ConvAgent_Settings_ElevenLabs* S = FPS_AI_ConvAgentModule::Get().GetSettings();
-		if (S->bVerboseLogging)
+		if (S && S->bVerboseLogging)
 		{
 			UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Verbose, TEXT("Sending initiation: %s"), *InitJson);
 		}
 	}
-	WebSocket->Send(InitJson);
+	if (WebSocket.IsValid())
 	{
 		WebSocket->Send(InitJson);
 	}
 }
 void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsConnectionError(const FString& Error)
@@ -507,6 +527,10 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandleTranscript(const TSharedP
 		return;
 	}
 	// Record arrival time for latency measurement (ASR+LLM breakdown).
 	// user_transcript arrives after server VAD + ASR, just before LLM starts.
 	LastUserTranscriptTime = FPlatformTime::Seconds();
 	FPS_AI_ConvAgent_TranscriptSegment_ElevenLabs Segment;
 	Segment.Speaker = TEXT("user");
 	(*TranscriptEvent)->TryGetStringField(TEXT("user_transcript"), Segment.Text);
@@ -679,6 +703,13 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandlePing(const TSharedPtr<FJs
 	if (Root->TryGetObjectField(TEXT("ping_event"), PingEvent) && PingEvent)
 	{
 		(*PingEvent)->TryGetNumberField(TEXT("event_id"), EventID);
 		// Extract server-reported WS round-trip latency.
 		int32 PingValue = 0;
 		if ((*PingEvent)->TryGetNumberField(TEXT("ping_ms"), PingValue))
 		{
 			LastPingMs.store(PingValue, std::memory_order_relaxed);
 		}
 	}
 	TSharedPtr<FJsonObject> Pong = MakeShareable(new FJsonObject());
@@ -718,7 +749,7 @@ FString UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::BuildWebSocketURL(const FStr
 {
 	const UPS_AI_ConvAgent_Settings_ElevenLabs* Settings = FPS_AI_ConvAgentModule::Get().GetSettings();
-	// Custom URL override takes full precedence
+	// Custom URL override takes full precedence (advanced / proxy use case)
 	if (!Settings->CustomWebSocketURL.IsEmpty())
 	{
 		return Settings->CustomWebSocketURL;
@@ -730,9 +761,9 @@ FString UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::BuildWebSocketURL(const FStr
 		return FString();
 	}
-	// Official ElevenLabs Conversational AI WebSocket endpoint
+	// Build URL from the region-aware base: wss://<regional-host>/v1/convai/conversation?agent_id=<ID>
-	// wss://api.elevenlabs.io/v1/convai/conversation?agent_id=<ID>
+	const FString BaseURL = Settings->GetWSBaseURL();
 	return FString::Printf(
-		TEXT("wss://api.elevenlabs.io/v1/convai/conversation?agent_id=%s"),
+		TEXT("%s/v1/convai/conversation?agent_id=%s"),
-		*AgentIDOverride);
+		*BaseURL, *AgentIDOverride);
 }
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent.h
@@ -6,6 +6,22 @@
 #include "Modules/ModuleManager.h"
 #include "PS_AI_ConvAgent.generated.h"
 // ─────────────────────────────────────────────────────────────────────────────
 // ElevenLabs server region
 // ─────────────────────────────────────────────────────────────────────────────
 /**
  * Selects which ElevenLabs endpoint family the plugin talks to.
  * The settings class maps each value to a concrete https / wss host in
  * GetAPIBaseURL() and GetWSBaseURL(); Global falls through to the default host
  * (api.elevenlabs.io) there.
  */
 UENUM()
 enum class EPS_AI_ConvAgent_ElevenLabsRegion : uint8
 {
 	/** Automatic global routing (default). Server chosen by ElevenLabs based on client location. */
 	Global		UMETA(DisplayName = "Global (auto)"),
 	/** Force US servers: api.us.elevenlabs.io */
 	US			UMETA(DisplayName = "US"),
 	/** Force EU servers (Enterprise only): api.eu.residency.elevenlabs.io */
 	EU			UMETA(DisplayName = "EU (Enterprise)"),
 	/** Force India servers (Enterprise only): api.in.residency.elevenlabs.io */
 	India		UMETA(DisplayName = "India (Enterprise)")
 };
 // ─────────────────────────────────────────────────────────────────────────────
 // Settings object – exposed in Project Settings → Plugins → PS AI ConvAgent - ElevenLabs
 // ─────────────────────────────────────────────────────────────────────────────
@@ -24,8 +40,17 @@ public:
 	FString API_Key;
 	/**
-	 * Override the ElevenLabs WebSocket base URL. Leave empty to use the default:
+	 * Server region for ElevenLabs API.
-	 *   wss://api.elevenlabs.io/v1/convai/conversation
+	 * - Global (default): automatic routing based on client location.
 	 * - US: force US servers (api.us.elevenlabs.io).
 	 * - EU / India: Enterprise-only data residency endpoints.
 	 */
 	UPROPERTY(Config, EditAnywhere, Category = "PS AI ConvAgent|ElevenLabs API")
 	EPS_AI_ConvAgent_ElevenLabsRegion ServerRegion = EPS_AI_ConvAgent_ElevenLabsRegion::Global;
 	/**
 	 * Override the ElevenLabs WebSocket URL entirely. Leave empty to use ServerRegion setting.
 	 * Example: wss://custom-proxy.example.com/v1/convai/conversation?agent_id=YOUR_ID
 	 */
 	UPROPERTY(Config, EditAnywhere, AdvancedDisplay, Category = "PS AI ConvAgent|ElevenLabs API")
 	FString CustomWebSocketURL;
@@ -33,6 +58,30 @@ public:
 	/** Log verbose WebSocket messages to the Output Log (useful during development). */
 	UPROPERTY(Config, EditAnywhere, AdvancedDisplay, Category = "PS AI ConvAgent|ElevenLabs API")
 	bool bVerboseLogging = false;
 	/**
 	 * Return the REST API base URL (https scheme, no trailing slash) for the
 	 * region selected in ServerRegion. Any value without a dedicated host,
 	 * including Global, yields the default https://api.elevenlabs.io.
 	 */
 	FString GetAPIBaseURL() const
 	{
 		// Start from the default host and override only for regions with their own endpoint.
 		const TCHAR* BaseURL = TEXT("https://api.elevenlabs.io");
 		if (ServerRegion == EPS_AI_ConvAgent_ElevenLabsRegion::US)
 		{
 			BaseURL = TEXT("https://api.us.elevenlabs.io");
 		}
 		else if (ServerRegion == EPS_AI_ConvAgent_ElevenLabsRegion::EU)
 		{
 			BaseURL = TEXT("https://api.eu.residency.elevenlabs.io");
 		}
 		else if (ServerRegion == EPS_AI_ConvAgent_ElevenLabsRegion::India)
 		{
 			BaseURL = TEXT("https://api.in.residency.elevenlabs.io");
 		}
 		return FString(BaseURL);
 	}
 	/**
 	 * Return the WebSocket base URL (wss scheme, no trailing slash) for the
 	 * region selected in ServerRegion. Any value without a dedicated host,
 	 * including Global, yields the default wss://api.elevenlabs.io.
 	 */
 	FString GetWSBaseURL() const
 	{
 		// Start from the default host and override only for regions with their own endpoint.
 		const TCHAR* BaseURL = TEXT("wss://api.elevenlabs.io");
 		if (ServerRegion == EPS_AI_ConvAgent_ElevenLabsRegion::US)
 		{
 			BaseURL = TEXT("wss://api.us.elevenlabs.io");
 		}
 		else if (ServerRegion == EPS_AI_ConvAgent_ElevenLabsRegion::EU)
 		{
 			BaseURL = TEXT("wss://api.eu.residency.elevenlabs.io");
 		}
 		else if (ServerRegion == EPS_AI_ConvAgent_ElevenLabsRegion::India)
 		{
 			BaseURL = TEXT("wss://api.in.residency.elevenlabs.io");
 		}
 		return FString(BaseURL);
 	}
 };
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_AgentConfig_ElevenLabs.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_AgentConfig_ElevenLabs.h
@@ -4,6 +4,7 @@
 #include "CoreMinimal.h"
 #include "Engine/DataAsset.h"
 #include "PS_AI_ConvAgent_Definitions.h"
 #include "PS_AI_ConvAgent_AgentConfig_ElevenLabs.generated.h"
 /**
@@ -186,6 +187,24 @@ public:
 		ToolTip = "Max conversation turns.\n0 = unlimited."))
 	int32 MaxTurns = 0;
 	// ── Latency / Turn-taking ───────────────────────────────────────────────
 	/** How quickly the server detects end-of-speech and starts responding.
 	 *  Eager = fastest response, may cut the user off during pauses.
 	 *  Normal = balanced (default). Patient = waits longer for user to finish.
 	 *  Sent as conversation_config_override at WebSocket connection time. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Latency",
 		meta = (ToolTip = "Controls how quickly the server detects end-of-speech.\n- Eager: fastest response, may interrupt mid-pause.\n- Normal: balanced (default).\n- Patient: waits longer for user to finish."))
 	EPS_AI_ConvAgent_TurnEagerness_ElevenLabs TurnEagerness = EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Normal;
 	/** Enable speculative turn processing: the server starts generating a response
 	 *  before it's certain the user has finished speaking. If the user continues,
 	 *  the speculative response is discarded. Reduces perceived latency.
 	 *  May cause occasional false starts — disable if the agent interrupts too often. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Latency",
 		meta = (ToolTip = "Start generating a response before confirming end-of-speech.\nReduces latency but may cause occasional false starts.\nDisable if the agent interrupts the user too often."))
 	bool bSpeculativeTurn = false;
 	// ── Emotion Tool ─────────────────────────────────────────────────────────
 	/** Include the built-in "set_emotion" client tool in the agent configuration.
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_Definitions.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_Definitions.h
@@ -29,6 +29,20 @@ enum class EPS_AI_ConvAgent_TurnMode_ElevenLabs : uint8
 	Client		UMETA(DisplayName = "Client Controlled"),
 };
 // ─────────────────────────────────────────────────────────────────────────────
 // Agent turn eagerness — controls how quickly the server detects end of speech
 // ─────────────────────────────────────────────────────────────────────────────
 // Three levels, ordered from fastest to slowest end-of-speech detection.
 // No explicit values are assigned, so the enumerators are numbered 0, 1, 2 in
 // declaration order.
 // NOTE(review): if this enum is persisted by numeric value anywhere, reordering
 // or inserting entries would change the meaning of saved data — confirm before
 // editing the list.
 UENUM(BlueprintType)
 enum class EPS_AI_ConvAgent_TurnEagerness_ElevenLabs : uint8
 {
 	/** Quick response at the earliest opportunity. Best for customer service.
 	 *  Trade-off: may cut the user off during a pause. */
 	Eager		UMETA(DisplayName = "Eager"),
 	/** Balanced turn-taking for general scenarios (default). */
 	Normal		UMETA(DisplayName = "Normal"),
 	/** Longer wait for user to finish. Best for information collection. */
 	Patient		UMETA(DisplayName = "Patient"),
 };
 // ─────────────────────────────────────────────────────────────────────────────
 // WebSocket message type helpers (internal, not exposed to Blueprint)
 // ─────────────────────────────────────────────────────────────────────────────
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h
@@ -185,14 +185,24 @@ public:
 		meta = (ToolTip = "Fire OnAgentPartialResponse with streaming text fragments as the LLM generates them.\nIdeal for real-time subtitles. Each event gives one text chunk, not the accumulated text."))
 	bool bEnableAgentPartialResponse = false;
-	/** Pre-buffer delay (ms) before starting audio playback on the first chunk.
+	/** Pre-buffer delay (ms) before starting audio playback on the first TTS chunk.
-	 *  Delays playback start so early TTS chunks can accumulate, preventing
+	 *  Set this to your "worst case" value (e.g. 300-1000ms depending on connection).
-	 *  mid-sentence pauses when the second chunk hasn't arrived yet.
+	 *  When adaptive mode is on, the system starts here and can only decrease
-	 *  Set to 0 for immediate playback. */
+	 *  (never increase) as it measures that chunks arrive fast enough.
 	 *  Set to 0 to disable pre-buffering entirely. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs|Latency",
 		meta = (ClampMin = "0", ClampMax = "4000",
-		ToolTip = "Pre-buffer delay in ms before starting audio playback.\nHigher values reduce mid-sentence pauses but add initial latency.\n0 = immediate playback."))
+		ToolTip = "Pre-buffer delay (ms) — your safe 'worst case' value.\nAdaptive mode can only decrease from here, never increase.\nSet 0 to disable pre-buffering entirely."))
-	int32 AudioPreBufferMs = 2000;
+	int32 AudioPreBufferMs = 300;
 	/** Enable adaptive pre-buffer: measures inter-chunk timing and automatically
 	 *  lowers the pre-buffer when TTS chunks arrive fast enough.
 	 *  The system can only decrease from AudioPreBufferMs — never increase.
 	 *  Resets to AudioPreBufferMs at the start of each conversation. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs|Latency",
 		meta = (EditCondition = "AudioPreBufferMs > 0",
 		ToolTip = "Automatically lower pre-buffer when connection is good.\nCan only decrease, never increase beyond AudioPreBufferMs.\nResets each conversation."))
 	bool bAdaptivePreBuffer = true;
 	/** Safety timeout: if the server does not start generating a response within this many seconds after the user stops speaking, fire OnAgentResponseTimeout. Set to 0 to disable. A normal response starts within 0.1-0.8s. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs",
@@ -640,18 +650,23 @@ private:
 	double GenerationStartTime = 0.0; // Set in HandleAgentResponseStarted — server starts generating.
 	double PlaybackStartTime   = 0.0; // Set when audio playback actually starts (post pre-buffer).
-	// Current-turn latency measurements (ms). Reset in HandleAgentResponseStarted.
+	// Current-turn latency measurements (ms). Overwritten per-field as each
-	// All anchored to GenerationStartTime (agent_response_started event), which is
+	// measurement is captured — NOT reset to zero between turns, so the HUD
-	// the closest client-side proxy for "user stopped speaking" in Server VAD mode.
+	// always shows the most recent value instead of blinking "---".
-	// Zero means "not yet measured this turn".
+	// All anchored to GenerationStartTime (agent_response_started event).
 	struct FDebugLatencies
 	{
-		float GenToAudioMs  = 0.0f; // agent_response_started → first audio chunk (LLM + TTS)
+		float TurnEndToTextMs = 0.0f; // user turn end → first text from LLM (≈ ASR + LLM TTFT)
 		float GenToAudioMs  = 0.0f; // agent_response_started → first audio chunk (≈ TTS + network)
 		float PreBufferMs   = 0.0f; // Pre-buffer wait before playback starts
 		float GenToEarMs    = 0.0f; // agent_response_started → playback starts (user-perceived)
 	};
 	FDebugLatencies CurrentLatencies;
 	// ElevenLabs server region (from x-region header on REST API). Fetched once per session.
 	FString ServerRegion;
 	void FetchServerRegion();
 	// Accumulates incoming PCM bytes until the audio component needs data.
 	// Uses a read offset instead of RemoveAt(0,N) to avoid O(n) memmove every
 	// underflow callback (~60Hz). Compacted periodically when read offset
@@ -664,6 +679,22 @@ private:
 	bool bPreBuffering = false;
 	double PreBufferStartTime = 0.0;
 	// ── Adaptive pre-buffer ─────────────────────────────────────────────────
 	// Runtime pre-buffer duration (ms). Equals AudioPreBufferMs when adaptive is off.
 	// When adaptive is on: initialized from AudioPreBufferMs, adjusted based on
 	// measured inter-chunk timing (not queue-dry detection).
 	int32 AdaptivePreBufferMs = 300;
 	static constexpr int32 AdaptivePreBufferMinMs = 50;
 	// Direction of last adaptation: -1=lowered, 0=stable. Used by HUD.
 	// (+1=raised is defined but not expected while adaptation is decrease-only.)
 	int32 PreBufferTrend = 0;
 	void ApplyPreBufferAdaptation();
 	// Per-turn inter-chunk timing measurement (game thread only).
 	// Set when the second TTS chunk arrives, consumed at turn end.
 	double TurnFirstChunkTime = 0.0;   // When chunk 1 arrived.
 	int32 TurnFirstChunkBytes = 0;     // Bytes in chunk 1 (to estimate audio duration).
 	int32 TurnIdealPreBufferMs = -1;   // Computed ideal pre-buffer. -1 = not measured.
 	bool bTurnGapMeasured = false;     // True after first inter-chunk gap is measured.
 	// Debug: track when the AudioQueue runs dry during speech (one-shot log).
 	bool bQueueWasDry = false;
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.h
@@ -197,6 +197,18 @@ public:
 	UFUNCTION(BlueprintPure, Category = "PS AI ConvAgent|ElevenLabs")
 	const FPS_AI_ConvAgent_ConversationInfo_ElevenLabs& GetConversationInfo() const { return ConversationInfo; }
 	/** Latest WebSocket round-trip latency reported by the server (ms).
 	 *  Returns -1 if no ping has been received yet. Thread-safe.
 	 *  Uses a relaxed atomic load: only the value itself is read, with no
 	 *  ordering guarantee relative to other members. */
 	int32 GetLastPingMs() const { return LastPingMs.load(std::memory_order_relaxed); }
 	/** Timestamp of the last user audio chunk sent to the server.
 	 *  Used as a proxy for "user stopped speaking" in Server VAD mode.
 	 *  Returns 0.0 (the member's initial value) until a timestamp is stored.
 	 *  NOTE(review): the backing field is a plain double, not an atomic like
 	 *  LastPingMs — confirm it is only written and read on the same thread. */
 	double GetLastAudioChunkSentTime() const { return LastAudioChunkSentTime; }
 	/** Timestamp of the last user_transcript received from the server.
 	 *  Marks when server finished ASR — best anchor for LLM latency measurement.
 	 *  Returns 0.0 (the member's initial value) until a timestamp is stored.
 	 *  NOTE(review): the backing field is a plain double, not an atomic like
 	 *  LastPingMs — confirm it is only written and read on the same thread. */
 	double GetLastUserTranscriptTime() const { return LastUserTranscriptTime; }
 	// ─────────────────────────────────────────────────────────────────────────
 	// Internal
 	// ─────────────────────────────────────────────────────────────────────────
@@ -235,10 +247,16 @@ private:
 	TArray<uint8> BinaryFrameBuffer;
 	// ── Latency tracking ─────────────────────────────────────────────────────
 	// Server-reported WebSocket round-trip latency from ping events (~every 2s).
 	// Atomic: written from WS callback thread, read from game thread (HUD).
 	std::atomic<int32> LastPingMs{-1};
 	// Timestamp of the last audio chunk sent (user speech).
 	double LastAudioChunkSentTime = 0.0;
 	// Timestamp when user turn ended (StopListening).
 	double UserTurnEndTime = 0.0;
 	// Timestamp of the last user_transcript received (server finished ASR).
 	double LastUserTranscriptTime = 0.0;
 	// Whether we are waiting for the first response after user stopped speaking.
 	// Atomic: defensive — documents thread-safety contract.
 	std::atomic<bool> bWaitingForResponse{false};
@@ -264,4 +282,10 @@ public:
 	// Set by UPS_AI_ConvAgent_ElevenLabsComponent before calling Connect().
 	// Controls turn_timeout in conversation_initiation_client_data.
 	EPS_AI_ConvAgent_TurnMode_ElevenLabs TurnMode = EPS_AI_ConvAgent_TurnMode_ElevenLabs::Server;
 	// Controls how eagerly the server interprets pauses as end-of-speech.
 	EPS_AI_ConvAgent_TurnEagerness_ElevenLabs TurnEagerness = EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Normal;
 	// Start generating before confirming end-of-speech (reduces latency, may cause false starts).
 	bool bSpeculativeTurn = false;
 };
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgentEditor/Private/PS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgentEditor/Private/PS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs.cpp
@@ -22,33 +22,43 @@
 DEFINE_LOG_CATEGORY_STATIC(LogPS_AI_AgentConfigEditor, Log, All);
-// Approximate LLM latencies as shown on the ElevenLabs dashboard.
+// Approximate LLM latencies as shown on the ElevenLabs dashboard (March 2026).
-// The API does not expose this data — values are indicative and may change.
+// The /v1/convai/llm/list API does NOT expose latency — values are indicative.
 // Update this table periodically to stay current.
 static FString GetLLMLatencyHint(const FString& ModelID)
 {
 	struct FLatencyEntry { const TCHAR* ID; const TCHAR* Latency; };
 	static const FLatencyEntry Entries[] =
 	{
-		// OpenAI
+		// ── ElevenLabs-hosted ─────────────────────────────────────────────
-		{ TEXT("gpt-4o-mini"),         TEXT("~350ms") },
+		{ TEXT("glm-4.5-air"),         TEXT("~949ms") },
-		{ TEXT("gpt-4o"),              TEXT("~700ms") },
+		{ TEXT("qwen3-30b-a3b"),       TEXT("~189ms") },
-		{ TEXT("gpt-4"),               TEXT("~900ms") },
+		{ TEXT("gpt-oss-120b"),        TEXT("~321ms") },
-		{ TEXT("gpt-4-turbo"),         TEXT("~650ms") },
+		// ── Google ────────────────────────────────────────────────────────
-		// Anthropic
+		{ TEXT("gemini-3-pro"),        TEXT("~3.5s")  },
-		{ TEXT("claude-sonnet-4-5"),   TEXT("~750ms") },
+		{ TEXT("gemini-3-flash"),      TEXT("~1.4s")  },
-		{ TEXT("claude-haiku-4-5"),    TEXT("~350ms") },
+		{ TEXT("gemini-2.5-flash"),    TEXT("~967ms") },
-		{ TEXT("claude-3-5-sonnet"),   TEXT("~700ms") },
+		{ TEXT("gemini-2.5-flash-lite"), TEXT("~605ms") },
-		// Google
+		// ── OpenAI ────────────────────────────────────────────────────────
-		{ TEXT("gemini-1.5-pro"),      TEXT("~500ms") },
+		{ TEXT("gpt-5"),               TEXT("~1.1s")  },
-		{ TEXT("gemini-2.0-flash"),    TEXT("~300ms") },
+		{ TEXT("gpt-5.1"),             TEXT("~980ms") },
-		{ TEXT("gemini-2.5-flash"),    TEXT("~250ms") },
+		{ TEXT("gpt-5.2"),             TEXT("~795ms") },
-		// xAI
+		{ TEXT("gpt-5-mini"),          TEXT("~884ms") },
-		{ TEXT("grok-beta"),           TEXT("~500ms") },
+		{ TEXT("gpt-5-nano"),          TEXT("~734ms") },
-		// ElevenLabs-hosted
+		{ TEXT("gpt-4.1"),             TEXT("~870ms") },
-		{ TEXT("qwen3-30b-a3b"),       TEXT("~207ms") },
+		{ TEXT("gpt-4.1-mini"),        TEXT("~916ms") },
-		{ TEXT("glm-4.5-air"),         TEXT("~980ms") },
+		{ TEXT("gpt-4.1-nano"),        TEXT("~574ms") },
-		{ TEXT("gpt-oss-120b"),        TEXT("~331ms") },
+		{ TEXT("gpt-4o"),              TEXT("~728ms") },
 		{ TEXT("gpt-4o-mini"),         TEXT("~767ms") },
 		{ TEXT("gpt-4-turbo"),         TEXT("~1.5s")  },
 		{ TEXT("gpt-3.5-turbo"),       TEXT("~458ms") },
 		// ── Anthropic ─────────────────────────────────────────────────────
 		{ TEXT("claude-sonnet-4-5"),   TEXT("~1.4s")  },
 		{ TEXT("claude-sonnet-4"),     TEXT("~1.1s")  },
 		{ TEXT("claude-haiku-4-5"),    TEXT("~644ms") },
 		{ TEXT("claude-3.7-sonnet"),   TEXT("~1.2s")  },
 		{ TEXT("claude-3-haiku"),      TEXT("~484ms") },
 		{ TEXT("claude-3-5-sonnet"),   TEXT("~1.2s")  },
 	};
 	for (const auto& E : Entries)
@@ -58,6 +68,22 @@ static FString GetLLMLatencyHint(const FString& ModelID)
 	return FString();
 }
 // Infer the provider name from a model ID, for display grouping in the LLM picker.
 //
 // ElevenLabs-hosted models are matched by exact ID BEFORE the vendor prefix
 // checks: "gpt-oss-120b" also starts with "gpt-", so testing the OpenAI prefix
 // first would file it under OpenAI and make the ElevenLabs branch unreachable
 // for that model.
 //
 // Returns an empty string when the model ID matches no known provider.
 static FString GetLLMProvider(const FString& ModelID)
 {
 	// Exact-ID matches first (most specific rule wins).
 	if (ModelID == TEXT("glm-4.5-air") || ModelID == TEXT("qwen3-30b-a3b") || ModelID == TEXT("gpt-oss-120b"))
 		return TEXT("ElevenLabs");
 	// Vendor prefixes.
 	if (ModelID.StartsWith(TEXT("gpt-")) || ModelID.StartsWith(TEXT("o1")) || ModelID.StartsWith(TEXT("o3")))
 		return TEXT("OpenAI");
 	if (ModelID.StartsWith(TEXT("claude-")))
 		return TEXT("Anthropic");
 	if (ModelID.StartsWith(TEXT("gemini-")))
 		return TEXT("Google");
 	if (ModelID.StartsWith(TEXT("grok")))
 		return TEXT("xAI");
 	return FString();
 }
 // Language code → display name. Shared by BuildAgentPayload (to resolve
 // {Language} placeholder) and the fetch handler (to strip the resolved fragment).
 static FString GetLanguageDisplayName(const FString& LangCode)
@@ -332,9 +358,11 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::CustomizeDetails(
 			.Font(IDetailLayoutBuilder::GetDetailFont())
 		]
 		.ValueContent()
 		.MaxDesiredWidth(600.f)
 		[
 			SNew(SBox)
 			.MinDesiredHeight(200.f)
 			.MinDesiredWidth(400.f)
 			[
 				SNew(SMultiLineEditableTextBox)
 				.Font(IDetailLayoutBuilder::GetDetailFont())
@@ -679,6 +707,10 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::OnFetchLLMsClicked()
 			Pinned->LLMDisplayNames.Reset();
 			Pinned->LLMModelIDs.Reset();
 			// Collect models grouped by provider for sorted display.
 			struct FLLMEntry { FString ModelID; FString Provider; FString Display; bool bCheckpoint; };
 			TArray<FLLMEntry> AllEntries;
 			for (const auto& LLMVal : *LLMs)
 			{
 				const TSharedPtr<FJsonObject>* LLMObj = nullptr;
@@ -703,28 +735,62 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::OnFetchLLMsClicked()
 					}
 				}
 				// Check if it's a checkpoint model (sub-version).
 				bool bIsCheckpoint = false;
 				(*LLMObj)->TryGetBoolField(TEXT("is_checkpoint"), bIsCheckpoint);
 				// Build display string: "model-id  (~350ms)" or "  model-id  (checkpoint, ~350ms)"
 				const FString Latency = GetLLMLatencyHint(ModelID);
 				const FString Provider = GetLLMProvider(ModelID);
 				// Build display: "  model-id  (checkpoint, ~350ms)" for checkpoints,
 				//                "model-id  (~350ms)" for main models.
 				FString Display;
 				if (bIsCheckpoint)
 				{
 					Display = Latency.IsEmpty()
-						? FString::Printf(TEXT("  %s  (checkpoint)"), *ModelID)
+						? FString::Printf(TEXT("    %s  (checkpoint)"), *ModelID)
-						: FString::Printf(TEXT("  %s  (checkpoint, %s)"), *ModelID, *Latency);
+						: FString::Printf(TEXT("    %s  (checkpoint, %s)"), *ModelID, *Latency);
 				}
 				else
 				{
 					Display = Latency.IsEmpty()
-						? ModelID
+						? FString::Printf(TEXT("  %s"), *ModelID)
-						: FString::Printf(TEXT("%s  (%s)"), *ModelID, *Latency);
+						: FString::Printf(TEXT("  %s  (%s)"), *ModelID, *Latency);
 				}
-				Pinned->LLMDisplayNames.Add(MakeShareable(new FString(Display)));
+				AllEntries.Add({ ModelID, Provider, Display, bIsCheckpoint });
-				Pinned->LLMModelIDs.Add(ModelID);
+			}
 			// Sort by provider order (ElevenLabs, Google, OpenAI, Anthropic, xAI, then
 			// unrecognized providers last), then main models before checkpoints,
 			// then alphabetically by model ID.
 			static const TArray<FString> ProviderOrder = {
 				TEXT("ElevenLabs"), TEXT("Google"), TEXT("OpenAI"), TEXT("Anthropic"), TEXT("xAI")
 			};
 			AllEntries.Sort([](const FLLMEntry& A, const FLLMEntry& B)
 			{
 				int32 IdxA = ProviderOrder.IndexOfByKey(A.Provider);
 				int32 IdxB = ProviderOrder.IndexOfByKey(B.Provider);
 				if (IdxA == INDEX_NONE) IdxA = ProviderOrder.Num();
 				if (IdxB == INDEX_NONE) IdxB = ProviderOrder.Num();
 				if (IdxA != IdxB) return IdxA < IdxB;
 				if (A.bCheckpoint != B.bCheckpoint) return !A.bCheckpoint; // main first
 				return A.ModelID < B.ModelID;
 			});
 			// Insert provider headers as non-selectable separator entries.
 			FString LastProvider;
 			for (const auto& Entry : AllEntries)
 			{
 				const FString& Prov = Entry.Provider.IsEmpty() ? TEXT("Other") : Entry.Provider;
 				if (Prov != LastProvider)
 				{
 					// Header line: "── OpenAI ──" (not selectable — mapped to empty ModelID)
 					FString Header = FString::Printf(TEXT("── %s ──"), *Prov);
 					Pinned->LLMDisplayNames.Add(MakeShareable(new FString(Header)));
 					Pinned->LLMModelIDs.Add(FString()); // empty = separator
 					LastProvider = Prov;
 				}
 				Pinned->LLMDisplayNames.Add(MakeShareable(new FString(Entry.Display)));
 				Pinned->LLMModelIDs.Add(Entry.ModelID);
 			}
 			// Pre-select the currently set LLMModel if it exists in the list.
@@ -767,6 +833,9 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::OnLLMSelected(
 	int32 Idx = LLMDisplayNames.IndexOfByKey(NewSelection);
 	if (Idx == INDEX_NONE) return;
 	// Separator headers have empty ModelID — ignore selection.
 	if (LLMModelIDs[Idx].IsEmpty()) return;
 	if (UPS_AI_ConvAgent_AgentConfig_ElevenLabs* Asset = GetEditedAsset())
 	{
 		Asset->Modify();