Add turn eagerness, speculative turn, adaptive pre-buffer, and latency HUD improvements

- Add TurnEagerness (Eager/Normal/Patient) and bSpeculativeTurn to the agent config data asset, sent as conversation_config_override at WebSocket connection time
- Add adaptive pre-buffer system: measures inter-chunk TTS timing and decreases the pre-buffer when chunks arrive fast enough (decrease-only, resets each conversation)
- New UPROPERTY: bAdaptivePreBuffer toggle, with AudioPreBufferMs as the starting/worst-case value
- Rework latency HUD: TTS+Net, PreBuf actual/target with trend indicator, Gen>Ear, WS Ping, server region display
- Fetch ElevenLabs server region from the REST API x-region header
- Add editor Detail Customization: TurnEagerness dropdown + SpeculativeTurn checkbox in AgentConfig with LLM picker and Language picker

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 16:43:20 +01:00
parent 2169c58cd7
commit 4456dfa9dc
9 changed files with 540 additions and 105 deletions
--- a/Unreal/PS_AI_Agent/Config/DefaultEngine.ini
+++ b/Unreal/PS_AI_Agent/Config/DefaultEngine.ini
@@ -1,8 +1,8 @@


 [/Script/EngineSettings.GameMapsSettings]
-GameDefaultMap=/Game/voidMap.voidMap
-EditorStartupMap=/Game/voidMap.voidMap
+GameDefaultMap=/PS_AI_ConvAgent/Demo_Metahuman.Demo_Metahuman
+EditorStartupMap=/PS_AI_ConvAgent/Demo_Metahuman.Demo_Metahuman

 [/Script/Engine.RendererSettings]
 r.AllowStaticLighting=False
@@ -182,4 +182,5 @@ ManualIPAddress=

 [/Script/PS_AI_ConvAgent.PS_AI_ConvAgent_Settings_ElevenLabs]
 API_Key=7b73c4244ccbec394cc010aaab01b0ec59ce0b11fc636ce4828354f675ca14a5
+ServerRegion=Global

--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_ElevenLabsComponent.cpp
@@ -17,6 +17,9 @@
 #include "GameFramework/PlayerController.h"
 #include "Net/UnrealNetwork.h"
 #include "VoiceModule.h"
+#include "HttpModule.h"
+#include "Interfaces/IHttpRequest.h"
+#include "Interfaces/IHttpResponse.h"

 DEFINE_LOG_CATEGORY_STATIC(LogPS_AI_ConvAgent_ElevenLabs, Log, All);

@@ -147,15 +150,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 	if (bPreBuffering)
 	{
 		const double Elapsed = (FPlatformTime::Seconds() - PreBufferStartTime) * 1000.0;
-		if (Elapsed >= static_cast<double>(AudioPreBufferMs))
+		const int32 EffPreBuf = (AudioPreBufferMs > 0)
+			? (bAdaptivePreBuffer ? AdaptivePreBufferMs : AudioPreBufferMs) : 0;
+		if (Elapsed >= static_cast<double>(EffPreBuf))
 		{
 			bPreBuffering = false;
 			if (bDebug)
 			{
 				const double Tpb = FPlatformTime::Seconds() - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
-					Tpb, LastClosedTurnIndex, AudioPreBufferMs);
+					TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms adaptive). Starting playback."),
+					Tpb, LastClosedTurnIndex, EffPreBuf);
 			}
 			// Only start playback if the agent is still speaking.
 			// If silence detection already set bAgentSpeaking=false, this is stale.
@@ -292,6 +297,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 	// Broadcast OUTSIDE the lock — Blueprint handlers can execute for arbitrary time.
 	if (bShouldBroadcastStopped)
 	{
+		// Adapt pre-buffer for next turn based on this turn's signals.
+		ApplyPreBufferAdaptation();
+
 		if (bHardTimeoutFired && bDebug)
 		{
 			const double Tht = FPlatformTime::Seconds() - SessionStartTime;
@@ -321,7 +329,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
 	{
 		const int32 CVarVal = CVarDebugLatency.GetValueOnGameThread();
 		const bool bShowLatency = (CVarVal >= 0) ? (CVarVal > 0) : bDebugLatency;
-		if (bShowLatency)
+		// Only draw on the active (connected) Authority component.
+		// Multiple agents in the scene would overwrite each other's HUD at the same
+		// BaseKey, causing visible blinking between their values.
+		if (bShowLatency && IsConnected() && GetOwnerRole() == ROLE_Authority)
 		{
 			DrawLatencyHUD();
 		}
@@ -388,6 +399,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StartConversation_Internal()

 	// Pass configuration to the proxy before connecting.
 	WebSocketProxy->TurnMode = TurnMode;
+	if (AgentConfig)
+	{
+		WebSocketProxy->TurnEagerness = AgentConfig->TurnEagerness;
+		WebSocketProxy->bSpeculativeTurn = AgentConfig->bSpeculativeTurn;
+	}

 	// Resolve AgentID by priority: AgentConfig > component string > project default.
 	FString ResolvedAgentID = AgentID;
@@ -834,6 +850,13 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleConnected(const FPS_AI_ConvAgen
 	SessionStartTime = FPlatformTime::Seconds();
 	TurnIndex = 0;
 	LastClosedTurnIndex = 0;
+
+	// Initialize adaptive pre-buffer from designer settings.
+	AdaptivePreBufferMs = AudioPreBufferMs;  // Start at the designer's value.
+	PreBufferTrend = 0;
+	TurnIdealPreBufferMs = -1;
+	bTurnGapMeasured = false;
+
 	UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("[T+0.00s] Agent connected. ConversationID=%s"), *Info.ConversationID);
 	OnAgentConnected.Broadcast(Info);

@@ -852,6 +875,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleConnected(const FPS_AI_ConvAgen
 		}
 	}

+	// Probe server region once per session (only when latency HUD is enabled).
+	if (ServerRegion.IsEmpty() && GetOwnerRole() == ROLE_Authority)
+	{
+		const int32 CVarVal = CVarDebugLatency.GetValueOnGameThread();
+		const bool bShowLatency = (CVarVal >= 0) ? (CVarVal > 0) : bDebugLatency;
+		if (bShowLatency)
+		{
+			FetchServerRegion();
+		}
+	}
+
 	// In Client turn mode (push-to-talk), the user controls listening manually via
 	// StartListening()/StopListening(). Auto-starting would leave the mic open
 	// permanently and interfere with push-to-talk — the T-release StopListening()
@@ -1081,21 +1115,28 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleAgentResponseStarted()
 	// In Server VAD mode, StopListening() is not called — the server detects
 	// end of user speech and immediately starts generating. If TurnEndTime was
 	// not set by StopListening since the last generation (i.e. it's stale or 0),
-	// use Now as the best client-side approximation.
+	// use the proxy's LastUserTranscriptTime as the best approximation:
+	// user_transcript arrives after server VAD + ASR, just before LLM starts.
 	const bool bFreshTurnEnd = (TurnEndTime > GenerationStartTime) && (GenerationStartTime > 0.0);
 	if (!bFreshTurnEnd)
 	{
-		TurnEndTime = Now;
+		const double TranscriptTime = WebSocketProxy ? WebSocketProxy->GetLastUserTranscriptTime() : 0.0;
+		TurnEndTime = (TranscriptTime > 0.0) ? TranscriptTime : Now;
 	}

-	// Reset all latency measurements — new response cycle starts here.
-	// All metrics are anchored to GenerationStartTime (= now), which is the closest
-	// client-side proxy for "user stopped speaking" in Server VAD mode.
-	CurrentLatencies = FDebugLatencies();
+	// New response cycle starts here. All client-side metrics are anchored to
+	// GenerationStartTime (= now). Do NOT zero CurrentLatencies — the per-field
+	// assignments in EnqueueAgentAudio() overwrite naturally, so the HUD shows the
+	// previous turn's values until the new turn's measurements arrive (no "---" blink).
 	GenerationStartTime = Now;

 	const double T = Now - SessionStartTime;
 	const double LatencyFromTurnEnd = Now - TurnEndTime;
+
+	// LLM latency: time from user_transcript received to first text token arriving.
+	// In Server VAD mode, this approximates LLM TTFT + network (post-ASR).
+	// In Client turn mode, this is the full ASR + LLM latency.
+	CurrentLatencies.TurnEndToTextMs = static_cast<float>(LatencyFromTurnEnd * 1000.0);
 	if (bIsListening)
 	{
 		// In Server VAD + interruption mode, keep the mic open so the server can
@@ -1321,7 +1362,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnProceduralUnderflow(
 			AudioQueueReadOffset = 0;
 		}

-		// Log when queue recovers (new data arrived after being dry)
+		// Queue recovered: was dry, now has data again.
+		// Only flag as underrun if the gap was long enough to be audible.
+		// Short gaps (<200ms) are handled seamlessly by USoundWaveProcedural's
+		// internal silence — no need to increase the pre-buffer for those.
 		if (bQueueWasDry)
 		{
 			bQueueWasDry = false;
@@ -1329,7 +1373,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnProceduralUnderflow(
 			{
 				const double T = FPlatformTime::Seconds() - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] AudioQueue recovered — feeding real data again (%d bytes remaining)."),
+					TEXT("[T+%.2fs] [Turn %d] AudioQueue recovered (%d bytes remaining)."),
 					T, LastClosedTurnIndex, AudioQueue.Num() - AudioQueueReadOffset);
 			}
 		}
@@ -1371,6 +1415,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 		bAgentResponseReceived = false; // Reset: wait for agent_response before allowing StopSpeaking.
 		bQueueWasDry = false;
 		SilentTickCount = 0;
+		// Adaptive pre-buffer: record first chunk timing for inter-chunk gap measurement.
+		TurnFirstChunkTime = FPlatformTime::Seconds();
+		TurnFirstChunkBytes = PCMData.Num();
+		TurnIdealPreBufferMs = -1;
+		bTurnGapMeasured = false;

 		// Latency capture (always, for HUD display).
 		if (GenerationStartTime > 0.0)
@@ -1393,7 +1442,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 			MulticastAgentStartedSpeaking();
 		}

-		if (AudioPreBufferMs > 0)
+		const int32 EffectivePreBufferMs = (AudioPreBufferMs > 0)
+			? (bAdaptivePreBuffer ? AdaptivePreBufferMs : AudioPreBufferMs) : 0;
+		if (EffectivePreBufferMs > 0)
 		{
 			// Pre-buffer: accumulate audio before starting playback.
 			// This absorbs TTS inter-chunk gaps so chunk 2 arrives before
@@ -1404,8 +1455,8 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 			{
 				const double Tpb2 = FPlatformTime::Seconds() - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] Pre-buffering %dms before starting playback."),
-					Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
+					TEXT("[T+%.2fs] [Turn %d] Pre-buffering %dms (adaptive) before starting playback."),
+					Tpb2, LastClosedTurnIndex, EffectivePreBufferMs);
 			}
 		}
 		else
@@ -1433,14 +1484,25 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 		if (GetOwnerRole() == ROLE_Authority)
 		{
 			bPreBuffering = false;
+			// Measure inter-chunk gap for adaptive pre-buffer.
+			if (!bTurnGapMeasured && TurnFirstChunkTime > 0.0)
+			{
+				const double NowGap = FPlatformTime::Seconds();
+				const double InterChunkGapMs = (NowGap - TurnFirstChunkTime) * 1000.0;
+				// Chunk 1 audio duration: 16kHz 16-bit mono = 32000 bytes/sec.
+				const double Chunk1AudioMs = (TurnFirstChunkBytes > 0)
+					? (static_cast<double>(TurnFirstChunkBytes) / 32.0) : 0.0;
+				TurnIdealPreBufferMs = FMath::Max(0, FMath::RoundToInt32(InterChunkGapMs - Chunk1AudioMs));
+				bTurnGapMeasured = true;
+			}
 			if (bDebug)
 			{
 				const double NowPb = FPlatformTime::Seconds();
 				const double BufferedMs = (NowPb - PreBufferStartTime) * 1000.0;
 				const double Tpb3 = NowPb - SessionStartTime;
 				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
-					TEXT("[T+%.2fs] [Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered). Starting playback."),
-					Tpb3, LastClosedTurnIndex, BufferedMs);
+					TEXT("[T+%.2fs] [Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered, ideal=%dms). Starting playback."),
+					Tpb3, LastClosedTurnIndex, BufferedMs, TurnIdealPreBufferMs);
 			}
 			if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
 			{
@@ -1467,6 +1529,23 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
 		{
 			AudioPlaybackComponent->Play();
 		}
+		// Measure inter-chunk gap for adaptive pre-buffer (first gap only).
+		if (!bTurnGapMeasured && TurnFirstChunkTime > 0.0 && GetOwnerRole() == ROLE_Authority)
+		{
+			const double NowGap = FPlatformTime::Seconds();
+			const double InterChunkGapMs = (NowGap - TurnFirstChunkTime) * 1000.0;
+			const double Chunk1AudioMs = (TurnFirstChunkBytes > 0)
+				? (static_cast<double>(TurnFirstChunkBytes) / 32.0) : 0.0;
+			TurnIdealPreBufferMs = FMath::Max(0, FMath::RoundToInt32(InterChunkGapMs - Chunk1AudioMs));
+			bTurnGapMeasured = true;
+			if (bDebug)
+			{
+				const double T = NowGap - SessionStartTime;
+				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
+					TEXT("[T+%.2fs] [Turn %d] Inter-chunk gap: %.0fms, chunk1 audio: %.0fms → ideal pre-buffer: %dms"),
+					T, LastClosedTurnIndex, InterChunkGapMs, Chunk1AudioMs, TurnIdealPreBufferMs);
+			}
+		}
 		// Reset silence counter — new audio arrived, we're not in a gap anymore
 		SilentTickCount = 0;
 	}
@@ -1516,6 +1595,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopAgentAudio()
 	// Broadcast outside the lock.
 	if (bWasSpeaking)
 	{
+		// Adapt pre-buffer for next turn based on this turn's signals.
+		ApplyPreBufferAdaptation();
+
 		if (bDebug)
 		{
 			const double T = Now - SessionStartTime;
@@ -1536,6 +1618,52 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopAgentAudio()
 	}
 }

+void UPS_AI_ConvAgent_ElevenLabsComponent::ApplyPreBufferAdaptation()
+{
+	// Called from the stopped-speaking paths: folds this turn's measured ideal pre-buffer
+	// into AdaptivePreBufferMs for the next turn, then clears the per-turn measurement.
+	// Only adapt on Authority (where the WebSocket lives and measurements are taken).
+	if (GetOwnerRole() != ROLE_Authority) return;
+	// Adaptive mode must be enabled, and pre-buffering must be active (<= 0 means off).
+	if (!bAdaptivePreBuffer || AudioPreBufferMs <= 0) return;
+	// No measurement this turn (single-chunk response or no second chunk arrived).
+	if (TurnIdealPreBufferMs < 0) { PreBufferTrend = 0; return; }
+
+	const int32 Prev = AdaptivePreBufferMs;
+
+	// DECREASE-ONLY: AudioPreBufferMs is the designer's worst-case starting value and the
+	// system only optimizes downward from it; the value is re-seeded each conversation.
+	// A higher ideal (e.g. natural speech pause, slow network) is deliberately ignored.
+	if (TurnIdealPreBufferMs < Prev)
+	{
+		// Move 30% of the way toward (ideal + 50 ms margin), never below the floor.
+		const int32 TargetMs = FMath::Max(AdaptivePreBufferMinMs, TurnIdealPreBufferMs + 50);
+		const int32 Blended = FMath::RoundToInt32(Prev * 0.7f + TargetMs * 0.3f);
+		// The 50 ms margin (or a floor above Prev) can push the blend ABOVE Prev when the
+		// ideal is within 50 ms of it; clamp to Prev so the value can never rise.
+		AdaptivePreBufferMs = FMath::Min(Prev, FMath::Max(AdaptivePreBufferMinMs, Blended));
+		PreBufferTrend = (AdaptivePreBufferMs < Prev) ? -1 : 0;
+	}
+	else
+	{
+		// Ideal >= current — connection is same or worse, keep current value.
+		PreBufferTrend = 0;
+	}
+
+	// Reset measurement for next turn (a copy is kept for the log line below).
+	const int32 IdealForLog = TurnIdealPreBufferMs;
+	TurnIdealPreBufferMs = -1;
+	bTurnGapMeasured = false;
+
+	if (bDebug && Prev != AdaptivePreBufferMs)
+	{
+		const double T = FPlatformTime::Seconds() - SessionStartTime;
+		UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
+			TEXT("[T+%.2fs] [Turn %d] Adaptive pre-buffer: %d ms -> %d ms (ideal=%dms)"),
+			T, LastClosedTurnIndex, Prev, AdaptivePreBufferMs, IdealForLog);
+	}
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // Microphone → WebSocket
 // ─────────────────────────────────────────────────────────────────────────────
@@ -2404,6 +2532,42 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawDebugHUD() const
 			bWantsReconnect ? TEXT(" (ACTIVE)") : TEXT("")));
 }

+// ─────────────────────────────────────────────────────────────────────────────
+// Server region detection (one-shot HTTP probe)
+// ─────────────────────────────────────────────────────────────────────────────
+void UPS_AI_ConvAgent_ElevenLabsComponent::FetchServerRegion()
+{
+	// One-shot probe: GET <api base>/v1/models and read the "x-region" response header.
+	const UPS_AI_ConvAgent_Settings_ElevenLabs* Cfg = FPS_AI_ConvAgentModule::Get().GetSettings();
+	if (Cfg == nullptr || Cfg->API_Key.IsEmpty()) return;
+
+	auto Probe = FHttpModule::Get().CreateRequest();
+	Probe->SetURL(Cfg->GetAPIBaseURL() + TEXT("/v1/models"));
+	Probe->SetVerb(TEXT("GET"));
+	Probe->SetHeader(TEXT("xi-api-key"), Cfg->API_Key);
+
+	// The completion callback holds only a weak reference; validity is re-checked before use.
+	TWeakObjectPtr<UPS_AI_ConvAgent_ElevenLabsComponent> Self(this);
+	Probe->OnProcessRequestComplete().BindLambda(
+		[Self](FHttpRequestPtr /*Unused*/, FHttpResponsePtr Reply, bool bOk)
+		{
+			const FString RegionHeader = (bOk && Reply.IsValid()) ? Reply->GetHeader(TEXT("x-region")) : FString();
+			if (RegionHeader.IsEmpty()) return;
+
+			// Store the result from a task dispatched to the game thread.
+			AsyncTask(ENamedThreads::GameThread, [Self, RegionHeader]()
+			{
+				if (!Self.IsValid()) return;
+				Self->ServerRegion = RegionHeader;
+				UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("ElevenLabs server region: %s"), *RegionHeader);
+			});
+		});
+	Probe->ProcessRequest();
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Latency debug HUD
+// ─────────────────────────────────────────────────────────────────────────────
 void UPS_AI_ConvAgent_ElevenLabsComponent::DrawLatencyHUD() const
 {
 	if (!GEngine) return;
@@ -2416,25 +2580,58 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::DrawLatencyHUD() const
 	const FColor ValueColor     = FColor::White;
 	const FColor HighlightColor = FColor::Yellow;

-	// Helper: format a single metric — shows "---" when not yet captured this turn
 	auto Fmt = [](float Ms) -> FString
 	{
 		return (Ms > 0.0f) ? FString::Printf(TEXT("%.0f ms"), Ms) : FString(TEXT("---"));
 	};

-	// Title — all times measured from agent_response_started
-	GEngine->AddOnScreenDebugMessage(BaseKey, DisplayTime, TitleColor,
-		TEXT("=== Latency (from gen start) ==="));
+	int32 Row = 0;

-	// 1. Gen → Audio: generation start → first audio chunk (LLM + TTS)
-	GEngine->AddOnScreenDebugMessage(BaseKey + 1, DisplayTime, ValueColor,
-		FString::Printf(TEXT("  Gen>Audio:      %s"), *Fmt(CurrentLatencies.GenToAudioMs)));
+	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, TitleColor,
+		TEXT("=== Voice-to-Voice Latency ==="));

-	// 2. Pre-buffer wait before playback
-	GEngine->AddOnScreenDebugMessage(BaseKey + 2, DisplayTime, ValueColor,
+	// Client-side breakdown: TTS+Net + Pre-buffer = Gen>Ear
+	// Note: LLM latency is only visible on ElevenLabs dashboard (server-side).
+	// In Server VAD mode, no reliable client-side "end of user speech" marker exists.
+	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
+		FString::Printf(TEXT("  TTS+Net:        %s"), *Fmt(CurrentLatencies.GenToAudioMs)));
+
+	// Pre-buffer display depends on adaptive mode.
+	if (bAdaptivePreBuffer && AudioPreBufferMs > 0)
+	{
+		// Adaptive ON: show actual wait + adaptive target with trend arrow.
+		GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
+			FString::Printf(TEXT("  PreBuf actual:  %s"), *Fmt(CurrentLatencies.PreBufferMs)));
+
+		const TCHAR* TrendArrow = (PreBufferTrend > 0) ? TEXT(" ^")
+		                         : (PreBufferTrend < 0) ? TEXT(" v")
+		                         : TEXT("");
+		const FColor AdaptiveColor = (PreBufferTrend > 0) ? FColor::Red
+		                            : (PreBufferTrend < 0) ? FColor::Green
+		                            : ValueColor;
+		GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, AdaptiveColor,
+			FString::Printf(TEXT("  PreBuf target:  %d ms%s"), AdaptivePreBufferMs, TrendArrow));
+	}
+	else
+	{
+		// Adaptive OFF (or pre-buffer disabled): show fixed pre-buffer value.
+		GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
 			FString::Printf(TEXT("  Pre-buffer:     %s"), *Fmt(CurrentLatencies.PreBufferMs)));
+	}

-	// 3. Gen → Ear: generation start → playback starts (user-perceived total)
-	GEngine->AddOnScreenDebugMessage(BaseKey + 3, DisplayTime, HighlightColor,
+	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, HighlightColor,
 		FString::Printf(TEXT("  Gen>Ear:        %s"), *Fmt(CurrentLatencies.GenToEarMs)));
+
+	// Connection section
+	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, TitleColor,
+		TEXT("--- Connection ---"));
+
+	const int32 PingMs = WebSocketProxy ? WebSocketProxy->GetLastPingMs() : -1;
+	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
+		FString::Printf(TEXT("  WS Ping:        %s"),
+			(PingMs >= 0) ? *FString::Printf(TEXT("%d ms"), PingMs) : TEXT("---")));
+
+	GEngine->AddOnScreenDebugMessage(BaseKey + Row++, DisplayTime, ValueColor,
+		FString::Printf(TEXT("  Region:         %s"),
+			ServerRegion.IsEmpty() ? TEXT("---") : *ServerRegion));
 }
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Private/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.cpp
@@ -207,41 +207,58 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsConnected()
 	// This produces smooth continuous audio chunks without the fragmentation caused by
 	// explicit optimize_streaming_latency or enable_intermediate_response overrides.
 	//
-	// In Client (push-to-talk) mode only, we override turn_timeout to reduce latency.
-	// In Server VAD mode, the config override is empty (matches C++ sample exactly).
+	// Build turn configuration based on mode + latency settings.
 	TSharedPtr<FJsonObject> ConversationConfigOverride = MakeShareable(new FJsonObject());

+	{
+		TSharedPtr<FJsonObject> TurnObj = MakeShareable(new FJsonObject());
+		bool bHasTurnOverrides = false;
+
+		// In Client (push-to-talk) mode, reduce turn_timeout to minimize latency.
 		if (TurnMode == EPS_AI_ConvAgent_TurnMode_ElevenLabs::Client)
 		{
-		// turn_timeout: how long the server waits after VAD detects silence before
-		// processing the user's turn. Default is ~3s. In push-to-talk mode this
-		// directly adds latency — the server waits after the user releases T.
-		// 1s is safe without speculative_turn (which was removed — see history below).
-		//
-		// History:
-		//   turn_timeout=1 was problematic when combined with speculative_turn=true
-		//   (server silently dropped turns 3+). Without speculative_turn, 1s is safe
-		//   and halves the per-turn latency.
-		TSharedPtr<FJsonObject> TurnObj = MakeShareable(new FJsonObject());
 			TurnObj->SetNumberField(TEXT("turn_timeout"), 1);
+			bHasTurnOverrides = true;
+		}

+		// turn_eagerness: controls how quickly the server interprets pauses as end-of-speech.
+		// "eager" = fastest (may cut user off), "normal" = balanced, "patient" = waits longer.
+		if (TurnEagerness != EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Normal)
+		{
+			FString EagernessStr;
+			switch (TurnEagerness)
+			{
+			case EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Eager:   EagernessStr = TEXT("eager");   break;
+			case EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Patient: EagernessStr = TEXT("patient"); break;
+			default:                                                  EagernessStr = TEXT("normal");  break;
+			}
+			TurnObj->SetStringField(TEXT("turn_eagerness"), EagernessStr);
+			bHasTurnOverrides = true;
+		}
+
+		// speculative_turn: start generating a response before confirming end-of-speech.
+		// Reduces latency but may cause occasional false starts (discarded if user continues).
+		if (bSpeculativeTurn)
+		{
+			TurnObj->SetBoolField(TEXT("speculative_turn"), true);
+			bHasTurnOverrides = true;
+		}
+
+		if (bHasTurnOverrides)
+		{
 			TSharedPtr<FJsonObject> AgentObj = MakeShareable(new FJsonObject());
 			AgentObj->SetObjectField(TEXT("turn"), TurnObj);
-
 			ConversationConfigOverride->SetObjectField(TEXT("agent"), AgentObj);
 		}
+	}

 	// NOTE: We intentionally do NOT send these overrides (matching C++ sample):
 	//
-	// - tts.optimize_streaming_latency: Explicitly sending ANY value (even 0) changes
-	//   the TTS chunking behaviour vs server defaults. The C++ sample omits this entirely.
-	//   With value 3: many tiny chunks with 500ms-2s gaps (requires heavy buffering).
-	//   With value 0: fewer larger chunks but ~3s inter-chunk gaps (still causes gaps).
-	//   Server default (omitted): produces smooth continuous audio (no gaps in C++ sample).
+	// - tts.optimize_streaming_latency: deprecated by ElevenLabs. Sending any value
+	//   changes TTS chunking behaviour. Server default (omitted) is optimal.
 	//
 	// - custom_llm_extra_body.enable_intermediate_response: When true, the LLM speaks
-	//   before finishing generation → fragmented audio. When omitted (C++ sample), the
-	//   LLM completes its response first → continuous TTS chunks.
+	//   before finishing generation → fragmented audio. Omitted = server default.
 	//
 	// - custom_llm_extra_body (empty object): Even an empty object might override the
 	//   agent's configured custom_llm_extra_body with nothing. Omit entirely.
@@ -259,12 +276,15 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsConnected()
 	FJsonSerializer::Serialize(InitMsg.ToSharedRef(), InitWriter);
 	{
 		const UPS_AI_ConvAgent_Settings_ElevenLabs* S = FPS_AI_ConvAgentModule::Get().GetSettings();
-		if (S->bVerboseLogging)
+		if (S && S->bVerboseLogging)
 		{
 			UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Verbose, TEXT("Sending initiation: %s"), *InitJson);
 		}
 	}
+	if (WebSocket.IsValid())
+	{
 		WebSocket->Send(InitJson);
+	}
 }

 void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsConnectionError(const FString& Error)
@@ -507,6 +527,10 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandleTranscript(const TSharedP
 		return;
 	}

+	// Record arrival time for latency measurement (ASR+LLM breakdown).
+	// user_transcript arrives after server VAD + ASR, just before LLM starts.
+	LastUserTranscriptTime = FPlatformTime::Seconds();
+
 	FPS_AI_ConvAgent_TranscriptSegment_ElevenLabs Segment;
 	Segment.Speaker = TEXT("user");
 	(*TranscriptEvent)->TryGetStringField(TEXT("user_transcript"), Segment.Text);
@@ -679,6 +703,13 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandlePing(const TSharedPtr<FJs
 	if (Root->TryGetObjectField(TEXT("ping_event"), PingEvent) && PingEvent)
 	{
 		(*PingEvent)->TryGetNumberField(TEXT("event_id"), EventID);
+
+		// Extract server-reported WS round-trip latency.
+		int32 PingValue = 0;
+		if ((*PingEvent)->TryGetNumberField(TEXT("ping_ms"), PingValue))
+		{
+			LastPingMs.store(PingValue, std::memory_order_relaxed);
+		}
 	}

 	TSharedPtr<FJsonObject> Pong = MakeShareable(new FJsonObject());
@@ -718,7 +749,7 @@ FString UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::BuildWebSocketURL(const FStr
 {
 	const UPS_AI_ConvAgent_Settings_ElevenLabs* Settings = FPS_AI_ConvAgentModule::Get().GetSettings();

-	// Custom URL override takes full precedence
+	// Custom URL override takes full precedence (advanced / proxy use case)
 	if (!Settings->CustomWebSocketURL.IsEmpty())
 	{
 		return Settings->CustomWebSocketURL;
@@ -730,9 +761,9 @@ FString UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::BuildWebSocketURL(const FStr
 		return FString();
 	}

-	// Official ElevenLabs Conversational AI WebSocket endpoint
-	// wss://api.elevenlabs.io/v1/convai/conversation?agent_id=<ID>
+	// Build URL from the region-aware base: wss://<regional-host>/v1/convai/conversation?agent_id=<ID>
+	const FString BaseURL = Settings->GetWSBaseURL();
 	return FString::Printf(
-		TEXT("wss://api.elevenlabs.io/v1/convai/conversation?agent_id=%s"),
-		*AgentIDOverride);
+		TEXT("%s/v1/convai/conversation?agent_id=%s"),
+		*BaseURL, *AgentIDOverride);
 }
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent.h
@@ -6,6 +6,22 @@
 #include "Modules/ModuleManager.h"
 #include "PS_AI_ConvAgent.generated.h"

+// ─────────────────────────────────────────────────────────────────────────────
+// ElevenLabs server region — selects which API / WebSocket host the plugin targets
+// ─────────────────────────────────────────────────────────────────────────────
+UENUM()
+enum class EPS_AI_ConvAgent_ElevenLabsRegion : uint8
+{
+	/** Automatic global routing (default): api.elevenlabs.io. Server chosen by ElevenLabs based on client location. */
+	Global		UMETA(DisplayName = "Global (auto)"),
+	/** Force US servers: api.us.elevenlabs.io */
+	US			UMETA(DisplayName = "US"),
+	/** Force EU data-residency servers (Enterprise only): api.eu.residency.elevenlabs.io */
+	EU			UMETA(DisplayName = "EU (Enterprise)"),
+	/** Force India data-residency servers (Enterprise only): api.in.residency.elevenlabs.io */
+	India		UMETA(DisplayName = "India (Enterprise)")
+};
+
 // ─────────────────────────────────────────────────────────────────────────────
 // Settings object – exposed in Project Settings → Plugins → PS AI ConvAgent - ElevenLabs
 // ─────────────────────────────────────────────────────────────────────────────
@@ -24,8 +40,17 @@ public:
 	FString API_Key;

 	/**
-	 * Override the ElevenLabs WebSocket base URL. Leave empty to use the default:
-	 *   wss://api.elevenlabs.io/v1/convai/conversation
+	 * Server region for ElevenLabs API.
+	 * - Global (default): automatic routing based on client location.
+	 * - US: force US servers (api.us.elevenlabs.io).
+	 * - EU / India: Enterprise-only data residency endpoints.
+	 */
+	UPROPERTY(Config, EditAnywhere, Category = "PS AI ConvAgent|ElevenLabs API")
+	EPS_AI_ConvAgent_ElevenLabsRegion ServerRegion = EPS_AI_ConvAgent_ElevenLabsRegion::Global;
+
+	/**
+	 * Override the ElevenLabs WebSocket URL entirely. Leave empty to use ServerRegion setting.
+	 * Example: wss://custom-proxy.example.com/v1/convai/conversation?agent_id=YOUR_ID
 	 */
 	UPROPERTY(Config, EditAnywhere, AdvancedDisplay, Category = "PS AI ConvAgent|ElevenLabs API")
 	FString CustomWebSocketURL;
@@ -33,6 +58,30 @@ public:
 	/** Log verbose WebSocket messages to the Output Log (useful during development). */
 	UPROPERTY(Config, EditAnywhere, AdvancedDisplay, Category = "PS AI ConvAgent|ElevenLabs API")
 	bool bVerboseLogging = false;
+
+	/** HTTPS origin (scheme + host, no trailing slash) of the REST API for ServerRegion. */
+	FString GetAPIBaseURL() const
+	{
+		using ERegion = EPS_AI_ConvAgent_ElevenLabsRegion;
+		if (ServerRegion == ERegion::US)    { return TEXT("https://api.us.elevenlabs.io"); }
+		if (ServerRegion == ERegion::EU)    { return TEXT("https://api.eu.residency.elevenlabs.io"); }
+		if (ServerRegion == ERegion::India) { return TEXT("https://api.in.residency.elevenlabs.io"); }
+
+		// Global (and any other value) uses the default host.
+		return TEXT("https://api.elevenlabs.io");
+	}
+
+	/** WSS origin (scheme + host, no trailing slash) for ServerRegion; the caller appends the path. */
+	FString GetWSBaseURL() const
+	{
+		using ERegion = EPS_AI_ConvAgent_ElevenLabsRegion;
+		if (ServerRegion == ERegion::US)    { return TEXT("wss://api.us.elevenlabs.io"); }
+		if (ServerRegion == ERegion::EU)    { return TEXT("wss://api.eu.residency.elevenlabs.io"); }
+		if (ServerRegion == ERegion::India) { return TEXT("wss://api.in.residency.elevenlabs.io"); }
+
+		// Global (and any other value) uses the default host.
+		return TEXT("wss://api.elevenlabs.io");
+	}
 };


--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_AgentConfig_ElevenLabs.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_AgentConfig_ElevenLabs.h
@@ -4,6 +4,7 @@

 #include "CoreMinimal.h"
 #include "Engine/DataAsset.h"
+#include "PS_AI_ConvAgent_Definitions.h"
 #include "PS_AI_ConvAgent_AgentConfig_ElevenLabs.generated.h"

 /**
@@ -186,6 +187,24 @@ public:
 		ToolTip = "Max conversation turns.\n0 = unlimited."))
 	int32 MaxTurns = 0;

+	// ── Latency / Turn-taking ───────────────────────────────────────────────
+
+	/** How quickly the server detects end-of-speech and starts responding.
+	 *  Eager = fastest response, may cut the user off during pauses.
+	 *  Normal = balanced (default). Patient = waits longer for user to finish.
+	 *  Sent as conversation_config_override at WebSocket connection time. */
+	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Latency",
+		meta = (ToolTip = "Controls how quickly the server detects end-of-speech.\n- Eager: fastest response, may interrupt mid-pause.\n- Normal: balanced (default).\n- Patient: waits longer for user to finish."))
+	EPS_AI_ConvAgent_TurnEagerness_ElevenLabs TurnEagerness = EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Normal;
+
+	/** Enable speculative turn processing: the server starts generating a response
+	 *  before it's certain the user has finished speaking. If the user continues,
+	 *  the speculative response is discarded. Reduces perceived latency.
+	 *  May cause occasional false starts — disable if the agent interrupts too often. */
+	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Latency",
+		meta = (ToolTip = "Start generating a response before confirming end-of-speech.\nReduces latency but may cause occasional false starts.\nDisable if the agent interrupts the user too often."))
+	bool bSpeculativeTurn = false;
+
 	// ── Emotion Tool ─────────────────────────────────────────────────────────

 	/** Include the built-in "set_emotion" client tool in the agent configuration.
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_Definitions.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_Definitions.h
@@ -29,6 +29,20 @@ enum class EPS_AI_ConvAgent_TurnMode_ElevenLabs : uint8
 	Client		UMETA(DisplayName = "Client Controlled"),
 };

+// ─────────────────────────────────────────────────────────────────────────────
+// Agent turn eagerness — controls how quickly the server detects end of speech
+// ─────────────────────────────────────────────────────────────────────────────
+UENUM(BlueprintType)
+enum class EPS_AI_ConvAgent_TurnEagerness_ElevenLabs : uint8
+{
+	/** Quick response at the earliest opportunity. Best for customer service. */
+	Eager		UMETA(DisplayName = "Eager"),
+	/** Balanced turn-taking for general scenarios (default). */
+	Normal		UMETA(DisplayName = "Normal"),
+	/** Longer wait for user to finish. Best for information collection. */
+	Patient		UMETA(DisplayName = "Patient"),
+};
+
 // ─────────────────────────────────────────────────────────────────────────────
 // WebSocket message type helpers (internal, not exposed to Blueprint)
 // ─────────────────────────────────────────────────────────────────────────────
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_ElevenLabsComponent.h
@@ -185,14 +185,24 @@ public:
 		meta = (ToolTip = "Fire OnAgentPartialResponse with streaming text fragments as the LLM generates them.\nIdeal for real-time subtitles. Each event gives one text chunk, not the accumulated text."))
 	bool bEnableAgentPartialResponse = false;

-	/** Pre-buffer delay (ms) before starting audio playback on the first chunk.
-	 *  Delays playback start so early TTS chunks can accumulate, preventing
-	 *  mid-sentence pauses when the second chunk hasn't arrived yet.
-	 *  Set to 0 for immediate playback. */
+	/** Pre-buffer delay (ms) before starting audio playback on the first TTS chunk.
+	 *  Set this to your "worst case" value (e.g. 300-1000ms depending on connection).
+	 *  When adaptive mode is on, the system starts here and can only decrease
+	 *  (never increase) as it measures that chunks arrive fast enough.
+	 *  Set to 0 to disable pre-buffering entirely. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs|Latency",
 		meta = (ClampMin = "0", ClampMax = "4000",
-		ToolTip = "Pre-buffer delay in ms before starting audio playback.\nHigher values reduce mid-sentence pauses but add initial latency.\n0 = immediate playback."))
-	int32 AudioPreBufferMs = 2000;
+		ToolTip = "Pre-buffer delay (ms) — your safe 'worst case' value.\nAdaptive mode can only decrease from here, never increase.\nSet 0 to disable pre-buffering entirely."))
+	int32 AudioPreBufferMs = 300;
+
+	/** Enable adaptive pre-buffer: measures inter-chunk timing and automatically
+	 *  lowers the pre-buffer when TTS chunks arrive fast enough.
+	 *  The system can only decrease from AudioPreBufferMs — never increase.
+	 *  Resets to AudioPreBufferMs at the start of each conversation. */
+	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs|Latency",
+		meta = (EditCondition = "AudioPreBufferMs > 0",
+		ToolTip = "Automatically lower pre-buffer when connection is good.\nCan only decrease, never increase beyond AudioPreBufferMs.\nResets each conversation."))
+	bool bAdaptivePreBuffer = true;

 	/** Safety timeout: if the server does not start generating a response within this many seconds after the user stops speaking, fire OnAgentResponseTimeout. Set to 0 to disable. A normal response starts within 0.1-0.8s. */
 	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs",
@@ -640,18 +650,23 @@ private:
 	double GenerationStartTime = 0.0; // Set in HandleAgentResponseStarted — server starts generating.
 	double PlaybackStartTime   = 0.0; // Set when audio playback actually starts (post pre-buffer).

-	// Current-turn latency measurements (ms). Reset in HandleAgentResponseStarted.
-	// All anchored to GenerationStartTime (agent_response_started event), which is
-	// the closest client-side proxy for "user stopped speaking" in Server VAD mode.
-	// Zero means "not yet measured this turn".
+	// Current-turn latency measurements (ms). Overwritten per-field as each
+	// measurement is captured — NOT reset to zero between turns, so the HUD
+	// always shows the most recent value instead of blinking "---".
+	// All anchored to GenerationStartTime (agent_response_started event).
 	struct FDebugLatencies
 	{
-		float GenToAudioMs  = 0.0f; // agent_response_started → first audio chunk (LLM + TTS)
+		float TurnEndToTextMs = 0.0f; // user turn end → first text from LLM (≈ ASR + LLM TTFT)
+		float GenToAudioMs  = 0.0f; // agent_response_started → first audio chunk (≈ TTS + network)
 		float PreBufferMs   = 0.0f; // Pre-buffer wait before playback starts
 		float GenToEarMs    = 0.0f; // agent_response_started → playback starts (user-perceived)
 	};
 	FDebugLatencies CurrentLatencies;

+	// ElevenLabs server region (from x-region header on REST API). Fetched once per session.
+	FString ServerRegion;
+	void FetchServerRegion();
+
 	// Accumulates incoming PCM bytes until the audio component needs data.
 	// Uses a read offset instead of RemoveAt(0,N) to avoid O(n) memmove every
 	// underflow callback (~60Hz). Compacted periodically when read offset
@@ -664,6 +679,22 @@ private:
 	bool bPreBuffering = false;
 	double PreBufferStartTime = 0.0;

+	// ── Adaptive pre-buffer ─────────────────────────────────────────────────
+	// Runtime pre-buffer duration (ms). Equals AudioPreBufferMs when adaptive is off.
+	// When adaptive is on: initialized from AudioPreBufferMs, adjusted based on
+	// measured inter-chunk timing (not queue-dry detection).
+	int32 AdaptivePreBufferMs = 300;
+	static constexpr int32 AdaptivePreBufferMinMs = 50;
+	// Direction of last adaptation: +1=raised, -1=lowered, 0=stable. Used by HUD.
+	int32 PreBufferTrend = 0;
+	void ApplyPreBufferAdaptation();
+	// Per-turn inter-chunk timing measurement (game thread only).
+	// Set when the second TTS chunk arrives, consumed at turn end.
+	double TurnFirstChunkTime = 0.0;   // When chunk 1 arrived.
+	int32 TurnFirstChunkBytes = 0;     // Bytes in chunk 1 (to estimate audio duration).
+	int32 TurnIdealPreBufferMs = -1;   // Computed ideal pre-buffer. -1 = not measured.
+	bool bTurnGapMeasured = false;     // True after first inter-chunk gap is measured.
+
 	// Debug: track when the AudioQueue runs dry during speech (one-shot log).
 	bool bQueueWasDry = false;

--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgent/Public/PS_AI_ConvAgent_WebSocket_ElevenLabsProxy.h
@@ -197,6 +197,18 @@ public:
 	UFUNCTION(BlueprintPure, Category = "PS AI ConvAgent|ElevenLabs")
 	const FPS_AI_ConvAgent_ConversationInfo_ElevenLabs& GetConversationInfo() const { return ConversationInfo; }

+	/** Latest WebSocket round-trip latency reported by the server (ms).
+	 *  Returns -1 if no ping has been received yet. Thread-safe. */
+	int32 GetLastPingMs() const { return LastPingMs.load(std::memory_order_relaxed); }
+
+	/** Timestamp of the last user audio chunk sent to the server.
+	 *  Used as a proxy for "user stopped speaking" in Server VAD mode. */
+	double GetLastAudioChunkSentTime() const { return LastAudioChunkSentTime; }
+
+	/** Timestamp of the last user_transcript received from the server.
+	 *  Marks when server finished ASR — best anchor for LLM latency measurement. */
+	double GetLastUserTranscriptTime() const { return LastUserTranscriptTime; }
+
 	// ─────────────────────────────────────────────────────────────────────────
 	// Internal
 	// ─────────────────────────────────────────────────────────────────────────
@@ -235,10 +247,16 @@ private:
 	TArray<uint8> BinaryFrameBuffer;

 	// ── Latency tracking ─────────────────────────────────────────────────────
+	// Server-reported WebSocket round-trip latency from ping events (~every 2s).
+	// Atomic: written from WS callback thread, read from game thread (HUD).
+	std::atomic<int32> LastPingMs{-1};
+
 	// Timestamp of the last audio chunk sent (user speech).
 	double LastAudioChunkSentTime = 0.0;
 	// Timestamp when user turn ended (StopListening).
 	double UserTurnEndTime = 0.0;
+	// Timestamp of the last user_transcript received (server finished ASR).
+	double LastUserTranscriptTime = 0.0;
 	// Whether we are waiting for the first response after user stopped speaking.
 	// Atomic: defensive — documents thread-safety contract.
 	std::atomic<bool> bWaitingForResponse{false};
@@ -264,4 +282,10 @@ public:
 	// Set by UPS_AI_ConvAgent_ElevenLabsComponent before calling Connect().
 	// Controls turn_timeout in conversation_initiation_client_data.
 	EPS_AI_ConvAgent_TurnMode_ElevenLabs TurnMode = EPS_AI_ConvAgent_TurnMode_ElevenLabs::Server;
+
+	// Controls how eagerly the server interprets pauses as end-of-speech.
+	EPS_AI_ConvAgent_TurnEagerness_ElevenLabs TurnEagerness = EPS_AI_ConvAgent_TurnEagerness_ElevenLabs::Normal;
+
+	// Start generating before confirming end-of-speech (reduces latency, may cause false starts).
+	bool bSpeculativeTurn = false;
 };
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgentEditor/Private/PS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_ConvAgent/Source/PS_AI_ConvAgentEditor/Private/PS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs.cpp
@@ -22,33 +22,43 @@

 DEFINE_LOG_CATEGORY_STATIC(LogPS_AI_AgentConfigEditor, Log, All);

-// Approximate LLM latencies as shown on the ElevenLabs dashboard.
-// The API does not expose this data — values are indicative and may change.
+// Approximate LLM latencies as shown on the ElevenLabs dashboard (March 2026).
+// The /v1/convai/llm/list API does NOT expose latency — values are indicative.
 // Update this table periodically to stay current.
 static FString GetLLMLatencyHint(const FString& ModelID)
 {
 	struct FLatencyEntry { const TCHAR* ID; const TCHAR* Latency; };
 	static const FLatencyEntry Entries[] =
 	{
-		// OpenAI
-		{ TEXT("gpt-4o-mini"),         TEXT("~350ms") },
-		{ TEXT("gpt-4o"),              TEXT("~700ms") },
-		{ TEXT("gpt-4"),               TEXT("~900ms") },
-		{ TEXT("gpt-4-turbo"),         TEXT("~650ms") },
-		// Anthropic
-		{ TEXT("claude-sonnet-4-5"),   TEXT("~750ms") },
-		{ TEXT("claude-haiku-4-5"),    TEXT("~350ms") },
-		{ TEXT("claude-3-5-sonnet"),   TEXT("~700ms") },
-		// Google
-		{ TEXT("gemini-1.5-pro"),      TEXT("~500ms") },
-		{ TEXT("gemini-2.0-flash"),    TEXT("~300ms") },
-		{ TEXT("gemini-2.5-flash"),    TEXT("~250ms") },
-		// xAI
-		{ TEXT("grok-beta"),           TEXT("~500ms") },
-		// ElevenLabs-hosted
-		{ TEXT("qwen3-30b-a3b"),       TEXT("~207ms") },
-		{ TEXT("glm-4.5-air"),         TEXT("~980ms") },
-		{ TEXT("gpt-oss-120b"),        TEXT("~331ms") },
+		// ── ElevenLabs-hosted ─────────────────────────────────────────────
+		{ TEXT("glm-4.5-air"),         TEXT("~949ms") },
+		{ TEXT("qwen3-30b-a3b"),       TEXT("~189ms") },
+		{ TEXT("gpt-oss-120b"),        TEXT("~321ms") },
+		// ── Google ────────────────────────────────────────────────────────
+		{ TEXT("gemini-3-pro"),        TEXT("~3.5s")  },
+		{ TEXT("gemini-3-flash"),      TEXT("~1.4s")  },
+		{ TEXT("gemini-2.5-flash"),    TEXT("~967ms") },
+		{ TEXT("gemini-2.5-flash-lite"), TEXT("~605ms") },
+		// ── OpenAI ────────────────────────────────────────────────────────
+		{ TEXT("gpt-5"),               TEXT("~1.1s")  },
+		{ TEXT("gpt-5.1"),             TEXT("~980ms") },
+		{ TEXT("gpt-5.2"),             TEXT("~795ms") },
+		{ TEXT("gpt-5-mini"),          TEXT("~884ms") },
+		{ TEXT("gpt-5-nano"),          TEXT("~734ms") },
+		{ TEXT("gpt-4.1"),             TEXT("~870ms") },
+		{ TEXT("gpt-4.1-mini"),        TEXT("~916ms") },
+		{ TEXT("gpt-4.1-nano"),        TEXT("~574ms") },
+		{ TEXT("gpt-4o"),              TEXT("~728ms") },
+		{ TEXT("gpt-4o-mini"),         TEXT("~767ms") },
+		{ TEXT("gpt-4-turbo"),         TEXT("~1.5s")  },
+		{ TEXT("gpt-3.5-turbo"),       TEXT("~458ms") },
+		// ── Anthropic ─────────────────────────────────────────────────────
+		{ TEXT("claude-sonnet-4-5"),   TEXT("~1.4s")  },
+		{ TEXT("claude-sonnet-4"),     TEXT("~1.1s")  },
+		{ TEXT("claude-haiku-4-5"),    TEXT("~644ms") },
+		{ TEXT("claude-3.7-sonnet"),   TEXT("~1.2s")  },
+		{ TEXT("claude-3-haiku"),      TEXT("~484ms") },
+		{ TEXT("claude-3-5-sonnet"),   TEXT("~1.2s")  },
 	};

 	for (const auto& E : Entries)
@@ -58,6 +68,22 @@ static FString GetLLMLatencyHint(const FString& ModelID)
 	return FString();
 }

+// Infer provider from model ID for display grouping. ElevenLabs-hosted IDs are matched first because "gpt-oss-120b" also carries the OpenAI "gpt-" prefix.
+static FString GetLLMProvider(const FString& ModelID)
+{
+	if (ModelID == TEXT("glm-4.5-air") || ModelID == TEXT("qwen3-30b-a3b") || ModelID == TEXT("gpt-oss-120b"))
+		return TEXT("ElevenLabs");
+	if (ModelID.StartsWith(TEXT("gpt-")) || ModelID.StartsWith(TEXT("o1")) || ModelID.StartsWith(TEXT("o3")))
+		return TEXT("OpenAI");
+	if (ModelID.StartsWith(TEXT("claude-")))
+		return TEXT("Anthropic");
+	if (ModelID.StartsWith(TEXT("gemini-")))
+		return TEXT("Google");
+	if (ModelID.StartsWith(TEXT("grok")))
+		return TEXT("xAI");
+	return FString(); // No rule matched: empty string means "unknown provider".
+}
+
 // Language code → display name. Shared by BuildAgentPayload (to resolve
 // {Language} placeholder) and the fetch handler (to strip the resolved fragment).
 static FString GetLanguageDisplayName(const FString& LangCode)
@@ -332,9 +358,11 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::CustomizeDetails(
 			.Font(IDetailLayoutBuilder::GetDetailFont())
 		]
 		.ValueContent()
+		.MaxDesiredWidth(600.f)
 		[
 			SNew(SBox)
 			.MinDesiredHeight(200.f)
+			.MinDesiredWidth(400.f)
 			[
 				SNew(SMultiLineEditableTextBox)
 				.Font(IDetailLayoutBuilder::GetDetailFont())
@@ -679,6 +707,10 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::OnFetchLLMsClicked()
 			Pinned->LLMDisplayNames.Reset();
 			Pinned->LLMModelIDs.Reset();

+			// Collect models grouped by provider for sorted display.
+			struct FLLMEntry { FString ModelID; FString Provider; FString Display; bool bCheckpoint; };
+			TArray<FLLMEntry> AllEntries;
+
 			for (const auto& LLMVal : *LLMs)
 			{
 				const TSharedPtr<FJsonObject>* LLMObj = nullptr;
@@ -703,12 +735,14 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::OnFetchLLMsClicked()
 					}
 				}

-				// Check if it's a checkpoint model (sub-version).
 				bool bIsCheckpoint = false;
 				(*LLMObj)->TryGetBoolField(TEXT("is_checkpoint"), bIsCheckpoint);

-				// Build display string: "model-id  (~350ms)" or "  model-id  (checkpoint, ~350ms)"
 				const FString Latency = GetLLMLatencyHint(ModelID);
+				const FString Provider = GetLLMProvider(ModelID);
+
+				// Build display: "  model-id  (checkpoint, ~350ms)" for checkpoints,
+				//                "  model-id  (~350ms)" for main models (two-space indent).
 				FString Display;
 				if (bIsCheckpoint)
 				{
@@ -719,12 +753,44 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::OnFetchLLMsClicked()
 				else
 				{
 					Display = Latency.IsEmpty()
-						? ModelID
-						: FString::Printf(TEXT("%s  (%s)"), *ModelID, *Latency);
+						? FString::Printf(TEXT("  %s"), *ModelID)
+						: FString::Printf(TEXT("  %s  (%s)"), *ModelID, *Latency);
 				}

-				Pinned->LLMDisplayNames.Add(MakeShareable(new FString(Display)));
-				Pinned->LLMModelIDs.Add(ModelID);
+				AllEntries.Add({ ModelID, Provider, Display, bIsCheckpoint });
+			}
+
+			// Sort by provider order (ElevenLabs, Google, OpenAI, Anthropic, xAI, then any
+			// unrecognised provider), then main models before checkpoints, then by model ID.
+			static const TArray<FString> ProviderOrder = {
+				TEXT("ElevenLabs"), TEXT("Google"), TEXT("OpenAI"), TEXT("Anthropic"), TEXT("xAI")
+			};
+			AllEntries.Sort([](const FLLMEntry& A, const FLLMEntry& B)
+			{
+				int32 IdxA = ProviderOrder.IndexOfByKey(A.Provider);
+				int32 IdxB = ProviderOrder.IndexOfByKey(B.Provider);
+				if (IdxA == INDEX_NONE) IdxA = ProviderOrder.Num();
+				if (IdxB == INDEX_NONE) IdxB = ProviderOrder.Num();
+				if (IdxA != IdxB) return IdxA < IdxB;
+				if (A.bCheckpoint != B.bCheckpoint) return !A.bCheckpoint; // main first
+				return A.ModelID < B.ModelID;
+			});
+
+			// Insert provider headers as non-selectable separator entries.
+			FString LastProvider;
+			for (const auto& Entry : AllEntries)
+			{
+				const FString& Prov = Entry.Provider.IsEmpty() ? TEXT("Other") : Entry.Provider;
+				if (Prov != LastProvider)
+				{
+					// Header line: "── OpenAI ──" (not selectable — mapped to empty ModelID)
+					FString Header = FString::Printf(TEXT("── %s ──"), *Prov);
+					Pinned->LLMDisplayNames.Add(MakeShareable(new FString(Header)));
+					Pinned->LLMModelIDs.Add(FString()); // empty = separator
+					LastProvider = Prov;
+				}
+				Pinned->LLMDisplayNames.Add(MakeShareable(new FString(Entry.Display)));
+				Pinned->LLMModelIDs.Add(Entry.ModelID);
 			}

 			// Pre-select the currently set LLMModel if it exists in the list.
@@ -767,6 +833,9 @@ void FPS_AI_ConvAgent_AgentConfigCustomization_ElevenLabs::OnLLMSelected(
 	int32 Idx = LLMDisplayNames.IndexOfByKey(NewSelection);
 	if (Idx == INDEX_NONE) return;

+	// Separator headers have empty ModelID — ignore selection.
+	if (LLMModelIDs[Idx].IsEmpty()) return;
+
 	if (UPS_AI_ConvAgent_AgentConfig_ElevenLabs* Asset = GetEditedAsset())
 	{
 		Asset->Modify();