Compare commits

..

No commits in common. "8fcb8b6f3089e1f2b8380a817050c0e4fa008995" and "5323da6908ed5bc4f267597242f7682becea2469" have entirely different histories.

18 changed files with 312 additions and 125 deletions

View File

@ -32,6 +32,16 @@ void FAnimNode_PS_AI_ConvAgent_FacialExpression::Initialize_AnyThread(const FAni
if (Comp) if (Comp)
{ {
FacialExpressionComponent = Comp; FacialExpressionComponent = Comp;
UE_LOG(LogPS_AI_ConvAgent_FacialExprAnimNode, Log,
TEXT("PS AI ConvAgent Facial Expression AnimNode bound to component on %s."),
*Owner->GetName());
}
else
{
UE_LOG(LogPS_AI_ConvAgent_FacialExprAnimNode, Warning,
TEXT("No PS_AI_ConvAgent_FacialExpressionComponent found on %s. "
"Add the component alongside the Conversational Agent."),
*Owner->GetName());
} }
} }
} }
@ -62,6 +72,9 @@ void FAnimNode_PS_AI_ConvAgent_FacialExpression::Update_AnyThread(const FAnimati
if (Comp) if (Comp)
{ {
FacialExpressionComponent = Comp; FacialExpressionComponent = Comp;
UE_LOG(LogPS_AI_ConvAgent_FacialExprAnimNode, Log,
TEXT("PS AI ConvAgent Facial Expression AnimNode (late) bound to component on %s."),
*Owner->GetName());
} }
} }
} }

View File

@ -32,6 +32,16 @@ void FAnimNode_PS_AI_ConvAgent_LipSync::Initialize_AnyThread(const FAnimationIni
if (Comp) if (Comp)
{ {
LipSyncComponent = Comp; LipSyncComponent = Comp;
UE_LOG(LogPS_AI_ConvAgent_LipSyncAnimNode, Log,
TEXT("PS AI ConvAgent Lip Sync AnimNode bound to component on %s."),
*Owner->GetName());
}
else
{
UE_LOG(LogPS_AI_ConvAgent_LipSyncAnimNode, Warning,
TEXT("No PS_AI_ConvAgent_LipSyncComponent found on %s. "
"Add the component alongside the Conversational Agent."),
*Owner->GetName());
} }
} }
} }
@ -62,6 +72,9 @@ void FAnimNode_PS_AI_ConvAgent_LipSync::Update_AnyThread(const FAnimationUpdateC
if (Comp) if (Comp)
{ {
LipSyncComponent = Comp; LipSyncComponent = Comp;
UE_LOG(LogPS_AI_ConvAgent_LipSyncAnimNode, Log,
TEXT("PS AI ConvAgent Lip Sync AnimNode (late) bound to component on %s."),
*Owner->GetName());
} }
} }
} }

View File

@ -126,8 +126,22 @@ void FAnimNode_PS_AI_ConvAgent_Posture::Initialize_AnyThread(const FAnimationIni
} }
} }
} UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Log,
// No posture component is fine — AnimBP may not require it. TEXT("PS AI ConvAgent Posture AnimNode: Owner=%s Mesh=%s Head=%s Eyes=%s Chain=%d HeadComp=%.1f EyeComp=%.1f DriftComp=%.1f"),
*Owner->GetName(), *SkelMesh->GetName(),
bApplyHeadRotation ? TEXT("ON") : TEXT("OFF"),
bApplyEyeCurves ? TEXT("ON") : TEXT("OFF"),
ChainBoneNames.Num(),
Comp->GetHeadAnimationCompensation(),
Comp->GetEyeAnimationCompensation(),
Comp->GetBodyDriftCompensation());
}
else
{
UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Warning,
TEXT("No PS_AI_ConvAgent_PostureComponent found on %s."),
*Owner->GetName());
}
} }
} }
} }
@ -173,7 +187,10 @@ void FAnimNode_PS_AI_ConvAgent_Posture::CacheBones_AnyThread(const FAnimationCac
: FQuat::Identity; : FQuat::Identity;
ChainRefPoseRotations.Add(RefRot); ChainRefPoseRotations.Add(RefRot);
// Bone resolved successfully. UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Log,
TEXT(" Chain bone [%d] '%s' → index %d (weight=%.2f)"),
i, *ChainBoneNames[i].ToString(), CompactIdx.GetInt(),
ChainBoneWeights[i]);
} }
else else
{ {
@ -200,7 +217,9 @@ void FAnimNode_PS_AI_ConvAgent_Posture::CacheBones_AnyThread(const FAnimationCac
? RefPose[MeshIndex].GetRotation() ? RefPose[MeshIndex].GetRotation()
: FQuat::Identity; : FQuat::Identity;
// Head bone resolved successfully. UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Log,
TEXT("Head bone '%s' resolved to index %d."),
*HeadBoneName.ToString(), HeadBoneIndex.GetInt());
} }
else else
{ {
@ -239,7 +258,15 @@ void FAnimNode_PS_AI_ConvAgent_Posture::CacheBones_AnyThread(const FAnimationCac
? RefPose[MeshIdx].GetRotation() ? RefPose[MeshIdx].GetRotation()
: FQuat::Identity; : FQuat::Identity;
// Eye bone resolved successfully. UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Log,
TEXT("Eye bone '%s' resolved to index %d."),
*BoneName.ToString(), OutIndex.GetInt());
}
else
{
UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Log,
TEXT("Eye bone '%s' not found in skeleton (OK for Body AnimBP)."),
*BoneName.ToString());
} }
}; };
@ -284,7 +311,10 @@ void FAnimNode_PS_AI_ConvAgent_Posture::CacheBones_AnyThread(const FAnimationCac
RefAccumAboveChain = RefAccumAboveChain * RefRot; RefAccumAboveChain = RefAccumAboveChain * RefRot;
} }
// Body drift compensation initialized. UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Log,
TEXT("Body drift: %d ancestor bones above chain. RefAccum=(%s)"),
AncestorBoneIndices.Num(),
*RefAccumAboveChain.Rotator().ToCompactString());
} }
} }
} }
@ -309,6 +339,9 @@ void FAnimNode_PS_AI_ConvAgent_Posture::Update_AnyThread(const FAnimationUpdateC
if (Comp) if (Comp)
{ {
PostureComponent = Comp; PostureComponent = Comp;
UE_LOG(LogPS_AI_ConvAgent_PostureAnimNode, Log,
TEXT("PS AI ConvAgent Posture AnimNode (late) bound to component on %s."),
*Owner->GetName());
} }
} }
} }

View File

@ -77,13 +77,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
if (WaitTime >= static_cast<double>(ResponseTimeoutSeconds)) if (WaitTime >= static_cast<double>(ResponseTimeoutSeconds))
{ {
bWaitingForAgentResponse = false; bWaitingForAgentResponse = false;
if (bDebug) const double T = FPlatformTime::Seconds() - SessionStartTime;
{ UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
const double T = FPlatformTime::Seconds() - SessionStartTime; TEXT("[T+%.2fs] [Turn %d] Response timeout — server did not start generating after %.1fs. Firing OnAgentResponseTimeout."),
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning, T, LastClosedTurnIndex, WaitTime);
TEXT("[T+%.2fs] [Turn %d] Response timeout — server did not start generating after %.1fs."),
T, LastClosedTurnIndex, WaitTime);
}
OnAgentResponseTimeout.Broadcast(); OnAgentResponseTimeout.Broadcast();
} }
} }
@ -99,13 +96,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
{ {
bAgentGenerating = false; bAgentGenerating = false;
GeneratingTickCount = 0; GeneratingTickCount = 0;
if (bDebug) const double T = FPlatformTime::Seconds() - SessionStartTime;
{ UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
const double T = FPlatformTime::Seconds() - SessionStartTime; TEXT("[T+%.2fs] [Turn %d] Generating timeout (10s) — server generated but no audio arrived. Clearing bAgentGenerating."),
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning, T, LastClosedTurnIndex);
TEXT("[T+%.2fs] [Turn %d] Generating timeout (10s) — no audio arrived. Clearing bAgentGenerating."),
T, LastClosedTurnIndex);
}
} }
} }
else else
@ -122,13 +116,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
if (Elapsed >= static_cast<double>(AudioPreBufferMs)) if (Elapsed >= static_cast<double>(AudioPreBufferMs))
{ {
bPreBuffering = false; bPreBuffering = false;
if (bDebug) const double Tpb = FPlatformTime::Seconds() - SessionStartTime;
{ UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
const double Tpb = FPlatformTime::Seconds() - SessionStartTime; TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, Tpb, LastClosedTurnIndex, AudioPreBufferMs);
TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
Tpb, LastClosedTurnIndex, AudioPreBufferMs);
}
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{ {
AudioPlaybackComponent->Play(); AudioPlaybackComponent->Play();
@ -191,11 +182,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::TickComponent(float DeltaTime, ELevel
// Broadcast OUTSIDE the lock — Blueprint handlers can execute for arbitrary time. // Broadcast OUTSIDE the lock — Blueprint handlers can execute for arbitrary time.
if (bShouldBroadcastStopped) if (bShouldBroadcastStopped)
{ {
if (bHardTimeoutFired && bDebug) if (bHardTimeoutFired)
{ {
const double Tht = FPlatformTime::Seconds() - SessionStartTime; const double Tht = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning, UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("[T+%.2fs] [Turn %d] Agent silence hard-timeout (10s) without agent_response."), TEXT("[T+%.2fs] [Turn %d] Agent silence hard-timeout (10s) without agent_response — declaring agent stopped."),
Tht, LastClosedTurnIndex); Tht, LastClosedTurnIndex);
} }
OnAgentStoppedSpeaking.Broadcast(); OnAgentStoppedSpeaking.Broadcast();
@ -265,6 +256,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StartConversation_Internal()
// Pass configuration to the proxy before connecting. // Pass configuration to the proxy before connecting.
WebSocketProxy->TurnMode = TurnMode; WebSocketProxy->TurnMode = TurnMode;
WebSocketProxy->bSpeculativeTurn = bSpeculativeTurn;
WebSocketProxy->Connect(AgentID); WebSocketProxy->Connect(AgentID);
} }
@ -320,7 +312,7 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StartListening()
{ {
if (bAgentSpeaking && bAllowInterruption) if (bAgentSpeaking && bAllowInterruption)
{ {
if (bDebug) UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("StartListening: interrupting agent to allow user to speak.")); UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("StartListening: interrupting agent (speaking) to allow user to speak."));
InterruptAgent(); InterruptAgent();
// InterruptAgent → StopAgentAudio clears bAgentSpeaking / bAgentGenerating, // InterruptAgent → StopAgentAudio clears bAgentSpeaking / bAgentGenerating,
// so we fall through and open the microphone immediately. // so we fall through and open the microphone immediately.
@ -382,12 +374,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StartListening()
Mic->StartCapture(); Mic->StartCapture();
} }
if (bDebug) const double T = TurnStartTime - SessionStartTime;
{ UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("[T+%.2fs] [Turn %d] Mic opened%s — user speaking."),
const double T = TurnStartTime - SessionStartTime; T, TurnIndex, bExternalMicManagement ? TEXT(" (external)") : TEXT(""));
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("[T+%.2fs] [Turn %d] Mic opened%s — user speaking."),
T, TurnIndex, bExternalMicManagement ? TEXT(" (external)") : TEXT(""));
}
} }
void UPS_AI_ConvAgent_ElevenLabsComponent::StopListening() void UPS_AI_ConvAgent_ElevenLabsComponent::StopListening()
@ -451,22 +440,21 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopListening()
bWaitingForAgentResponse = true; bWaitingForAgentResponse = true;
LastClosedTurnIndex = TurnIndex; LastClosedTurnIndex = TurnIndex;
} }
if (bDebug) const double T = TurnEndTime - SessionStartTime;
const double TurnDuration = TurnStartTime > 0.0 ? TurnEndTime - TurnStartTime : 0.0;
if (bWaitingForAgentResponse)
{ {
const double T = TurnEndTime - SessionStartTime; UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
const double TurnDuration = TurnStartTime > 0.0 ? TurnEndTime - TurnStartTime : 0.0; TEXT("[T+%.2fs] [Turn %d] Mic closed — user spoke %.2fs. Waiting for server response (timeout %.0fs)..."),
if (bWaitingForAgentResponse) T, TurnIndex, TurnDuration, ResponseTimeoutSeconds);
{ }
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, else
TEXT("[T+%.2fs] [Turn %d] Mic closed — user spoke %.2fs. Waiting for response (timeout %.0fs)..."), {
T, TurnIndex, TurnDuration, ResponseTimeoutSeconds); // Collision avoidance: StopListening was called from HandleAgentResponseStarted
} // while server was already generating — no need to wait or time out.
else UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
{ TEXT("[T+%.2fs] [Turn %d] Mic closed (collision avoidance) — user spoke %.2fs. Server is already generating Turn %d response."),
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, T, TurnIndex, TurnDuration, LastClosedTurnIndex);
TEXT("[T+%.2fs] [Turn %d] Mic closed (collision) — user spoke %.2fs. Server generating Turn %d."),
T, TurnIndex, TurnDuration, LastClosedTurnIndex);
}
} }
} }
@ -739,12 +727,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleAgentResponseStarted()
} }
} }
if (bDebug) UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
{ TEXT("[T+%.2fs] [Turn %d] Agent generating. (%.2fs after turn end)"),
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, T, LastClosedTurnIndex, LatencyFromTurnEnd);
TEXT("[T+%.2fs] [Turn %d] Agent generating. (%.2fs after turn end)"),
T, LastClosedTurnIndex, LatencyFromTurnEnd);
}
OnAgentStartedGenerating.Broadcast(); OnAgentStartedGenerating.Broadcast();
// Network: notify all clients. // Network: notify all clients.
@ -814,12 +799,9 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::HandleClientToolCall(const FPS_AI_Con
CurrentEmotion = NewEmotion; CurrentEmotion = NewEmotion;
CurrentEmotionIntensity = NewIntensity; CurrentEmotionIntensity = NewIntensity;
if (bDebug) const double T = FPlatformTime::Seconds() - SessionStartTime;
{ UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("[T+%.2fs] Agent emotion changed to: %s (%s)"),
const double T = FPlatformTime::Seconds() - SessionStartTime; T, *UEnum::GetValueAsString(NewEmotion), *UEnum::GetValueAsString(NewIntensity));
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("[T+%.2fs] Agent emotion changed to: %s (%s)"),
T, *UEnum::GetValueAsString(NewEmotion), *UEnum::GetValueAsString(NewIntensity));
}
OnAgentEmotionChanged.Broadcast(NewEmotion, NewIntensity); OnAgentEmotionChanged.Broadcast(NewEmotion, NewIntensity);
@ -923,6 +905,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::OnProceduralUnderflow(
if (!bQueueWasDry) if (!bQueueWasDry)
{ {
bQueueWasDry = true; bQueueWasDry = true;
const double T = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("[T+%.2fs] [Turn %d] AudioQueue DRY — waiting for next TTS chunk (requested %d samples)."),
T, LastClosedTurnIndex, SamplesRequired);
} }
// Do NOT feed silence via QueueAudio! USoundWaveProcedural with // Do NOT feed silence via QueueAudio! USoundWaveProcedural with
@ -955,14 +941,11 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
bQueueWasDry = false; bQueueWasDry = false;
SilentTickCount = 0; SilentTickCount = 0;
if (bDebug) const double T = AgentSpeakStart - SessionStartTime;
{ const double LatencyFromTurnEnd = TurnEndTime > 0.0 ? AgentSpeakStart - TurnEndTime : 0.0;
const double T = AgentSpeakStart - SessionStartTime; UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
const double LatencyFromTurnEnd = TurnEndTime > 0.0 ? AgentSpeakStart - TurnEndTime : 0.0; TEXT("[T+%.2fs] [Turn %d] Agent speaking — first audio chunk. (%.2fs after turn end)"),
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, T, LastClosedTurnIndex, LatencyFromTurnEnd);
TEXT("[T+%.2fs] [Turn %d] Agent speaking — first audio chunk. (%.2fs after turn end)"),
T, LastClosedTurnIndex, LatencyFromTurnEnd);
}
OnAgentStartedSpeaking.Broadcast(); OnAgentStartedSpeaking.Broadcast();
@ -1018,6 +1001,10 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::EnqueueAgentAudio(const TArray<uint8>
// buffer underrun (TTS inter-batch gap). Restart it if needed. // buffer underrun (TTS inter-batch gap). Restart it if needed.
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying()) if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{ {
const double Tbr = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Warning,
TEXT("[T+%.2fs] [Turn %d] Audio component stopped during speech (buffer underrun). Restarting playback."),
Tbr, LastClosedTurnIndex);
AudioPlaybackComponent->Play(); AudioPlaybackComponent->Play();
} }
// Reset silence counter — new audio arrived, we're not in a gap anymore // Reset silence counter — new audio arrived, we're not in a gap anymore
@ -1068,15 +1055,12 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::StopAgentAudio()
// Broadcast outside the lock. // Broadcast outside the lock.
if (bWasSpeaking) if (bWasSpeaking)
{ {
if (bDebug) const double T = Now - SessionStartTime;
{ const double AgentSpokeDuration = AgentSpeakStart > 0.0 ? Now - AgentSpeakStart : 0.0;
const double T = Now - SessionStartTime; const double TotalTurnDuration = TurnEndTime > 0.0 ? Now - TurnEndTime : 0.0;
const double AgentSpokeDuration = AgentSpeakStart > 0.0 ? Now - AgentSpeakStart : 0.0; UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
const double TotalTurnDuration = TurnEndTime > 0.0 ? Now - TurnEndTime : 0.0; TEXT("[T+%.2fs] [Turn %d] Agent stopped speaking (spoke %.2fs, full turn round-trip %.2fs)."),
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, T, LastClosedTurnIndex, AgentSpokeDuration, TotalTurnDuration);
TEXT("[T+%.2fs] [Turn %d] Agent stopped speaking (spoke %.2fs, round-trip %.2fs)."),
T, LastClosedTurnIndex, AgentSpokeDuration, TotalTurnDuration);
}
OnAgentStoppedSpeaking.Broadcast(); OnAgentStoppedSpeaking.Broadcast();
@ -1188,6 +1172,8 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::ServerRequestConversation_Implementat
{ {
if (bNetIsConversing) if (bNetIsConversing)
{ {
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
TEXT("ServerRequestConversation denied — NPC is already in conversation."));
ClientConversationFailed(TEXT("NPC is already in conversation with another player.")); ClientConversationFailed(TEXT("NPC is already in conversation with another player."));
return; return;
} }
@ -1195,11 +1181,17 @@ void UPS_AI_ConvAgent_ElevenLabsComponent::ServerRequestConversation_Implementat
bNetIsConversing = true; bNetIsConversing = true;
NetConversatingPlayer = RequestingPlayer; NetConversatingPlayer = RequestingPlayer;
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log,
TEXT("ServerRequestConversation granted for %s."),
*GetNameSafe(RequestingPlayer));
StartConversation_Internal(); StartConversation_Internal();
} }
void UPS_AI_ConvAgent_ElevenLabsComponent::ServerReleaseConversation_Implementation() void UPS_AI_ConvAgent_ElevenLabsComponent::ServerReleaseConversation_Implementation()
{ {
UE_LOG(LogPS_AI_ConvAgent_ElevenLabs, Log, TEXT("ServerReleaseConversation."));
StopListening(); StopListening();
bWaitingForAgentResponse = false; bWaitingForAgentResponse = false;
StopAgentAudio(); StopAgentAudio();

View File

@ -93,6 +93,8 @@ void UPS_AI_ConvAgent_FacialExpressionComponent::ValidateEmotionPoses()
{ {
if (!EmotionPoseMap || EmotionPoseMap->EmotionPoses.Num() == 0) if (!EmotionPoseMap || EmotionPoseMap->EmotionPoses.Num() == 0)
{ {
UE_LOG(LogPS_AI_ConvAgent_FacialExpr, Log,
TEXT("No emotion poses assigned in EmotionPoseMap — facial expressions disabled."));
return; return;
} }
@ -203,13 +205,10 @@ void UPS_AI_ConvAgent_FacialExpressionComponent::OnEmotionChanged(
// Begin crossfade // Begin crossfade
CrossfadeAlpha = 0.0f; CrossfadeAlpha = 0.0f;
if (bDebug) UE_LOG(LogPS_AI_ConvAgent_FacialExpr, Log,
{ TEXT("Emotion changed: %s (%s) — anim: %s, crossfading over %.1fs..."),
UE_LOG(LogPS_AI_ConvAgent_FacialExpr, Log, *UEnum::GetValueAsString(Emotion), *UEnum::GetValueAsString(Intensity),
TEXT("Emotion changed: %s (%s) — anim: %s"), NewAnim ? *NewAnim->GetName() : TEXT("(none)"), EmotionBlendDuration);
*UEnum::GetValueAsString(Emotion), *UEnum::GetValueAsString(Intensity),
NewAnim ? *NewAnim->GetName() : TEXT("(none)"));
}
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -233,6 +232,9 @@ void UPS_AI_ConvAgent_FacialExpressionComponent::TickComponent(
AgentComponent = Agent; AgentComponent = Agent;
Agent->OnAgentEmotionChanged.AddDynamic( Agent->OnAgentEmotionChanged.AddDynamic(
this, &UPS_AI_ConvAgent_FacialExpressionComponent::OnEmotionChanged); this, &UPS_AI_ConvAgent_FacialExpressionComponent::OnEmotionChanged);
UE_LOG(LogPS_AI_ConvAgent_FacialExpr, Log,
TEXT("Facial expression (late) bound to agent component on %s."),
*Owner->GetName());
} }
} }
} }

View File

@ -45,6 +45,7 @@ void UPS_AI_ConvAgent_InteractionComponent::BeginPlay()
MicComponent->OnAudioCaptured.AddUObject(this, MicComponent->OnAudioCaptured.AddUObject(this,
&UPS_AI_ConvAgent_InteractionComponent::OnMicAudioCaptured); &UPS_AI_ConvAgent_InteractionComponent::OnMicAudioCaptured);
UE_LOG(LogPS_AI_ConvAgent_Select, Log, TEXT("InteractionComponent initialized on %s."), *Owner->GetName());
} }
void UPS_AI_ConvAgent_InteractionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason) void UPS_AI_ConvAgent_InteractionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)

View File

@ -19,6 +19,8 @@ void UPS_AI_ConvAgent_InteractionSubsystem::RegisterAgent(UPS_AI_ConvAgent_Eleve
} }
RegisteredAgents.Add(Agent); RegisteredAgents.Add(Agent);
UE_LOG(LogPS_AI_ConvAgent_Interaction, Log, TEXT("Agent registered: %s (%d total)"),
*Agent->GetOwner()->GetName(), RegisteredAgents.Num());
} }
void UPS_AI_ConvAgent_InteractionSubsystem::UnregisterAgent(UPS_AI_ConvAgent_ElevenLabsComponent* Agent) void UPS_AI_ConvAgent_InteractionSubsystem::UnregisterAgent(UPS_AI_ConvAgent_ElevenLabsComponent* Agent)
@ -30,6 +32,9 @@ void UPS_AI_ConvAgent_InteractionSubsystem::UnregisterAgent(UPS_AI_ConvAgent_Ele
return !Weak.IsValid() || Weak.Get() == Agent; return !Weak.IsValid() || Weak.Get() == Agent;
}); });
UE_LOG(LogPS_AI_ConvAgent_Interaction, Log, TEXT("Agent unregistered: %s (%d remaining)"),
Agent->GetOwner() ? *Agent->GetOwner()->GetName() : TEXT("(destroyed)"),
RegisteredAgents.Num());
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────

View File

@ -672,6 +672,9 @@ void UPS_AI_ConvAgent_LipSyncComponent::TickComponent(float DeltaTime, ELevelTic
Agent->OnAgentStoppedSpeaking.AddDynamic( Agent->OnAgentStoppedSpeaking.AddDynamic(
this, &UPS_AI_ConvAgent_LipSyncComponent::OnAgentStopped); this, &UPS_AI_ConvAgent_LipSyncComponent::OnAgentStopped);
Agent->bEnableAgentPartialResponse = true; Agent->bEnableAgentPartialResponse = true;
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
TEXT("Lip sync (late) bound to agent component on %s."),
*Owner->GetName());
} }
} }
} }
@ -710,6 +713,9 @@ void UPS_AI_ConvAgent_LipSyncComponent::TickComponent(float DeltaTime, ELevelTic
// Timeout — start playback with spectral shapes as fallback // Timeout — start playback with spectral shapes as fallback
bWaitingForText = false; bWaitingForText = false;
PlaybackTimer = 0.0f; PlaybackTimer = 0.0f;
UE_LOG(LogPS_AI_ConvAgent_LipSync, Warning,
TEXT("Text wait timeout (%.0fms). Starting lip sync with spectral shapes (Queue=%d)."),
WaitElapsed * 1000.0, VisemeQueue.Num());
} }
else else
{ {
@ -996,6 +1002,7 @@ void UPS_AI_ConvAgent_LipSyncComponent::TickComponent(float DeltaTime, ELevelTic
void UPS_AI_ConvAgent_LipSyncComponent::OnAgentInterrupted() void UPS_AI_ConvAgent_LipSyncComponent::OnAgentInterrupted()
{ {
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log, TEXT("Agent interrupted — resetting lip sync to neutral."));
ResetToNeutral(); ResetToNeutral();
} }
@ -1004,6 +1011,7 @@ void UPS_AI_ConvAgent_LipSyncComponent::OnAgentStopped()
// Don't clear text state here — it's already handled by TickComponent's // Don't clear text state here — it's already handled by TickComponent's
// "queue runs dry" logic which checks bFullTextReceived. // "queue runs dry" logic which checks bFullTextReceived.
// Just clear the queues so the mouth returns to neutral immediately. // Just clear the queues so the mouth returns to neutral immediately.
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log, TEXT("Agent stopped speaking — clearing lip sync queues."));
VisemeQueue.Reset(); VisemeQueue.Reset();
AmplitudeQueue.Reset(); AmplitudeQueue.Reset();
PlaybackTimer = 0.0f; PlaybackTimer = 0.0f;
@ -1407,6 +1415,9 @@ void UPS_AI_ConvAgent_LipSyncComponent::OnAudioChunkReceived(const TArray<uint8>
bWaitingForText = true; bWaitingForText = true;
WaitingForTextStartTime = FPlatformTime::Seconds(); WaitingForTextStartTime = FPlatformTime::Seconds();
PlaybackTimer = 0.0f; PlaybackTimer = 0.0f;
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
TEXT("Waiting for text before starting lip sync (%d frames queued)."),
WindowsQueued);
} }
} }
else if (AccumulatedText.Len() > 0 && TextVisemeSequence.Num() > 0) else if (AccumulatedText.Len() > 0 && TextVisemeSequence.Num() > 0)
@ -1418,14 +1429,11 @@ void UPS_AI_ConvAgent_LipSyncComponent::OnAudioChunkReceived(const TArray<uint8>
ApplyTextVisemesToQueue(); ApplyTextVisemesToQueue();
} }
if (bDebug) UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
{ TEXT("Audio chunk: %d samples → %d windows | Amp=[%.2f-%.2f] | Queue=%d (%.1fs) | TextVisemes=%d"),
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log, NumSamples, WindowsQueued,
TEXT("Audio chunk: %d samples → %d windows | Amp=[%.2f-%.2f] | Queue=%d (%.1fs) | TextVisemes=%d"), MinAmp, MaxAmp, VisemeQueue.Num(),
NumSamples, WindowsQueued, VisemeQueue.Num() * (512.0f / 16000.0f), TextVisemeSequence.Num());
MinAmp, MaxAmp, VisemeQueue.Num(),
VisemeQueue.Num() * (512.0f / 16000.0f), TextVisemeSequence.Num());
}
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -1450,12 +1458,9 @@ void UPS_AI_ConvAgent_LipSyncComponent::OnPartialTextReceived(const FString& Par
// Convert accumulated text to viseme sequence progressively // Convert accumulated text to viseme sequence progressively
ConvertTextToVisemes(AccumulatedText); ConvertTextToVisemes(AccumulatedText);
if (bDebug) UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
{ TEXT("Partial text: \"%s\" → %d visemes (accumulated: \"%s\")"),
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log, *PartialText, TextVisemeSequence.Num(), *AccumulatedText);
TEXT("Partial text: \"%s\" → %d visemes (accumulated: \"%s\")"),
*PartialText, TextVisemeSequence.Num(), *AccumulatedText);
}
// If we were waiting for text to arrive before starting playback, // If we were waiting for text to arrive before starting playback,
// apply text visemes to queued frames and start consuming. // apply text visemes to queued frames and start consuming.
@ -1485,11 +1490,8 @@ void UPS_AI_ConvAgent_LipSyncComponent::OnTextResponseReceived(const FString& Re
AccumulatedText = ResponseText; AccumulatedText = ResponseText;
ConvertTextToVisemes(ResponseText); ConvertTextToVisemes(ResponseText);
if (bDebug) UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
{ TEXT("Full text: \"%s\" → %d visemes"), *ResponseText, TextVisemeSequence.Num());
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
TEXT("Full text: \"%s\" → %d visemes"), *ResponseText, TextVisemeSequence.Num());
}
// Apply to any remaining queued frames (or extend timeline in pose mode) // Apply to any remaining queued frames (or extend timeline in pose mode)
{ {
@ -1511,7 +1513,7 @@ void UPS_AI_ConvAgent_LipSyncComponent::OnTextResponseReceived(const FString& Re
WaitElapsed * 1000.0); WaitElapsed * 1000.0);
} }
if (bDebug) // Log the viseme sequence for debugging (Log level so it's always visible)
{ {
FString VisSeq; FString VisSeq;
int32 Count = 0; int32 Count = 0;
@ -1913,6 +1915,10 @@ void UPS_AI_ConvAgent_LipSyncComponent::ConvertTextToVisemes(const FString& Text
} }
} }
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
TEXT("Syllable filter: %d raw → %d visible (removed %d invisible consonants/sil)"),
RawCount, Filtered.Num(), RawCount - Filtered.Num());
TextVisemeSequence = MoveTemp(Filtered); TextVisemeSequence = MoveTemp(Filtered);
} }
} }
@ -2051,6 +2057,13 @@ void UPS_AI_ConvAgent_LipSyncComponent::ApplyTextVisemesToQueue()
} }
bTextVisemesApplied = true; bTextVisemesApplied = true;
const float FinalRatio = static_cast<float>(ActiveFrames) / static_cast<float>(SeqNum);
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
TEXT("Applied %d syllable-visemes to %d active frames (of %d total) — %.1f frames/viseme (%.0fms/viseme)"),
SeqNum, ActiveFrames, VisemeQueue.Num(),
FinalRatio, FinalRatio * 32.0f);
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -2129,6 +2142,12 @@ void UPS_AI_ConvAgent_LipSyncComponent::BuildVisemeTimeline()
bVisemeTimelineActive = true; bVisemeTimelineActive = true;
bTextVisemesApplied = true; bTextVisemesApplied = true;
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
TEXT("Built viseme timeline: %d entries (from %d, max %d), audio=%.0fms, scale=%.2f → %.0fms/viseme avg"),
FinalSequence.Num(), TextVisemeSequence.Num(), MaxVisemes,
AudioDurationSec * 1000.0f, Scale,
(FinalSequence.Num() > 0) ? (CursorSec * 1000.0f / FinalSequence.Num()) : 0.0f);
} }
void UPS_AI_ConvAgent_LipSyncComponent::AnalyzeSpectrum() void UPS_AI_ConvAgent_LipSyncComponent::AnalyzeSpectrum()
@ -2423,7 +2442,15 @@ void UPS_AI_ConvAgent_LipSyncComponent::ApplyMorphTargets()
// OverrideCurveValue() does NOT work because the AnimGraph resets curves // OverrideCurveValue() does NOT work because the AnimGraph resets curves
// before the Control Rig reads them. The custom AnimNode injects curves // before the Control Rig reads them. The custom AnimNode injects curves
// from within the evaluation pipeline where they persist. // from within the evaluation pipeline where they persist.
// Data is read by the AnimNode via GetCurrentBlendshapes(). static bool bLoggedOnce = false;
if (!bLoggedOnce)
{
UE_LOG(LogPS_AI_ConvAgent_LipSync, Log,
TEXT("MetaHuman curve mode: lip sync curves will be injected by the "
"PS AI ConvAgent Lip Sync AnimNode in the Face AnimBP. "
"Ensure the node is placed before the Control Rig in the AnimGraph."));
bLoggedOnce = true;
}
// Data is read by the AnimNode via GetCurrentBlendshapes() — nothing to do here. // Data is read by the AnimNode via GetCurrentBlendshapes() — nothing to do here.
} }
else else

View File

@ -66,6 +66,7 @@ void UPS_AI_ConvAgent_MicrophoneCaptureComponent::StartCapture()
AudioCapture.StartStream(); AudioCapture.StartStream();
bCapturing = true; bCapturing = true;
UE_LOG(LogPS_AI_ConvAgent_Mic, Log, TEXT("Audio capture started."));
} }
void UPS_AI_ConvAgent_MicrophoneCaptureComponent::StopCapture() void UPS_AI_ConvAgent_MicrophoneCaptureComponent::StopCapture()
@ -75,6 +76,7 @@ void UPS_AI_ConvAgent_MicrophoneCaptureComponent::StopCapture()
AudioCapture.StopStream(); AudioCapture.StopStream();
AudioCapture.CloseStream(); AudioCapture.CloseStream();
bCapturing = false; bCapturing = false;
UE_LOG(LogPS_AI_ConvAgent_Mic, Log, TEXT("Audio capture stopped."));
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────

View File

@ -7,10 +7,26 @@
#include "IWebSocket.h" #include "IWebSocket.h"
#include "Json.h" #include "Json.h"
#include "JsonUtilities.h"
#include "Misc/Base64.h" #include "Misc/Base64.h"
DEFINE_LOG_CATEGORY_STATIC(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, All); DEFINE_LOG_CATEGORY_STATIC(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, All);
// ─────────────────────────────────────────────────────────────────────────────
// Helpers
// ─────────────────────────────────────────────────────────────────────────────
static void EL_LOG(bool bVerbose, const TCHAR* Format, ...)
{
if (!bVerbose) return;
va_list Args;
va_start(Args, Format);
// Forward to UE_LOG at Verbose level
TCHAR Buffer[2048];
FCString::GetVarArgs(Buffer, UE_ARRAY_COUNT(Buffer), Format, Args);
va_end(Args);
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Verbose, TEXT("%s"), Buffer);
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// Connect / Disconnect // Connect / Disconnect
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -128,7 +144,8 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendUserTurnStart()
bFirstAudioResponseLogged = false; bFirstAudioResponseLogged = false;
bAgentResponseStartedFired = false; bAgentResponseStartedFired = false;
// No log here — turn start is implicit from audio chunks following. const double T = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("[T+%.2fs] User turn started — mic open, audio chunks will follow."), T);
} }
void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendUserTurnEnd() void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendUserTurnEnd()
@ -144,7 +161,8 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendUserTurnEnd()
// in a loop: part arrives → event → StopListening → SendUserTurnEnd → flag reset → part arrives → loop. // in a loop: part arrives → event → StopListening → SendUserTurnEnd → flag reset → part arrives → loop.
// The flag is only reset in SendUserTurnStart() at the beginning of a new user turn. // The flag is only reset in SendUserTurnStart() at the beginning of a new user turn.
// No log here — turn end is tracked internally for latency measurement. const double T = UserTurnEndTime - SessionStartTime;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("[T+%.2fs] User turn ended — server VAD silence detection started (turn_timeout=1s)."), T);
} }
void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendTextMessage(const FString& Text) void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendTextMessage(const FString& Text)
@ -167,6 +185,8 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendInterrupt()
{ {
if (!IsConnected()) return; if (!IsConnected()) return;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("Sending interrupt."));
TSharedPtr<FJsonObject> Msg = MakeShareable(new FJsonObject()); TSharedPtr<FJsonObject> Msg = MakeShareable(new FJsonObject());
Msg->SetStringField(TEXT("type"), PS_AI_ConvAgent_MessageType_ElevenLabs::Interrupt); Msg->SetStringField(TEXT("type"), PS_AI_ConvAgent_MessageType_ElevenLabs::Interrupt);
SendJsonMessage(Msg); SendJsonMessage(Msg);
@ -257,13 +277,7 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsConnected()
FString InitJson; FString InitJson;
TSharedRef<TJsonWriter<>> InitWriter = TJsonWriterFactory<>::Create(&InitJson); TSharedRef<TJsonWriter<>> InitWriter = TJsonWriterFactory<>::Create(&InitJson);
FJsonSerializer::Serialize(InitMsg.ToSharedRef(), InitWriter); FJsonSerializer::Serialize(InitMsg.ToSharedRef(), InitWriter);
{ UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("Sending initiation: %s"), *InitJson);
const UPS_AI_ConvAgent_Settings_ElevenLabs* S = FPS_AI_ConvAgentModule::Get().GetSettings();
if (S->bVerboseLogging)
{
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Verbose, TEXT("Sending initiation: %s"), *InitJson);
}
}
WebSocket->Send(InitJson); WebSocket->Send(InitJson);
} }
@ -316,14 +330,8 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsMessage(const FString& Mess
return; return;
} }
// Message type log gated behind bVerboseLogging to avoid output spam. // Log every non-ping message type received from the server.
{ UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("Received message type: %s"), *MsgType);
const UPS_AI_ConvAgent_Settings_ElevenLabs* S = FPS_AI_ConvAgentModule::Get().GetSettings();
if (S->bVerboseLogging)
{
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Verbose, TEXT("Received: %s"), *MsgType);
}
}
if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::ConversationInitiation) if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::ConversationInitiation)
{ {
@ -331,22 +339,47 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::OnWsMessage(const FString& Mess
} }
else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::AudioResponse) else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::AudioResponse)
{ {
// Log time-to-first-audio: latency between end of user turn and first agent audio.
if (bWaitingForResponse && !bFirstAudioResponseLogged) if (bWaitingForResponse && !bFirstAudioResponseLogged)
{ {
const double Now = FPlatformTime::Seconds();
const double T = Now - SessionStartTime;
const double LatencyFromTurnEnd = (Now - UserTurnEndTime) * 1000.0;
const double LatencyFromLastChunk = (Now - LastAudioChunkSentTime) * 1000.0;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Warning,
TEXT("[T+%.2fs] [LATENCY] First audio: %.0f ms after turn end (%.0f ms after last chunk)"),
T, LatencyFromTurnEnd, LatencyFromLastChunk);
bFirstAudioResponseLogged = true; bFirstAudioResponseLogged = true;
} }
HandleAudioResponse(Root); HandleAudioResponse(Root);
} }
else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::UserTranscript) else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::UserTranscript)
{ {
// Log transcription latency.
if (bWaitingForResponse) if (bWaitingForResponse)
{ {
const double Now = FPlatformTime::Seconds();
const double T = Now - SessionStartTime;
const double LatencyFromTurnEnd = (Now - UserTurnEndTime) * 1000.0;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Warning,
TEXT("[T+%.2fs] [LATENCY] User transcript: %.0f ms after turn end"),
T, LatencyFromTurnEnd);
bWaitingForResponse = false; bWaitingForResponse = false;
} }
HandleTranscript(Root); HandleTranscript(Root);
} }
else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::AgentResponse) else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::AgentResponse)
{ {
// Log agent text response latency.
if (UserTurnEndTime > 0.0)
{
const double Now = FPlatformTime::Seconds();
const double T = Now - SessionStartTime;
const double LatencyFromTurnEnd = (Now - UserTurnEndTime) * 1000.0;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Warning,
TEXT("[T+%.2fs] [LATENCY] Agent text response: %.0f ms after turn end"),
T, LatencyFromTurnEnd);
}
HandleAgentResponse(Root); HandleAgentResponse(Root);
} }
else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::AgentChatResponsePart) else if (MsgType == PS_AI_ConvAgent_MessageType_ElevenLabs::AgentChatResponsePart)
@ -558,9 +591,18 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandleAgentChatResponsePart(con
// of the previous response). // of the previous response).
if (LastInterruptEventId > 0) if (LastInterruptEventId > 0)
{ {
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log,
TEXT("New generation started — resetting LastInterruptEventId (was %d)."),
LastInterruptEventId);
LastInterruptEventId = 0; LastInterruptEventId = 0;
} }
const double Now = FPlatformTime::Seconds();
const double T = Now - SessionStartTime;
const double LatencyFromTurnEnd = UserTurnEndTime > 0.0 ? (Now - UserTurnEndTime) * 1000.0 : 0.0;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log,
TEXT("[T+%.2fs] Agent started generating (%.0f ms after turn end — includes VAD silence timeout + LLM start)."),
T, LatencyFromTurnEnd);
OnAgentResponseStarted.Broadcast(); OnAgentResponseStarted.Broadcast();
} }
@ -616,6 +658,7 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandleInterruption(const TShare
} }
} }
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("Agent interrupted (server ack, LastInterruptEventId=%d)."), LastInterruptEventId);
OnInterrupted.Broadcast(); OnInterrupted.Broadcast();
} }
@ -654,6 +697,10 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandleClientToolCall(const TSha
} }
} }
const double T = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("[T+%.2fs] Client tool call: %s (id=%s, %d params)"),
T, *ToolCall.ToolName, *ToolCall.ToolCallId, ToolCall.Parameters.Num());
OnClientToolCall.Broadcast(ToolCall); OnClientToolCall.Broadcast(ToolCall);
} }
@ -667,6 +714,10 @@ void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::SendClientToolResult(const FStr
Msg->SetStringField(TEXT("result"), Result); Msg->SetStringField(TEXT("result"), Result);
Msg->SetBoolField(TEXT("is_error"), bIsError); Msg->SetBoolField(TEXT("is_error"), bIsError);
SendJsonMessage(Msg); SendJsonMessage(Msg);
const double T = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogPS_AI_ConvAgent_WS_ElevenLabs, Log, TEXT("[T+%.2fs] Sent client_tool_result for %s: %s (error=%s)"),
T, *ToolCallId, *Result, bIsError ? TEXT("true") : TEXT("false"));
} }
void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandlePing(const TSharedPtr<FJsonObject>& Root) void UPS_AI_ConvAgent_WebSocket_ElevenLabsProxy::HandlePing(const TSharedPtr<FJsonObject>& Root)

View File

@ -15,6 +15,14 @@ class PS_AI_CONVAGENT_API UPS_AI_ConvAgent_Settings_ElevenLabs : public UObject
GENERATED_BODY() GENERATED_BODY()
public: public:
UPS_AI_ConvAgent_Settings_ElevenLabs(const FObjectInitializer& ObjectInitializer)
: Super(ObjectInitializer)
{
API_Key = TEXT("");
AgentID = TEXT("");
bSignedURLMode = false;
}
/** /**
* ElevenLabs API key. * ElevenLabs API key.
* Obtain from https://elevenlabs.io used to authenticate WebSocket connections. * Obtain from https://elevenlabs.io used to authenticate WebSocket connections.
@ -30,6 +38,23 @@ public:
UPROPERTY(Config, EditAnywhere, Category = "PS AI ConvAgent|ElevenLabs API") UPROPERTY(Config, EditAnywhere, Category = "PS AI ConvAgent|ElevenLabs API")
FString AgentID; FString AgentID;
/**
* When true, the plugin fetches a signed WebSocket URL from your own backend
* before connecting, so the API key is never exposed in the client.
* Set SignedURLEndpoint to point to your server that returns the signed URL.
*/
UPROPERTY(Config, EditAnywhere, Category = "PS AI ConvAgent|ElevenLabs API|Security")
bool bSignedURLMode;
/**
* Your backend endpoint that returns a signed WebSocket URL for ElevenLabs.
* Only used when bSignedURLMode = true.
* Expected response body: { "signed_url": "wss://..." }
*/
UPROPERTY(Config, EditAnywhere, Category = "PS AI ConvAgent|ElevenLabs API|Security",
meta = (EditCondition = "bSignedURLMode"))
FString SignedURLEndpoint;
/** /**
* Override the ElevenLabs WebSocket base URL. Leave empty to use the default: * Override the ElevenLabs WebSocket base URL. Leave empty to use the default:
* wss://api.elevenlabs.io/v1/convai/conversation * wss://api.elevenlabs.io/v1/convai/conversation

View File

@ -36,6 +36,8 @@ namespace PS_AI_ConvAgent_MessageType_ElevenLabs
{ {
// Client → Server // Client → Server
static const FString AudioChunk = TEXT("user_audio_chunk"); static const FString AudioChunk = TEXT("user_audio_chunk");
// Client turn mode: signal user is currently active/speaking
static const FString UserActivity = TEXT("user_activity");
// Client turn mode: send a text message without audio // Client turn mode: send a text message without audio
static const FString UserMessage = TEXT("user_message"); static const FString UserMessage = TEXT("user_message");
static const FString Interrupt = TEXT("interrupt"); static const FString Interrupt = TEXT("interrupt");
@ -53,6 +55,7 @@ namespace PS_AI_ConvAgent_MessageType_ElevenLabs
static const FString InterruptionEvent = TEXT("interruption"); static const FString InterruptionEvent = TEXT("interruption");
static const FString PingEvent = TEXT("ping"); static const FString PingEvent = TEXT("ping");
static const FString ClientToolCall = TEXT("client_tool_call"); static const FString ClientToolCall = TEXT("client_tool_call");
static const FString InternalTentativeAgent = TEXT("internal_tentative_agent_response");
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────

View File

@ -125,6 +125,11 @@ public:
meta = (ToolTip = "Auto-open the microphone when the conversation starts.\nOnly applies in Server VAD mode. In push-to-talk mode, call StartListening() manually.")) meta = (ToolTip = "Auto-open the microphone when the conversation starts.\nOnly applies in Server VAD mode. In push-to-talk mode, call StartListening() manually."))
bool bAutoStartListening = true; bool bAutoStartListening = true;
/** Let the LLM start generating a response during silence, before the VAD is fully confident the user has finished speaking. Saves 200-500ms of latency but may be unstable in long multi-turn sessions. Disabled by default. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs|Latency",
meta = (ToolTip = "Speculative turn: the LLM begins generating during silence before full turn-end confidence.\nReduces latency by 200-500ms. May be unstable in long sessions — test before enabling in production."))
bool bSpeculativeTurn = false;
/** How many milliseconds of microphone audio to accumulate before sending a chunk to ElevenLabs. Lower values reduce latency but may degrade voice detection accuracy. Higher values are more reliable but add delay. */ /** How many milliseconds of microphone audio to accumulate before sending a chunk to ElevenLabs. Lower values reduce latency but may degrade voice detection accuracy. Higher values are more reliable but add delay. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs|Latency", UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|ElevenLabs|Latency",
meta = (ClampMin = "20", ClampMax = "500", meta = (ClampMin = "20", ClampMax = "500",
@ -540,6 +545,10 @@ private:
TArray<uint8> AudioQueue; TArray<uint8> AudioQueue;
FCriticalSection AudioQueueLock; FCriticalSection AudioQueueLock;
// Reusable zero-filled buffer fed to USoundWaveProcedural during TTS gaps
// to keep the audio component alive (prevents stop on buffer underrun).
TArray<uint8> SilenceBuffer;
// Pre-buffer state: delay playback start to absorb TTS inter-chunk gaps. // Pre-buffer state: delay playback start to absorb TTS inter-chunk gaps.
bool bPreBuffering = false; bool bPreBuffering = false;
double PreBufferStartTime = 0.0; double PreBufferStartTime = 0.0;

View File

@ -4,6 +4,7 @@
#include "CoreMinimal.h" #include "CoreMinimal.h"
#include "Engine/DataAsset.h" #include "Engine/DataAsset.h"
#include "Engine/AssetManager.h"
#include "PS_AI_ConvAgent_Definitions.h" #include "PS_AI_ConvAgent_Definitions.h"
#include "PS_AI_ConvAgent_EmotionPoseMap.generated.h" #include "PS_AI_ConvAgent_EmotionPoseMap.generated.h"

View File

@ -210,6 +210,9 @@ private:
UPROPERTY() UPROPERTY()
UPS_AI_ConvAgent_MicrophoneCaptureComponent* MicComponent = nullptr; UPS_AI_ConvAgent_MicrophoneCaptureComponent* MicComponent = nullptr;
/** True while ForceSelectAgent is active (suppresses automatic re-evaluation for one frame). */
bool bForceSelectionActive = false;
// ── Posture timers ─────────────────────────────────────────────────────── // ── Posture timers ───────────────────────────────────────────────────────
FTimerHandle PostureAttachTimerHandle; FTimerHandle PostureAttachTimerHandle;

View File

@ -261,6 +261,9 @@ private:
// with fast attack (mouth opens quickly) and slow release (closes gradually). // with fast attack (mouth opens quickly) and slow release (closes gradually).
float AudioEnvelopeValue = 0.0f; float AudioEnvelopeValue = 0.0f;
// Whether we have pending analysis results to process
bool bHasPendingAnalysis = false;
// ── Text-driven lip sync ────────────────────────────────────────────────── // ── Text-driven lip sync ──────────────────────────────────────────────────
// Accumulated partial text from streaming (agent_chat_response_part events). // Accumulated partial text from streaming (agent_chat_response_part events).

View File

@ -4,6 +4,7 @@
#include "CoreMinimal.h" #include "CoreMinimal.h"
#include "Engine/DataAsset.h" #include "Engine/DataAsset.h"
#include "Engine/AssetManager.h"
#include "PS_AI_ConvAgent_LipSyncPoseMap.generated.h" #include "PS_AI_ConvAgent_LipSyncPoseMap.generated.h"
class UAnimSequence; class UAnimSequence;

View File

@ -261,4 +261,7 @@ public:
// Set by UPS_AI_ConvAgent_ElevenLabsComponent before calling Connect(). // Set by UPS_AI_ConvAgent_ElevenLabsComponent before calling Connect().
// Controls turn_timeout in conversation_initiation_client_data. // Controls turn_timeout in conversation_initiation_client_data.
EPS_AI_ConvAgent_TurnMode_ElevenLabs TurnMode = EPS_AI_ConvAgent_TurnMode_ElevenLabs::Server; EPS_AI_ConvAgent_TurnMode_ElevenLabs TurnMode = EPS_AI_ConvAgent_TurnMode_ElevenLabs::Server;
// Speculative turn: start LLM generation during silence before full turn confidence.
bool bSpeculativeTurn = true;
}; };