Add local VAD and conversation-driven NPC pause

MicrophoneCaptureComponent:
- Local Voice Activity Detection (RMS-based, independent of ElevenLabs)
- Configurable threshold, onset time, silence time
- bIsUserSpeaking flag + OnUserVoiceActivityChanged delegate
- Hysteresis prevents flickering between speech/silence

AIController gaze bridge:
- Resolve MicComponent from player Pawn (not NPC) via reflection
- ConversationPaused BB key blocks movement branches via BT decorator
- NPC stops only when user actually speaks (not just on proximity connect)
- NPC resumes when conversation disconnects
- Spline PauseFollowing/ResumeFollowing on conversation start/end

BT setup required:
- Add Blackboard Condition (ConversationPaused Is Not Set, Aborts=Both)
  on spline and patrol branches

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-04-02 16:22:21 +02:00
parent 90bee03b44
commit c32aba9902
29 changed files with 228 additions and 8 deletions

View File

@ -100,6 +100,9 @@ void APS_AI_Behavior_AIController::OnUnPossess()
ClearGazeTarget();
CachedGazeComponent = nullptr;
CachedConvAgentComponent = nullptr;
// Reset local-VAD / conversation-pause state on unpossess so a later
// possession starts from a clean "not paused, user hasn't spoken" state.
CachedMicComponent = nullptr;
bConversationPaused = false;
bUserHasSpokenInConversation = false;
ProximityGazeTarget = nullptr;
PersonalityComp = nullptr;
@ -199,6 +202,12 @@ void APS_AI_Behavior_AIController::SetupBlackboard()
PreferCoverEntry.EntryName = PS_AI_Behavior_BB::PreferCover;
PreferCoverEntry.KeyType = NewObject<UBlackboardKeyType_Bool>(BlackboardAsset);
BlackboardAsset->Keys.Add(PreferCoverEntry);
// ConversationPaused (bool: NPC paused during active conversation)
// Read by the BT "Blackboard Condition" decorator (ConversationPaused Is Not
// Set, Aborts=Both) on the spline and patrol branches — setting it true
// aborts any running movement branch.
FBlackboardEntry ConvPausedEntry;
ConvPausedEntry.EntryName = PS_AI_Behavior_BB::ConversationPaused;
ConvPausedEntry.KeyType = NewObject<UBlackboardKeyType_Bool>(BlackboardAsset);
BlackboardAsset->Keys.Add(ConvPausedEntry);
}
UBlackboardComponent* RawBBComp = nullptr;
@ -536,12 +545,29 @@ void APS_AI_Behavior_AIController::TryBindGazeComponent()
}
}
// Cache MicrophoneCaptureComponent for local VAD
// The mic lives on the PLAYER's Pawn, not the NPC — find it on the first player
// Lazy class resolve with retry: if LoadClass returns nullptr (module not
// loaded yet), the next call tries again instead of latching failure.
static UClass* MicClass = nullptr;
if (!MicClass)
{
MicClass = LoadClass<UActorComponent>(nullptr,
TEXT("/Script/PS_AI_ConvAgent.PS_AI_ConvAgent_MicrophoneCaptureComponent"));
}
if (MicClass)
{
// NOTE(review): raw FBoolProperty* cached from a dynamically loaded class —
// this dangles if the ConvAgent package is ever unloaded or hot-reloaded in
// editor. TODO confirm the class lifetime assumption.
MicProp_bIsUserSpeaking = CastField<FBoolProperty>(
MicClass->FindPropertyByName(TEXT("bIsUserSpeaking")));
// Don't cache the component here — resolve dynamically in UpdateGazeTarget
// because the player Pawn may not exist yet at OnPossess time
}
UE_LOG(LogPS_AI_Behavior, Log,
TEXT("[%s] Gaze bridge bound: TargetActor=%s, BodyTracking=%s, Conversation=%s"),
TEXT("[%s] Gaze bridge bound: TargetActor=%s, BodyTracking=%s, Conversation=%s, VAD=%s"),
*GetName(),
GazeProp_TargetActor ? TEXT("OK") : TEXT("MISS"),
GazeProp_bEnableBodyTracking ? TEXT("OK") : TEXT("MISS"),
ConvProp_bNetIsConversing ? TEXT("OK") : TEXT("N/A"));
ConvProp_bNetIsConversing ? TEXT("OK") : TEXT("N/A"),
MicProp_bIsUserSpeaking ? TEXT("OK") : TEXT("N/A"));
}
void APS_AI_Behavior_AIController::SetGazeTarget(AActor* Target, bool bEnableBody)
@ -628,13 +654,117 @@ void APS_AI_Behavior_AIController::UpdateGazeTarget(float DeltaSeconds)
}
}
// ── Priority 2: Conversation active → do NOT touch gaze ─────────
// TODO: Add movement pause when local VAD detects user speech (not just connection)
if (IsConversationActiveOnPawn())
// ── Priority 2: Conversation active → manage movement pause via local VAD
{
ProximityGazeTarget = nullptr;
ProximityGazeDuration = 0.0f;
return;
const bool bConversing = IsConversationActiveOnPawn();
// Check local VAD: is the user actually speaking?
// The mic is on the PLAYER's Pawn — find it dynamically
bool bUserSpeaking = false;
if (MicProp_bIsUserSpeaking)
{
// Try cached mic first
UActorComponent* MicComp = CachedMicComponent.Get();
if (!MicComp)
{
// Find mic on any player pawn
static UClass* MicClass = LoadClass<UActorComponent>(nullptr,
TEXT("/Script/PS_AI_ConvAgent.PS_AI_ConvAgent_MicrophoneCaptureComponent"));
if (MicClass)
{
for (FConstPlayerControllerIterator It = GetWorld()->GetPlayerControllerIterator(); It; ++It)
{
if (APlayerController* PC = It->Get())
{
if (APawn* PlayerPawn = PC->GetPawn())
{
MicComp = PlayerPawn->FindComponentByClass(MicClass);
if (MicComp)
{
CachedMicComponent = MicComp;
break;
}
}
}
}
}
}
if (MicComp)
{
bUserSpeaking = MicProp_bIsUserSpeaking->GetPropertyValue_InContainer(MicComp);
}
}
// Once user has spoken during this conversation, stay paused until conversation ends
if (bConversing && bUserSpeaking)
{
bUserHasSpokenInConversation = true;
}
if (bConversing && bUserHasSpokenInConversation)
{
// Set BB flag to block movement branches via decorator
if (!bConversationPaused)
{
bConversationPaused = true;
if (Blackboard)
{
Blackboard->SetValueAsBool(PS_AI_Behavior_BB::ConversationPaused, true);
}
StopMovement();
APawn* ConvPawn = GetPawn();
if (ConvPawn)
{
auto* Spline = ConvPawn->FindComponentByClass<UPS_AI_Behavior_SplineFollowerComponent>();
if (Spline)
{
Spline->PauseFollowing();
}
}
UE_LOG(LogPS_AI_Behavior, Log, TEXT("[%s] User speaking — conversation pause."), *GetName());
}
ProximityGazeTarget = nullptr;
ProximityGazeDuration = 0.0f;
return; // Gaze managed by ConvAgent
}
if (bConversing)
{
// Connected but user hasn't spoken yet — don't touch gaze, don't pause
ProximityGazeTarget = nullptr;
ProximityGazeDuration = 0.0f;
return;
}
// Conversation ended — clear BB flag, resume movement
if (bConversationPaused)
{
bConversationPaused = false;
bUserHasSpokenInConversation = false;
if (Blackboard)
{
Blackboard->SetValueAsBool(PS_AI_Behavior_BB::ConversationPaused, false);
}
APawn* ConvPawn = GetPawn();
if (ConvPawn)
{
if (auto* Spline = ConvPawn->FindComponentByClass<UPS_AI_Behavior_SplineFollowerComponent>())
{
Spline->ResumeFollowing();
}
}
UE_LOG(LogPS_AI_Behavior, Log, TEXT("[%s] Conversation ended — resumed."), *GetName());
}
}
// ── Priority 3/4: Proximity gaze ────────────────────────────────
@ -750,3 +880,4 @@ void APS_AI_Behavior_AIController::UpdateGazeTarget(float DeltaSeconds)
}
#endif
}

View File

@ -162,6 +162,14 @@ private:
TWeakObjectPtr<UActorComponent> CachedConvAgentComponent;
FBoolProperty* ConvProp_bNetIsConversing = nullptr;
// Mic VAD cache (local voice activity detection)
// Weak so a destroyed player pawn/component is detected; re-resolved on
// demand in UpdateGazeTarget.
TWeakObjectPtr<UActorComponent> CachedMicComponent;
// Raw reflection handle into the mic class; resolved in TryBindGazeComponent.
FBoolProperty* MicProp_bIsUserSpeaking = nullptr;
// Conversation pause state
// Game-thread flags driven by UpdateGazeTarget; cleared in OnUnPossess.
bool bConversationPaused = false;
bool bUserHasSpokenInConversation = false;
// Proximity gaze state
TWeakObjectPtr<AActor> ProximityGazeTarget;
float ProximityGazeDuration = 0.0f;

View File

@ -188,4 +188,5 @@ namespace PS_AI_Behavior_BB
inline const FName LastKnownTargetPosition = TEXT("LastKnownTargetPosition");
inline const FName ThreatPawnName = TEXT("ThreatPawnName"); // Debug: name of the owning Pawn behind ThreatActor
inline const FName PreferCover = TEXT("PreferCover"); // Bool: personality-driven cover preference cycle
inline const FName ConversationPaused = TEXT("ConversationPaused"); // Bool: NPC paused during active conversation
}

View File

@ -198,6 +198,54 @@ void UPS_AI_ConvAgent_MicrophoneCaptureComponent::OnAudioGenerate(
PeakLevel.store(Peak, std::memory_order_relaxed);
}
// ── Local Voice Activity Detection ──────────────────────────────────
// Runs on the audio thread for minimal latency. Uses hysteresis to
// prevent flickering. Dispatches state change to game thread.
{
const float Rms = CurrentRMS.load(std::memory_order_relaxed);
// Buffer length in seconds; Max(1, ...) guards a zero sample rate.
const float BufferDuration = static_cast<float>(NumFrames) / FMath::Max(1, InSampleRate);
const bool bAboveThreshold = (Rms > VoiceActivityThreshold);
if (bAboveThreshold)
{
// Sound present: reset the silence timer, grow the onset timer.
// NOTE(review): load-then-store is a non-atomic read-modify-write — safe
// only while the audio callback is the single writer of these accumulators.
VoiceSilenceAccumulator.store(0.0f, std::memory_order_relaxed);
const float NewOnset = VoiceOnsetAccumulator.load(std::memory_order_relaxed) + BufferDuration;
VoiceOnsetAccumulator.store(NewOnset, std::memory_order_relaxed);
// Hysteresis: declare speech only after VoiceOnsetTime of sustained signal.
if (!bIsUserSpeaking && NewOnset >= VoiceOnsetTime)
{
// NOTE(review): bIsUserSpeaking is a plain bool written here on the audio
// thread and read on the game thread (Blueprint / reflection) — technically
// a data race; consider mirroring through std::atomic<bool>.
bIsUserSpeaking = true;
// Weak self: skip the broadcast if the component dies before the
// game-thread task runs.
TWeakObjectPtr<UPS_AI_ConvAgent_MicrophoneCaptureComponent> WeakSelf(this);
AsyncTask(ENamedThreads::GameThread, [WeakSelf]()
{
if (auto* Self = WeakSelf.Get())
{
Self->OnUserVoiceActivityChanged.Broadcast(true);
}
});
}
}
else
{
// Silence: reset the onset timer, grow the silence timer.
VoiceOnsetAccumulator.store(0.0f, std::memory_order_relaxed);
const float NewSilence = VoiceSilenceAccumulator.load(std::memory_order_relaxed) + BufferDuration;
VoiceSilenceAccumulator.store(NewSilence, std::memory_order_relaxed);
// Declare speech ended only after VoiceSilenceTime of sustained silence,
// so short gaps between words don't flip the state.
if (bIsUserSpeaking && NewSilence >= VoiceSilenceTime)
{
bIsUserSpeaking = false;
TWeakObjectPtr<UPS_AI_ConvAgent_MicrophoneCaptureComponent> WeakSelf(this);
AsyncTask(ENamedThreads::GameThread, [WeakSelf]()
{
if (auto* Self = WeakSelf.Get())
{
Self->OnUserVoiceActivityChanged.Broadcast(false);
}
});
}
}
}
// Resample + downmix to 16000 Hz mono.
TArray<float> Resampled = ResampleTo16000(FloatAudio, NumFrames, InNumChannels, InSampleRate);

View File

@ -11,6 +11,9 @@
// Delivers captured float PCM samples (16000 Hz mono, resampled from device rate).
DECLARE_MULTICAST_DELEGATE_OneParam(FOnPS_AI_ConvAgent_AudioCaptured, const TArray<float>& /*FloatPCM*/);
// Fired when local voice activity changes (user starts/stops speaking).
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnPS_AI_ConvAgent_VoiceActivityChanged, bool, bIsSpeaking);
/**
* Lightweight microphone capture component.
* Captures from the default audio input device, resamples to 16000 Hz mono,
@ -46,6 +49,31 @@ public:
* Set by the agent component for echo suppression (skip mic while agent speaks). */
std::atomic<bool>* EchoSuppressFlag = nullptr;
// ── Local Voice Activity Detection ──────────────────────────────────
/** RMS threshold above which audio is considered voice. Adjust based on mic sensitivity and environment noise. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Voice Activity",
meta = (ClampMin = "0.001", ClampMax = "0.2"))
float VoiceActivityThreshold = 0.015f;
/** Time (seconds) RMS must stay above threshold before declaring speech onset. Prevents false triggers from clicks/bumps. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Voice Activity",
meta = (ClampMin = "0.01", ClampMax = "1.0"))
float VoiceOnsetTime = 0.1f;
/** Time (seconds) RMS must stay below threshold before declaring speech ended. Prevents cutting off between words. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Voice Activity",
meta = (ClampMin = "0.1", ClampMax = "3.0"))
float VoiceSilenceTime = 0.5f;
/** Whether the user is currently speaking (local VAD, independent of ElevenLabs server). */
// NOTE(review): written from the audio callback in OnAudioGenerate but read on
// the game thread — plain bool is a benign-in-practice data race; an
// std::atomic<bool> mirror (bool kept for the UPROPERTY) would be strictly correct.
UPROPERTY(BlueprintReadOnly, Category = "PS AI ConvAgent|Voice Activity")
bool bIsUserSpeaking = false;
/** Fired when voice activity changes. Use to pause NPC movement, trigger animations, etc. */
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|Voice Activity")
FOnPS_AI_ConvAgent_VoiceActivityChanged OnUserVoiceActivityChanged;
// ── Debug ────────────────────────────────────────────────────────────────
/** Enable debug logging for this component.
@ -99,6 +127,10 @@ private:
std::atomic<float> CurrentRMS{0.0f};
std::atomic<float> PeakLevel{0.0f};
// VAD accumulators (written from audio callback thread)
std::atomic<float> VoiceOnsetAccumulator{0.0f};
std::atomic<float> VoiceSilenceAccumulator{0.0f};
// Device name cached on StartCapture for HUD display.
FString CachedDeviceName;
};