Add local VAD and conversation-driven NPC pause
MicrophoneCaptureComponent:
- Local Voice Activity Detection (RMS-based, independent of ElevenLabs)
- Configurable threshold, onset time, silence time
- bIsUserSpeaking flag + OnUserVoiceActivityChanged delegate
- Hysteresis prevents flickering between speech/silence

AIController gaze bridge:
- Resolve MicComponent from player Pawn (not NPC) via reflection
- ConversationPaused BB key blocks movement branches via BT decorator
- NPC stops only when user actually speaks (not just on proximity connect)
- NPC resumes when conversation disconnects
- Spline PauseFollowing when the conversation pause begins, ResumeFollowing when the conversation ends

BT setup required:
- Add Blackboard Condition (ConversationPaused Is Not Set, Aborts=Both) on spline and patrol branches

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
90bee03b44
commit
c32aba9902
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -100,6 +100,9 @@ void APS_AI_Behavior_AIController::OnUnPossess()
|
|||||||
ClearGazeTarget();
|
ClearGazeTarget();
|
||||||
CachedGazeComponent = nullptr;
|
CachedGazeComponent = nullptr;
|
||||||
CachedConvAgentComponent = nullptr;
|
CachedConvAgentComponent = nullptr;
|
||||||
|
CachedMicComponent = nullptr;
|
||||||
|
bConversationPaused = false;
|
||||||
|
bUserHasSpokenInConversation = false;
|
||||||
ProximityGazeTarget = nullptr;
|
ProximityGazeTarget = nullptr;
|
||||||
|
|
||||||
PersonalityComp = nullptr;
|
PersonalityComp = nullptr;
|
||||||
@ -199,6 +202,12 @@ void APS_AI_Behavior_AIController::SetupBlackboard()
|
|||||||
PreferCoverEntry.EntryName = PS_AI_Behavior_BB::PreferCover;
|
PreferCoverEntry.EntryName = PS_AI_Behavior_BB::PreferCover;
|
||||||
PreferCoverEntry.KeyType = NewObject<UBlackboardKeyType_Bool>(BlackboardAsset);
|
PreferCoverEntry.KeyType = NewObject<UBlackboardKeyType_Bool>(BlackboardAsset);
|
||||||
BlackboardAsset->Keys.Add(PreferCoverEntry);
|
BlackboardAsset->Keys.Add(PreferCoverEntry);
|
||||||
|
|
||||||
|
// ConversationPaused (bool: NPC paused during active conversation)
|
||||||
|
FBlackboardEntry ConvPausedEntry;
|
||||||
|
ConvPausedEntry.EntryName = PS_AI_Behavior_BB::ConversationPaused;
|
||||||
|
ConvPausedEntry.KeyType = NewObject<UBlackboardKeyType_Bool>(BlackboardAsset);
|
||||||
|
BlackboardAsset->Keys.Add(ConvPausedEntry);
|
||||||
}
|
}
|
||||||
|
|
||||||
UBlackboardComponent* RawBBComp = nullptr;
|
UBlackboardComponent* RawBBComp = nullptr;
|
||||||
@ -536,12 +545,29 @@ void APS_AI_Behavior_AIController::TryBindGazeComponent()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cache MicrophoneCaptureComponent for local VAD
|
||||||
|
// The mic lives on the PLAYER's Pawn, not the NPC — find it on the first player
|
||||||
|
static UClass* MicClass = nullptr;
|
||||||
|
if (!MicClass)
|
||||||
|
{
|
||||||
|
MicClass = LoadClass<UActorComponent>(nullptr,
|
||||||
|
TEXT("/Script/PS_AI_ConvAgent.PS_AI_ConvAgent_MicrophoneCaptureComponent"));
|
||||||
|
}
|
||||||
|
if (MicClass)
|
||||||
|
{
|
||||||
|
MicProp_bIsUserSpeaking = CastField<FBoolProperty>(
|
||||||
|
MicClass->FindPropertyByName(TEXT("bIsUserSpeaking")));
|
||||||
|
// Don't cache the component here — resolve dynamically in UpdateGazeTarget
|
||||||
|
// because the player Pawn may not exist yet at OnPossess time
|
||||||
|
}
|
||||||
|
|
||||||
UE_LOG(LogPS_AI_Behavior, Log,
|
UE_LOG(LogPS_AI_Behavior, Log,
|
||||||
TEXT("[%s] Gaze bridge bound: TargetActor=%s, BodyTracking=%s, Conversation=%s"),
|
TEXT("[%s] Gaze bridge bound: TargetActor=%s, BodyTracking=%s, Conversation=%s, VAD=%s"),
|
||||||
*GetName(),
|
*GetName(),
|
||||||
GazeProp_TargetActor ? TEXT("OK") : TEXT("MISS"),
|
GazeProp_TargetActor ? TEXT("OK") : TEXT("MISS"),
|
||||||
GazeProp_bEnableBodyTracking ? TEXT("OK") : TEXT("MISS"),
|
GazeProp_bEnableBodyTracking ? TEXT("OK") : TEXT("MISS"),
|
||||||
ConvProp_bNetIsConversing ? TEXT("OK") : TEXT("N/A"));
|
ConvProp_bNetIsConversing ? TEXT("OK") : TEXT("N/A"),
|
||||||
|
MicProp_bIsUserSpeaking ? TEXT("OK") : TEXT("N/A"));
|
||||||
}
|
}
|
||||||
|
|
||||||
void APS_AI_Behavior_AIController::SetGazeTarget(AActor* Target, bool bEnableBody)
|
void APS_AI_Behavior_AIController::SetGazeTarget(AActor* Target, bool bEnableBody)
|
||||||
@ -628,13 +654,117 @@ void APS_AI_Behavior_AIController::UpdateGazeTarget(float DeltaSeconds)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Priority 2: Conversation active → do NOT touch gaze ─────────
|
// ── Priority 2: Conversation active → manage movement pause via local VAD
|
||||||
// TODO: Add movement pause when local VAD detects user speech (not just connection)
|
|
||||||
if (IsConversationActiveOnPawn())
|
|
||||||
{
|
{
|
||||||
ProximityGazeTarget = nullptr;
|
const bool bConversing = IsConversationActiveOnPawn();
|
||||||
ProximityGazeDuration = 0.0f;
|
|
||||||
return;
|
|
||||||
|
// Check local VAD: is the user actually speaking?
|
||||||
|
// The mic is on the PLAYER's Pawn — find it dynamically
|
||||||
|
bool bUserSpeaking = false;
|
||||||
|
if (MicProp_bIsUserSpeaking)
|
||||||
|
{
|
||||||
|
// Try cached mic first
|
||||||
|
UActorComponent* MicComp = CachedMicComponent.Get();
|
||||||
|
if (!MicComp)
|
||||||
|
{
|
||||||
|
// Find mic on any player pawn
|
||||||
|
static UClass* MicClass = LoadClass<UActorComponent>(nullptr,
|
||||||
|
TEXT("/Script/PS_AI_ConvAgent.PS_AI_ConvAgent_MicrophoneCaptureComponent"));
|
||||||
|
if (MicClass)
|
||||||
|
{
|
||||||
|
for (FConstPlayerControllerIterator It = GetWorld()->GetPlayerControllerIterator(); It; ++It)
|
||||||
|
{
|
||||||
|
if (APlayerController* PC = It->Get())
|
||||||
|
{
|
||||||
|
if (APawn* PlayerPawn = PC->GetPawn())
|
||||||
|
{
|
||||||
|
MicComp = PlayerPawn->FindComponentByClass(MicClass);
|
||||||
|
if (MicComp)
|
||||||
|
{
|
||||||
|
CachedMicComponent = MicComp;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (MicComp)
|
||||||
|
{
|
||||||
|
bUserSpeaking = MicProp_bIsUserSpeaking->GetPropertyValue_InContainer(MicComp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Once user has spoken during this conversation, stay paused until conversation ends
|
||||||
|
if (bConversing && bUserSpeaking)
|
||||||
|
{
|
||||||
|
bUserHasSpokenInConversation = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bConversing && bUserHasSpokenInConversation)
|
||||||
|
{
|
||||||
|
// Set BB flag to block movement branches via decorator
|
||||||
|
if (!bConversationPaused)
|
||||||
|
{
|
||||||
|
bConversationPaused = true;
|
||||||
|
|
||||||
|
if (Blackboard)
|
||||||
|
{
|
||||||
|
Blackboard->SetValueAsBool(PS_AI_Behavior_BB::ConversationPaused, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
StopMovement();
|
||||||
|
|
||||||
|
APawn* ConvPawn = GetPawn();
|
||||||
|
if (ConvPawn)
|
||||||
|
{
|
||||||
|
auto* Spline = ConvPawn->FindComponentByClass<UPS_AI_Behavior_SplineFollowerComponent>();
|
||||||
|
if (Spline)
|
||||||
|
{
|
||||||
|
Spline->PauseFollowing();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
UE_LOG(LogPS_AI_Behavior, Log, TEXT("[%s] User speaking — conversation pause."), *GetName());
|
||||||
|
}
|
||||||
|
|
||||||
|
ProximityGazeTarget = nullptr;
|
||||||
|
ProximityGazeDuration = 0.0f;
|
||||||
|
return; // Gaze managed by ConvAgent
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bConversing)
|
||||||
|
{
|
||||||
|
// Connected but user hasn't spoken yet — don't touch gaze, don't pause
|
||||||
|
ProximityGazeTarget = nullptr;
|
||||||
|
ProximityGazeDuration = 0.0f;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Conversation ended — clear BB flag, resume movement
|
||||||
|
if (bConversationPaused)
|
||||||
|
{
|
||||||
|
bConversationPaused = false;
|
||||||
|
bUserHasSpokenInConversation = false;
|
||||||
|
|
||||||
|
if (Blackboard)
|
||||||
|
{
|
||||||
|
Blackboard->SetValueAsBool(PS_AI_Behavior_BB::ConversationPaused, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
APawn* ConvPawn = GetPawn();
|
||||||
|
if (ConvPawn)
|
||||||
|
{
|
||||||
|
if (auto* Spline = ConvPawn->FindComponentByClass<UPS_AI_Behavior_SplineFollowerComponent>())
|
||||||
|
{
|
||||||
|
Spline->ResumeFollowing();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
UE_LOG(LogPS_AI_Behavior, Log, TEXT("[%s] Conversation ended — resumed."), *GetName());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Priority 3/4: Proximity gaze ────────────────────────────────
|
// ── Priority 3/4: Proximity gaze ────────────────────────────────
|
||||||
@ -750,3 +880,4 @@ void APS_AI_Behavior_AIController::UpdateGazeTarget(float DeltaSeconds)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -162,6 +162,14 @@ private:
|
|||||||
TWeakObjectPtr<UActorComponent> CachedConvAgentComponent;
|
TWeakObjectPtr<UActorComponent> CachedConvAgentComponent;
|
||||||
FBoolProperty* ConvProp_bNetIsConversing = nullptr;
|
FBoolProperty* ConvProp_bNetIsConversing = nullptr;
|
||||||
|
|
||||||
|
// Mic VAD cache (local voice activity detection)
|
||||||
|
TWeakObjectPtr<UActorComponent> CachedMicComponent;
|
||||||
|
FBoolProperty* MicProp_bIsUserSpeaking = nullptr;
|
||||||
|
|
||||||
|
// Conversation pause state
|
||||||
|
bool bConversationPaused = false;
|
||||||
|
bool bUserHasSpokenInConversation = false;
|
||||||
|
|
||||||
// Proximity gaze state
|
// Proximity gaze state
|
||||||
TWeakObjectPtr<AActor> ProximityGazeTarget;
|
TWeakObjectPtr<AActor> ProximityGazeTarget;
|
||||||
float ProximityGazeDuration = 0.0f;
|
float ProximityGazeDuration = 0.0f;
|
||||||
|
|||||||
@ -188,4 +188,5 @@ namespace PS_AI_Behavior_BB
|
|||||||
inline const FName LastKnownTargetPosition = TEXT("LastKnownTargetPosition");
|
inline const FName LastKnownTargetPosition = TEXT("LastKnownTargetPosition");
|
||||||
inline const FName ThreatPawnName = TEXT("ThreatPawnName"); // Debug: name of the owning Pawn behind ThreatActor
|
inline const FName ThreatPawnName = TEXT("ThreatPawnName"); // Debug: name of the owning Pawn behind ThreatActor
|
||||||
inline const FName PreferCover = TEXT("PreferCover"); // Bool: personality-driven cover preference cycle
|
inline const FName PreferCover = TEXT("PreferCover"); // Bool: personality-driven cover preference cycle
|
||||||
|
inline const FName ConversationPaused = TEXT("ConversationPaused"); // Bool: NPC paused during active conversation
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -198,6 +198,54 @@ void UPS_AI_ConvAgent_MicrophoneCaptureComponent::OnAudioGenerate(
|
|||||||
PeakLevel.store(Peak, std::memory_order_relaxed);
|
PeakLevel.store(Peak, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Local Voice Activity Detection ──────────────────────────────────
|
||||||
|
// Runs on the audio thread for minimal latency. Uses hysteresis to
|
||||||
|
// prevent flickering. Dispatches state change to game thread.
|
||||||
|
{
|
||||||
|
const float Rms = CurrentRMS.load(std::memory_order_relaxed);
|
||||||
|
const float BufferDuration = static_cast<float>(NumFrames) / FMath::Max(1, InSampleRate);
|
||||||
|
const bool bAboveThreshold = (Rms > VoiceActivityThreshold);
|
||||||
|
|
||||||
|
if (bAboveThreshold)
|
||||||
|
{
|
||||||
|
VoiceSilenceAccumulator.store(0.0f, std::memory_order_relaxed);
|
||||||
|
const float NewOnset = VoiceOnsetAccumulator.load(std::memory_order_relaxed) + BufferDuration;
|
||||||
|
VoiceOnsetAccumulator.store(NewOnset, std::memory_order_relaxed);
|
||||||
|
|
||||||
|
if (!bIsUserSpeaking && NewOnset >= VoiceOnsetTime)
|
||||||
|
{
|
||||||
|
bIsUserSpeaking = true;
|
||||||
|
TWeakObjectPtr<UPS_AI_ConvAgent_MicrophoneCaptureComponent> WeakSelf(this);
|
||||||
|
AsyncTask(ENamedThreads::GameThread, [WeakSelf]()
|
||||||
|
{
|
||||||
|
if (auto* Self = WeakSelf.Get())
|
||||||
|
{
|
||||||
|
Self->OnUserVoiceActivityChanged.Broadcast(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
VoiceOnsetAccumulator.store(0.0f, std::memory_order_relaxed);
|
||||||
|
const float NewSilence = VoiceSilenceAccumulator.load(std::memory_order_relaxed) + BufferDuration;
|
||||||
|
VoiceSilenceAccumulator.store(NewSilence, std::memory_order_relaxed);
|
||||||
|
|
||||||
|
if (bIsUserSpeaking && NewSilence >= VoiceSilenceTime)
|
||||||
|
{
|
||||||
|
bIsUserSpeaking = false;
|
||||||
|
TWeakObjectPtr<UPS_AI_ConvAgent_MicrophoneCaptureComponent> WeakSelf(this);
|
||||||
|
AsyncTask(ENamedThreads::GameThread, [WeakSelf]()
|
||||||
|
{
|
||||||
|
if (auto* Self = WeakSelf.Get())
|
||||||
|
{
|
||||||
|
Self->OnUserVoiceActivityChanged.Broadcast(false);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Resample + downmix to 16000 Hz mono.
|
// Resample + downmix to 16000 Hz mono.
|
||||||
TArray<float> Resampled = ResampleTo16000(FloatAudio, NumFrames, InNumChannels, InSampleRate);
|
TArray<float> Resampled = ResampleTo16000(FloatAudio, NumFrames, InNumChannels, InSampleRate);
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,9 @@
|
|||||||
// Delivers captured float PCM samples (16000 Hz mono, resampled from device rate).
|
// Delivers captured float PCM samples (16000 Hz mono, resampled from device rate).
|
||||||
DECLARE_MULTICAST_DELEGATE_OneParam(FOnPS_AI_ConvAgent_AudioCaptured, const TArray<float>& /*FloatPCM*/);
|
DECLARE_MULTICAST_DELEGATE_OneParam(FOnPS_AI_ConvAgent_AudioCaptured, const TArray<float>& /*FloatPCM*/);
|
||||||
|
|
||||||
|
// Fired when local voice activity changes (user starts/stops speaking).
|
||||||
|
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnPS_AI_ConvAgent_VoiceActivityChanged, bool, bIsSpeaking);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lightweight microphone capture component.
|
* Lightweight microphone capture component.
|
||||||
* Captures from the default audio input device, resamples to 16000 Hz mono,
|
* Captures from the default audio input device, resamples to 16000 Hz mono,
|
||||||
@ -46,6 +49,31 @@ public:
|
|||||||
* Set by the agent component for echo suppression (skip mic while agent speaks). */
|
* Set by the agent component for echo suppression (skip mic while agent speaks). */
|
||||||
std::atomic<bool>* EchoSuppressFlag = nullptr;
|
std::atomic<bool>* EchoSuppressFlag = nullptr;
|
||||||
|
|
||||||
|
// ── Local Voice Activity Detection ──────────────────────────────────
|
||||||
|
|
||||||
|
/** RMS threshold above which audio is considered voice. Adjust based on mic sensitivity and environment noise. */
|
||||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Voice Activity",
|
||||||
|
meta = (ClampMin = "0.001", ClampMax = "0.2"))
|
||||||
|
float VoiceActivityThreshold = 0.015f;
|
||||||
|
|
||||||
|
/** Time (seconds) RMS must stay above threshold before declaring speech onset. Prevents false triggers from clicks/bumps. */
|
||||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Voice Activity",
|
||||||
|
meta = (ClampMin = "0.01", ClampMax = "1.0"))
|
||||||
|
float VoiceOnsetTime = 0.1f;
|
||||||
|
|
||||||
|
/** Time (seconds) RMS must stay below threshold before declaring speech ended. Prevents cutting off between words. */
|
||||||
|
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "PS AI ConvAgent|Voice Activity",
|
||||||
|
meta = (ClampMin = "0.1", ClampMax = "3.0"))
|
||||||
|
float VoiceSilenceTime = 0.5f;
|
||||||
|
|
||||||
|
/** Whether the user is currently speaking (local VAD, independent of ElevenLabs server). */
|
||||||
|
UPROPERTY(BlueprintReadOnly, Category = "PS AI ConvAgent|Voice Activity")
|
||||||
|
bool bIsUserSpeaking = false;
|
||||||
|
|
||||||
|
/** Fired when voice activity changes. Use to pause NPC movement, trigger animations, etc. */
|
||||||
|
UPROPERTY(BlueprintAssignable, Category = "PS AI ConvAgent|Voice Activity")
|
||||||
|
FOnPS_AI_ConvAgent_VoiceActivityChanged OnUserVoiceActivityChanged;
|
||||||
|
|
||||||
// ── Debug ────────────────────────────────────────────────────────────────
|
// ── Debug ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/** Enable debug logging for this component.
|
/** Enable debug logging for this component.
|
||||||
@ -99,6 +127,10 @@ private:
|
|||||||
std::atomic<float> CurrentRMS{0.0f};
|
std::atomic<float> CurrentRMS{0.0f};
|
||||||
std::atomic<float> PeakLevel{0.0f};
|
std::atomic<float> PeakLevel{0.0f};
|
||||||
|
|
||||||
|
// VAD accumulators (written from audio callback thread)
|
||||||
|
std::atomic<float> VoiceOnsetAccumulator{0.0f};
|
||||||
|
std::atomic<float> VoiceSilenceAccumulator{0.0f};
|
||||||
|
|
||||||
// Device name cached on StartCapture for HUD display.
|
// Device name cached on StartCapture for HUD display.
|
||||||
FString CachedDeviceName;
|
FString CachedDeviceName;
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user