v2.3.0: Separate emotion data asset + code cleanup

- Create dedicated UElevenLabsEmotionPoseMap data asset for emotions
- FacialExpressionComponent now uses EmotionPoseMap (not LipSyncPoseMap)
- Remove emotion data (FElevenLabsEmotionPoseSet, EmotionPoses TMap) from LipSyncPoseMap
- Add ElevenLabsEmotionPoseMapFactory for Content Browser asset creation
- Standardize log format to [T+Xs] [Turn N] in ConversationalAgentComponent
- Remove unused #include "Engine/World.h"
- Simplify collision log to single line

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-02-24 20:17:04 +01:00
parent f6541bd7e2
commit 88c175909e
11 changed files with 162 additions and 68 deletions

View File

@ -7,7 +7,6 @@
#include "Components/AudioComponent.h"
#include "Sound/SoundWaveProcedural.h"
#include "GameFramework/Actor.h"
#include "Engine/World.h"
DEFINE_LOG_CATEGORY_STATIC(LogElevenLabsAgent, Log, All);
@ -89,9 +88,10 @@ void UElevenLabsConversationalAgentComponent::TickComponent(float DeltaTime, ELe
if (Elapsed >= static_cast<double>(AudioPreBufferMs))
{
bPreBuffering = false;
const double Tpb = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogElevenLabsAgent, Log,
TEXT("[Turn %d] Pre-buffer timeout (%dms). Starting playback."),
LastClosedTurnIndex, AudioPreBufferMs);
TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
Tpb, LastClosedTurnIndex, AudioPreBufferMs);
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{
AudioPlaybackComponent->Play();
@ -145,9 +145,10 @@ void UElevenLabsConversationalAgentComponent::TickComponent(float DeltaTime, ELe
{
if (bHardTimeoutFired)
{
const double Tht = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogElevenLabsAgent, Warning,
TEXT("[Turn %d] Agent silence hard-timeout (10s) without agent_response — declaring agent stopped."),
LastClosedTurnIndex);
TEXT("[T+%.2fs] [Turn %d] Agent silence hard-timeout (10s) without agent_response — declaring agent stopped."),
Tht, LastClosedTurnIndex);
}
OnAgentStoppedSpeaking.Broadcast();
}
@ -519,13 +520,10 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
}
else
{
// Collision: server started generating Turn N's response while Turn M (M>N) mic was open.
// Stop the mic WITHOUT flushing the accumulated audio buffer (see StopListening's
// bAgentGenerating guard). Flushing would send audio to a server that is mid-generation,
// causing it to re-enter "user speaking" state and stall — both sides stuck.
// Collision: server generating while mic was open — stop mic without flushing.
UE_LOG(LogElevenLabsAgent, Log,
TEXT("[T+%.2fs] [Turn %d → Turn %d collision] Agent generating Turn %d response — mic (Turn %d) was open, stopping. (%.2fs after turn end)"),
T, LastClosedTurnIndex, TurnIndex, LastClosedTurnIndex, TurnIndex, LatencyFromTurnEnd);
TEXT("[T+%.2fs] [Turn %d] Collision — mic was open, stopping. (%.2fs after turn end)"),
T, LastClosedTurnIndex, LatencyFromTurnEnd);
StopListening();
}
}
@ -736,9 +734,10 @@ void UElevenLabsConversationalAgentComponent::EnqueueAgentAudio(const TArray<uin
// chunk 1 finishes playing, eliminating mid-sentence pauses.
bPreBuffering = true;
PreBufferStartTime = FPlatformTime::Seconds();
const double Tpb2 = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogElevenLabsAgent, Log,
TEXT("[Turn %d] Pre-buffering %dms before starting playback."),
LastClosedTurnIndex, AudioPreBufferMs);
TEXT("[T+%.2fs] [Turn %d] Pre-buffering %dms before starting playback."),
Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
}
else if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{
@ -750,10 +749,12 @@ void UElevenLabsConversationalAgentComponent::EnqueueAgentAudio(const TArray<uin
// Second (or later) audio chunk arrived during pre-buffer period.
// We now have both chunks buffered — start playback immediately.
bPreBuffering = false;
const double BufferedMs = (FPlatformTime::Seconds() - PreBufferStartTime) * 1000.0;
const double NowPb = FPlatformTime::Seconds();
const double BufferedMs = (NowPb - PreBufferStartTime) * 1000.0;
const double Tpb3 = NowPb - SessionStartTime;
UE_LOG(LogElevenLabsAgent, Log,
TEXT("[Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered). Starting playback."),
LastClosedTurnIndex, BufferedMs);
TEXT("[T+%.2fs] [Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered). Starting playback."),
Tpb3, LastClosedTurnIndex, BufferedMs);
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{
AudioPlaybackComponent->Play();
@ -766,9 +767,10 @@ void UElevenLabsConversationalAgentComponent::EnqueueAgentAudio(const TArray<uin
// buffer underrun (TTS inter-batch gap). Restart it if needed.
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
{
const double Tbr = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogElevenLabsAgent, Warning,
TEXT("[Turn %d] Audio component stopped during speech (buffer underrun). Restarting playback."),
LastClosedTurnIndex);
TEXT("[T+%.2fs] [Turn %d] Audio component stopped during speech (buffer underrun). Restarting playback."),
Tbr, LastClosedTurnIndex);
AudioPlaybackComponent->Play();
}
// Reset silence counter — new audio arrived, we're not in a gap anymore

View File

@ -2,7 +2,7 @@
#include "ElevenLabsFacialExpressionComponent.h"
#include "ElevenLabsConversationalAgentComponent.h"
#include "ElevenLabsLipSyncPoseMap.h"
#include "ElevenLabsEmotionPoseMap.h"
#include "Animation/AnimSequence.h"
#include "Animation/AnimData/IAnimationDataModel.h"
@ -52,8 +52,20 @@ void UElevenLabsFacialExpressionComponent::BeginPlay()
*Owner->GetName());
}
// Validate emotion poses from PoseMap
// Validate emotion poses from EmotionPoseMap
ValidateEmotionPoses();
// Auto-start the default emotion animation (Neutral) so the face
// is alive from the start (blinking, micro-movements, breathing)
// without waiting for the first set_emotion call.
ActiveAnim = FindAnimForEmotion(ActiveEmotion, ActiveEmotionIntensity);
if (ActiveAnim)
{
ActivePlaybackTime = 0.0f;
CrossfadeAlpha = 1.0f; // No crossfade needed on startup
UE_LOG(LogElevenLabsFacialExpr, Log,
TEXT("Auto-started default emotion anim: %s"), *ActiveAnim->GetName());
}
}
void UElevenLabsFacialExpressionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
@ -73,15 +85,15 @@ void UElevenLabsFacialExpressionComponent::EndPlay(const EEndPlayReason::Type En
void UElevenLabsFacialExpressionComponent::ValidateEmotionPoses()
{
if (!PoseMap || PoseMap->EmotionPoses.Num() == 0)
if (!EmotionPoseMap || EmotionPoseMap->EmotionPoses.Num() == 0)
{
UE_LOG(LogElevenLabsFacialExpr, Log,
TEXT("No emotion poses assigned in PoseMap — facial expressions disabled."));
TEXT("No emotion poses assigned in EmotionPoseMap — facial expressions disabled."));
return;
}
int32 AnimCount = 0;
for (const auto& EmotionPair : PoseMap->EmotionPoses)
for (const auto& EmotionPair : EmotionPoseMap->EmotionPoses)
{
const FElevenLabsEmotionPoseSet& PoseSet = EmotionPair.Value;
if (PoseSet.Normal) ++AnimCount;
@ -91,7 +103,7 @@ void UElevenLabsFacialExpressionComponent::ValidateEmotionPoses()
UE_LOG(LogElevenLabsFacialExpr, Log,
TEXT("=== Emotion poses: %d emotions, %d anim slots available ==="),
PoseMap->EmotionPoses.Num(), AnimCount);
EmotionPoseMap->EmotionPoses.Num(), AnimCount);
}
// ─────────────────────────────────────────────────────────────────────────────
@ -101,9 +113,9 @@ void UElevenLabsFacialExpressionComponent::ValidateEmotionPoses()
UAnimSequence* UElevenLabsFacialExpressionComponent::FindAnimForEmotion(
EElevenLabsEmotion Emotion, EElevenLabsEmotionIntensity Intensity) const
{
if (!PoseMap) return nullptr;
if (!EmotionPoseMap) return nullptr;
const FElevenLabsEmotionPoseSet* PoseSet = PoseMap->EmotionPoses.Find(Emotion);
const FElevenLabsEmotionPoseSet* PoseSet = EmotionPoseMap->EmotionPoses.Find(Emotion);
if (!PoseSet) return nullptr;
// Direct match

View File

@ -0,0 +1,61 @@
// Copyright ASTERION. All Rights Reserved.
#pragma once
#include "CoreMinimal.h"
#include "Engine/DataAsset.h"
#include "Engine/AssetManager.h"
#include "ElevenLabsDefinitions.h"
#include "ElevenLabsEmotionPoseMap.generated.h"
class UAnimSequence;
// ─────────────────────────────────────────────────────────────────────────────
// Emotion pose set: 3 intensity levels (Normal / Medium / Extreme)
// ─────────────────────────────────────────────────────────────────────────────
USTRUCT(BlueprintType)
struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsEmotionPoseSet
{
	GENERATED_BODY()

	/** One emotion's facial AnimSequences, split into three intensity levels.
	 *  The FacialExpressionComponent selects one of the three slots based on
	 *  the requested EElevenLabsEmotionIntensity. Any slot may be left unset. */

	/** Low intensity expression (subtle). E.g. MHF_Happy_N */
	UPROPERTY(EditAnywhere, BlueprintReadWrite,
	meta = (ToolTip = "Low intensity (Normal). E.g. MHF_Happy_N"))
	TObjectPtr<UAnimSequence> Normal;

	/** Medium intensity expression. E.g. MHF_Happy_M */
	UPROPERTY(EditAnywhere, BlueprintReadWrite,
	meta = (ToolTip = "Medium intensity. E.g. MHF_Happy_M"))
	TObjectPtr<UAnimSequence> Medium;

	/** High intensity expression (extreme). E.g. MHF_Happy_E */
	UPROPERTY(EditAnywhere, BlueprintReadWrite,
	meta = (ToolTip = "High intensity (Extreme). E.g. MHF_Happy_E"))
	TObjectPtr<UAnimSequence> Extreme;
};
/**
 * Reusable data asset that maps emotions to facial expression AnimSequences.
 *
 * Create ONE instance of this asset in the Content Browser
 * (right-click → Miscellaneous → Data Asset → ElevenLabsEmotionPoseMap),
 * assign your emotion AnimSequences, then reference this asset
 * on the ElevenLabs Facial Expression component.
 *
 * The component plays the AnimSequence in real-time (looping) to drive
 * emotion-based facial expressions (eyes, eyebrows, cheeks, mouth mood).
 * Lip sync overrides the mouth-area curves on top.
 */
UCLASS(BlueprintType, Blueprintable, DisplayName = "ElevenLabs Emotion Pose Map")
class PS_AI_AGENT_ELEVENLABS_API UElevenLabsEmotionPoseMap : public UPrimaryDataAsset
{
	GENERATED_BODY()

public:
	/** Map of emotions to their AnimSequence sets (Normal / Medium / Extreme).
	 * Add entries for each emotion your agent uses (Joy, Sadness, Anger, Surprise, Fear, Disgust).
	 * Neutral is recommended — it plays by default at startup (blinking, breathing). */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Emotion Poses",
	meta = (ToolTip = "Emotion → AnimSequence mapping with 3 intensity levels.\nThese drive the base facial expression (eyes, brows, cheeks).\nLip sync overrides the mouth area on top."))
	TMap<EElevenLabsEmotion, FElevenLabsEmotionPoseSet> EmotionPoses;
};

View File

@ -8,7 +8,7 @@
#include "ElevenLabsFacialExpressionComponent.generated.h"
class UElevenLabsConversationalAgentComponent;
class UElevenLabsLipSyncPoseMap;
class UElevenLabsEmotionPoseMap;
class UAnimSequence;
// ─────────────────────────────────────────────────────────────────────────────
@ -41,11 +41,11 @@ public:
// ── Configuration ─────────────────────────────────────────────────────────
/** Pose map asset containing emotion AnimSequences (Normal / Medium / Extreme per emotion).
* Can be the same PoseMap asset used by the LipSync component. */
/** Emotion pose map asset containing emotion AnimSequences (Normal / Medium / Extreme per emotion).
* Create a dedicated ElevenLabsEmotionPoseMap asset in the Content Browser. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression",
meta = (ToolTip = "Pose map with Emotion Poses filled in.\nCan be the same asset as the LipSync component."))
TObjectPtr<UElevenLabsLipSyncPoseMap> PoseMap;
meta = (ToolTip = "Dedicated Emotion Pose Map asset.\nRight-click Content Browser → Miscellaneous → ElevenLabs Emotion Pose Map."))
TObjectPtr<UElevenLabsEmotionPoseMap> EmotionPoseMap;
/** Emotion crossfade duration in seconds. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression",

View File

@ -5,35 +5,10 @@
#include "CoreMinimal.h"
#include "Engine/DataAsset.h"
#include "Engine/AssetManager.h"
#include "ElevenLabsDefinitions.h"
#include "ElevenLabsLipSyncPoseMap.generated.h"
class UAnimSequence;
// ─────────────────────────────────────────────────────────────────────────────
// Emotion pose set: 3 intensity levels (Normal / Medium / Extreme)
// ─────────────────────────────────────────────────────────────────────────────
USTRUCT(BlueprintType)
struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsEmotionPoseSet
{
GENERATED_BODY()
/** Low intensity expression (subtle). E.g. MHF_Happy_N */
UPROPERTY(EditAnywhere, BlueprintReadWrite,
meta = (ToolTip = "Low intensity (Normal). E.g. MHF_Happy_N"))
TObjectPtr<UAnimSequence> Normal;
/** Medium intensity expression. E.g. MHF_Happy_M */
UPROPERTY(EditAnywhere, BlueprintReadWrite,
meta = (ToolTip = "Medium intensity. E.g. MHF_Happy_M"))
TObjectPtr<UAnimSequence> Medium;
/** High intensity expression (extreme). E.g. MHF_Happy_E */
UPROPERTY(EditAnywhere, BlueprintReadWrite,
meta = (ToolTip = "High intensity (Extreme). E.g. MHF_Happy_E"))
TObjectPtr<UAnimSequence> Extreme;
};
/**
* Reusable data asset that maps OVR visemes to phoneme pose AnimSequences.
*
@ -129,16 +104,4 @@ public:
meta = (ToolTip = "Close back vowel (OO). E.g. MHF_OU"))
TObjectPtr<UAnimSequence> PoseOU;
// ── Emotion Poses ────────────────────────────────────────────────────────
//
// Facial expression animations for each emotion, with 3 intensity levels.
// These are applied as a BASE layer (eyes, eyebrows, cheeks).
// Lip sync MODULATES on top, overriding only mouth-area curves.
/** Map of emotions to their pose sets (Normal / Medium / Extreme).
* Add entries for each emotion your agent uses (Joy, Sadness, Anger, Surprise, Fear, Disgust).
* Neutral is optional absence means no base expression. */
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Emotion Poses",
meta = (ToolTip = "Emotion → AnimSequence mapping with 3 intensity levels.\nThese drive the base facial expression (eyes, brows, cheeks).\nLip sync overrides the mouth area on top."))
TMap<EElevenLabsEmotion, FElevenLabsEmotionPoseSet> EmotionPoses;
};

View File

@ -0,0 +1,29 @@
// Copyright ASTERION. All Rights Reserved.
#include "ElevenLabsEmotionPoseMapFactory.h"
#include "ElevenLabsEmotionPoseMap.h"
#include "AssetTypeCategories.h"
UElevenLabsEmotionPoseMapFactory::UElevenLabsEmotionPoseMapFactory()
{
	// Enable "create new asset" in the Content Browser and open the
	// editor immediately on the freshly created asset.
	bCreateNew = true;
	bEditAfterNew = true;

	// This factory produces exactly one asset type: the emotion pose map.
	SupportedClass = UElevenLabsEmotionPoseMap::StaticClass();
}
UObject* UElevenLabsEmotionPoseMapFactory::FactoryCreateNew(
UClass* Class, UObject* InParent, FName Name, EObjectFlags Flags,
UObject* Context, FFeedbackContext* Warn)
{
	// A plain data asset needs no import pipeline, so Context and Warn
	// are intentionally unused — just construct the object in place.
	UElevenLabsEmotionPoseMap* NewAsset =
		NewObject<UElevenLabsEmotionPoseMap>(InParent, Class, Name, Flags);
	return NewAsset;
}
FText UElevenLabsEmotionPoseMapFactory::GetDisplayName() const
{
	// Label shown for this factory in the Content Browser creation menu.
	const FString MenuLabel(TEXT("ElevenLabs Emotion Pose Map"));
	return FText::FromString(MenuLabel);
}
uint32 UElevenLabsEmotionPoseMapFactory::GetMenuCategories() const
{
	// File the asset under the generic "Miscellaneous" creation category.
	const uint32 Categories = EAssetTypeCategories::Misc;
	return Categories;
}

View File

@ -0,0 +1,27 @@
// Copyright ASTERION. All Rights Reserved.
#pragma once
#include "CoreMinimal.h"
#include "Factories/Factory.h"
#include "ElevenLabsEmotionPoseMapFactory.generated.h"
/**
 * Factory that lets users create ElevenLabsEmotionPoseMap assets
 * directly from the Content Browser (right-click → Miscellaneous).
 */
UCLASS()
class UElevenLabsEmotionPoseMapFactory : public UFactory
{
	GENERATED_BODY()

public:
	UElevenLabsEmotionPoseMapFactory();

	/** Instantiates a new UElevenLabsEmotionPoseMap inside InParent. */
	virtual UObject* FactoryCreateNew(UClass* Class, UObject* InParent,
	FName Name, EObjectFlags Flags, UObject* Context,
	FFeedbackContext* Warn) override;

	/** Menu label used in the Content Browser creation menu. */
	virtual FText GetDisplayName() const override;

	/** Asset category flags — files the asset under "Miscellaneous". */
	virtual uint32 GetMenuCategories() const override;
};