v2.3.0: Separate emotion data asset + code cleanup
- Create dedicated UElevenLabsEmotionPoseMap data asset for emotions
- FacialExpressionComponent now uses EmotionPoseMap (not LipSyncPoseMap)
- Remove emotion data (FElevenLabsEmotionPoseSet, EmotionPoses TMap) from LipSyncPoseMap
- Add ElevenLabsEmotionPoseMapFactory for Content Browser asset creation
- Standardize log format to [T+Xs] [Turn N] in ConversationalAgentComponent
- Remove unused #include "Engine/World.h"
- Simplify collision log to single line

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f6541bd7e2
commit
88c175909e
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Unreal/PS_AI_Agent/Content/NewElevenLabsEmotionPoseMap.uasset
Normal file
BIN
Unreal/PS_AI_Agent/Content/NewElevenLabsEmotionPoseMap.uasset
Normal file
Binary file not shown.
@ -7,7 +7,6 @@
|
||||
#include "Components/AudioComponent.h"
|
||||
#include "Sound/SoundWaveProcedural.h"
|
||||
#include "GameFramework/Actor.h"
|
||||
#include "Engine/World.h"
|
||||
|
||||
DEFINE_LOG_CATEGORY_STATIC(LogElevenLabsAgent, Log, All);
|
||||
|
||||
@ -89,9 +88,10 @@ void UElevenLabsConversationalAgentComponent::TickComponent(float DeltaTime, ELe
|
||||
if (Elapsed >= static_cast<double>(AudioPreBufferMs))
|
||||
{
|
||||
bPreBuffering = false;
|
||||
const double Tpb = FPlatformTime::Seconds() - SessionStartTime;
|
||||
UE_LOG(LogElevenLabsAgent, Log,
|
||||
TEXT("[Turn %d] Pre-buffer timeout (%dms). Starting playback."),
|
||||
LastClosedTurnIndex, AudioPreBufferMs);
|
||||
TEXT("[T+%.2fs] [Turn %d] Pre-buffer timeout (%dms). Starting playback."),
|
||||
Tpb, LastClosedTurnIndex, AudioPreBufferMs);
|
||||
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
@ -145,9 +145,10 @@ void UElevenLabsConversationalAgentComponent::TickComponent(float DeltaTime, ELe
|
||||
{
|
||||
if (bHardTimeoutFired)
|
||||
{
|
||||
const double Tht = FPlatformTime::Seconds() - SessionStartTime;
|
||||
UE_LOG(LogElevenLabsAgent, Warning,
|
||||
TEXT("[Turn %d] Agent silence hard-timeout (10s) without agent_response — declaring agent stopped."),
|
||||
LastClosedTurnIndex);
|
||||
TEXT("[T+%.2fs] [Turn %d] Agent silence hard-timeout (10s) without agent_response — declaring agent stopped."),
|
||||
Tht, LastClosedTurnIndex);
|
||||
}
|
||||
OnAgentStoppedSpeaking.Broadcast();
|
||||
}
|
||||
@ -519,13 +520,10 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
|
||||
}
|
||||
else
|
||||
{
|
||||
// Collision: server started generating Turn N's response while Turn M (M>N) mic was open.
|
||||
// Stop the mic WITHOUT flushing the accumulated audio buffer (see StopListening's
|
||||
// bAgentGenerating guard). Flushing would send audio to a server that is mid-generation,
|
||||
// causing it to re-enter "user speaking" state and stall — both sides stuck.
|
||||
// Collision: server generating while mic was open — stop mic without flushing.
|
||||
UE_LOG(LogElevenLabsAgent, Log,
|
||||
TEXT("[T+%.2fs] [Turn %d → Turn %d collision] Agent generating Turn %d response — mic (Turn %d) was open, stopping. (%.2fs after turn end)"),
|
||||
T, LastClosedTurnIndex, TurnIndex, LastClosedTurnIndex, TurnIndex, LatencyFromTurnEnd);
|
||||
TEXT("[T+%.2fs] [Turn %d] Collision — mic was open, stopping. (%.2fs after turn end)"),
|
||||
T, LastClosedTurnIndex, LatencyFromTurnEnd);
|
||||
StopListening();
|
||||
}
|
||||
}
|
||||
@ -736,9 +734,10 @@ void UElevenLabsConversationalAgentComponent::EnqueueAgentAudio(const TArray<uin
|
||||
// chunk 1 finishes playing, eliminating mid-sentence pauses.
|
||||
bPreBuffering = true;
|
||||
PreBufferStartTime = FPlatformTime::Seconds();
|
||||
const double Tpb2 = FPlatformTime::Seconds() - SessionStartTime;
|
||||
UE_LOG(LogElevenLabsAgent, Log,
|
||||
TEXT("[Turn %d] Pre-buffering %dms before starting playback."),
|
||||
LastClosedTurnIndex, AudioPreBufferMs);
|
||||
TEXT("[T+%.2fs] [Turn %d] Pre-buffering %dms before starting playback."),
|
||||
Tpb2, LastClosedTurnIndex, AudioPreBufferMs);
|
||||
}
|
||||
else if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
{
|
||||
@ -750,10 +749,12 @@ void UElevenLabsConversationalAgentComponent::EnqueueAgentAudio(const TArray<uin
|
||||
// Second (or later) audio chunk arrived during pre-buffer period.
|
||||
// We now have both chunks buffered — start playback immediately.
|
||||
bPreBuffering = false;
|
||||
const double BufferedMs = (FPlatformTime::Seconds() - PreBufferStartTime) * 1000.0;
|
||||
const double NowPb = FPlatformTime::Seconds();
|
||||
const double BufferedMs = (NowPb - PreBufferStartTime) * 1000.0;
|
||||
const double Tpb3 = NowPb - SessionStartTime;
|
||||
UE_LOG(LogElevenLabsAgent, Log,
|
||||
TEXT("[Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered). Starting playback."),
|
||||
LastClosedTurnIndex, BufferedMs);
|
||||
TEXT("[T+%.2fs] [Turn %d] Pre-buffer: second chunk arrived (%.0fms buffered). Starting playback."),
|
||||
Tpb3, LastClosedTurnIndex, BufferedMs);
|
||||
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
{
|
||||
AudioPlaybackComponent->Play();
|
||||
@ -766,9 +767,10 @@ void UElevenLabsConversationalAgentComponent::EnqueueAgentAudio(const TArray<uin
|
||||
// buffer underrun (TTS inter-batch gap). Restart it if needed.
|
||||
if (AudioPlaybackComponent && !AudioPlaybackComponent->IsPlaying())
|
||||
{
|
||||
const double Tbr = FPlatformTime::Seconds() - SessionStartTime;
|
||||
UE_LOG(LogElevenLabsAgent, Warning,
|
||||
TEXT("[Turn %d] Audio component stopped during speech (buffer underrun). Restarting playback."),
|
||||
LastClosedTurnIndex);
|
||||
TEXT("[T+%.2fs] [Turn %d] Audio component stopped during speech (buffer underrun). Restarting playback."),
|
||||
Tbr, LastClosedTurnIndex);
|
||||
AudioPlaybackComponent->Play();
|
||||
}
|
||||
// Reset silence counter — new audio arrived, we're not in a gap anymore
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
#include "ElevenLabsFacialExpressionComponent.h"
|
||||
#include "ElevenLabsConversationalAgentComponent.h"
|
||||
#include "ElevenLabsLipSyncPoseMap.h"
|
||||
#include "ElevenLabsEmotionPoseMap.h"
|
||||
#include "Animation/AnimSequence.h"
|
||||
#include "Animation/AnimData/IAnimationDataModel.h"
|
||||
|
||||
@ -52,8 +52,20 @@ void UElevenLabsFacialExpressionComponent::BeginPlay()
|
||||
*Owner->GetName());
|
||||
}
|
||||
|
||||
// Validate emotion poses from PoseMap
|
||||
// Validate emotion poses from EmotionPoseMap
|
||||
ValidateEmotionPoses();
|
||||
|
||||
// Auto-start the default emotion animation (Neutral) so the face
|
||||
// is alive from the start (blinking, micro-movements, breathing)
|
||||
// without waiting for the first set_emotion call.
|
||||
ActiveAnim = FindAnimForEmotion(ActiveEmotion, ActiveEmotionIntensity);
|
||||
if (ActiveAnim)
|
||||
{
|
||||
ActivePlaybackTime = 0.0f;
|
||||
CrossfadeAlpha = 1.0f; // No crossfade needed on startup
|
||||
UE_LOG(LogElevenLabsFacialExpr, Log,
|
||||
TEXT("Auto-started default emotion anim: %s"), *ActiveAnim->GetName());
|
||||
}
|
||||
}
|
||||
|
||||
void UElevenLabsFacialExpressionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
|
||||
@ -73,15 +85,15 @@ void UElevenLabsFacialExpressionComponent::EndPlay(const EEndPlayReason::Type En
|
||||
|
||||
void UElevenLabsFacialExpressionComponent::ValidateEmotionPoses()
|
||||
{
|
||||
if (!PoseMap || PoseMap->EmotionPoses.Num() == 0)
|
||||
if (!EmotionPoseMap || EmotionPoseMap->EmotionPoses.Num() == 0)
|
||||
{
|
||||
UE_LOG(LogElevenLabsFacialExpr, Log,
|
||||
TEXT("No emotion poses assigned in PoseMap — facial expressions disabled."));
|
||||
TEXT("No emotion poses assigned in EmotionPoseMap — facial expressions disabled."));
|
||||
return;
|
||||
}
|
||||
|
||||
int32 AnimCount = 0;
|
||||
for (const auto& EmotionPair : PoseMap->EmotionPoses)
|
||||
for (const auto& EmotionPair : EmotionPoseMap->EmotionPoses)
|
||||
{
|
||||
const FElevenLabsEmotionPoseSet& PoseSet = EmotionPair.Value;
|
||||
if (PoseSet.Normal) ++AnimCount;
|
||||
@ -91,7 +103,7 @@ void UElevenLabsFacialExpressionComponent::ValidateEmotionPoses()
|
||||
|
||||
UE_LOG(LogElevenLabsFacialExpr, Log,
|
||||
TEXT("=== Emotion poses: %d emotions, %d anim slots available ==="),
|
||||
PoseMap->EmotionPoses.Num(), AnimCount);
|
||||
EmotionPoseMap->EmotionPoses.Num(), AnimCount);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@ -101,9 +113,9 @@ void UElevenLabsFacialExpressionComponent::ValidateEmotionPoses()
|
||||
UAnimSequence* UElevenLabsFacialExpressionComponent::FindAnimForEmotion(
|
||||
EElevenLabsEmotion Emotion, EElevenLabsEmotionIntensity Intensity) const
|
||||
{
|
||||
if (!PoseMap) return nullptr;
|
||||
if (!EmotionPoseMap) return nullptr;
|
||||
|
||||
const FElevenLabsEmotionPoseSet* PoseSet = PoseMap->EmotionPoses.Find(Emotion);
|
||||
const FElevenLabsEmotionPoseSet* PoseSet = EmotionPoseMap->EmotionPoses.Find(Emotion);
|
||||
if (!PoseSet) return nullptr;
|
||||
|
||||
// Direct match
|
||||
|
||||
@ -0,0 +1,61 @@
|
||||
// Copyright ASTERION. All Rights Reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "CoreMinimal.h"
|
||||
#include "Engine/DataAsset.h"
|
||||
#include "Engine/AssetManager.h"
|
||||
#include "ElevenLabsDefinitions.h"
|
||||
#include "ElevenLabsEmotionPoseMap.generated.h"
|
||||
|
||||
class UAnimSequence;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Emotion pose set: 3 intensity levels (Normal / Medium / Extreme)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
USTRUCT(BlueprintType)
struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsEmotionPoseSet
{
	// Groups the three intensity variants (Normal / Medium / Extreme) of one
	// emotion. Each slot is an optional AnimSequence reference; member order
	// goes from the weakest to the strongest expression.
	GENERATED_BODY()

	/** Low intensity (subtle) expression, e.g. MHF_Happy_N. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "Low intensity (Normal). E.g. MHF_Happy_N"))
	TObjectPtr<UAnimSequence> Normal;

	/** Medium intensity expression, e.g. MHF_Happy_M. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "Medium intensity. E.g. MHF_Happy_M"))
	TObjectPtr<UAnimSequence> Medium;

	/** High intensity (extreme) expression, e.g. MHF_Happy_E. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, meta = (ToolTip = "High intensity (Extreme). E.g. MHF_Happy_E"))
	TObjectPtr<UAnimSequence> Extreme;
};
|
||||
|
||||
/**
|
||||
* Reusable data asset that maps emotions to facial expression AnimSequences.
|
||||
*
|
||||
* Create ONE instance of this asset in the Content Browser
|
||||
* (right-click → Miscellaneous → Data Asset → ElevenLabsEmotionPoseMap),
|
||||
* assign your emotion AnimSequences, then reference this asset
|
||||
* on the ElevenLabs Facial Expression component.
|
||||
*
|
||||
* The component plays the AnimSequence in real-time (looping) to drive
|
||||
* emotion-based facial expressions (eyes, eyebrows, cheeks, mouth mood).
|
||||
* Lip sync overrides the mouth-area curves on top.
|
||||
*/
|
||||
UCLASS(BlueprintType, Blueprintable, DisplayName = "ElevenLabs Emotion Pose Map")
|
||||
class PS_AI_AGENT_ELEVENLABS_API UElevenLabsEmotionPoseMap : public UPrimaryDataAsset
|
||||
{
|
||||
GENERATED_BODY()
|
||||
|
||||
public:
|
||||
/** Map of emotions to their AnimSequence sets (Normal / Medium / Extreme).
|
||||
* Add entries for each emotion your agent uses (Joy, Sadness, Anger, Surprise, Fear, Disgust).
|
||||
* Neutral is recommended — it plays by default at startup (blinking, breathing). */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Emotion Poses",
|
||||
meta = (ToolTip = "Emotion → AnimSequence mapping with 3 intensity levels.\nThese drive the base facial expression (eyes, brows, cheeks).\nLip sync overrides the mouth area on top."))
|
||||
TMap<EElevenLabsEmotion, FElevenLabsEmotionPoseSet> EmotionPoses;
|
||||
};
|
||||
@ -8,7 +8,7 @@
|
||||
#include "ElevenLabsFacialExpressionComponent.generated.h"
|
||||
|
||||
class UElevenLabsConversationalAgentComponent;
|
||||
class UElevenLabsLipSyncPoseMap;
|
||||
class UElevenLabsEmotionPoseMap;
|
||||
class UAnimSequence;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@ -41,11 +41,11 @@ public:
|
||||
|
||||
// ── Configuration ─────────────────────────────────────────────────────────
|
||||
|
||||
/** Pose map asset containing emotion AnimSequences (Normal / Medium / Extreme per emotion).
|
||||
* Can be the same PoseMap asset used by the LipSync component. */
|
||||
/** Emotion pose map asset containing emotion AnimSequences (Normal / Medium / Extreme per emotion).
|
||||
* Create a dedicated ElevenLabsEmotionPoseMap asset in the Content Browser. */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression",
|
||||
meta = (ToolTip = "Pose map with Emotion Poses filled in.\nCan be the same asset as the LipSync component."))
|
||||
TObjectPtr<UElevenLabsLipSyncPoseMap> PoseMap;
|
||||
meta = (ToolTip = "Dedicated Emotion Pose Map asset.\nRight-click Content Browser → Miscellaneous → ElevenLabs Emotion Pose Map."))
|
||||
TObjectPtr<UElevenLabsEmotionPoseMap> EmotionPoseMap;
|
||||
|
||||
/** Emotion crossfade duration in seconds. */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression",
|
||||
|
||||
@ -5,35 +5,10 @@
|
||||
#include "CoreMinimal.h"
|
||||
#include "Engine/DataAsset.h"
|
||||
#include "Engine/AssetManager.h"
|
||||
#include "ElevenLabsDefinitions.h"
|
||||
#include "ElevenLabsLipSyncPoseMap.generated.h"
|
||||
|
||||
class UAnimSequence;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Emotion pose set: 3 intensity levels (Normal / Medium / Extreme)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
USTRUCT(BlueprintType)
|
||||
struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsEmotionPoseSet
|
||||
{
|
||||
GENERATED_BODY()
|
||||
|
||||
/** Low intensity expression (subtle). E.g. MHF_Happy_N */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite,
|
||||
meta = (ToolTip = "Low intensity (Normal). E.g. MHF_Happy_N"))
|
||||
TObjectPtr<UAnimSequence> Normal;
|
||||
|
||||
/** Medium intensity expression. E.g. MHF_Happy_M */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite,
|
||||
meta = (ToolTip = "Medium intensity. E.g. MHF_Happy_M"))
|
||||
TObjectPtr<UAnimSequence> Medium;
|
||||
|
||||
/** High intensity expression (extreme). E.g. MHF_Happy_E */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite,
|
||||
meta = (ToolTip = "High intensity (Extreme). E.g. MHF_Happy_E"))
|
||||
TObjectPtr<UAnimSequence> Extreme;
|
||||
};
|
||||
|
||||
/**
|
||||
* Reusable data asset that maps OVR visemes to phoneme pose AnimSequences.
|
||||
*
|
||||
@ -129,16 +104,4 @@ public:
|
||||
meta = (ToolTip = "Close back vowel (OO). E.g. MHF_OU"))
|
||||
TObjectPtr<UAnimSequence> PoseOU;
|
||||
|
||||
// ── Emotion Poses ────────────────────────────────────────────────────────
|
||||
//
|
||||
// Facial expression animations for each emotion, with 3 intensity levels.
|
||||
// These are applied as a BASE layer (eyes, eyebrows, cheeks).
|
||||
// Lip sync MODULATES on top, overriding only mouth-area curves.
|
||||
|
||||
/** Map of emotions to their pose sets (Normal / Medium / Extreme).
|
||||
* Add entries for each emotion your agent uses (Joy, Sadness, Anger, Surprise, Fear, Disgust).
|
||||
* Neutral is optional — absence means no base expression. */
|
||||
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Emotion Poses",
|
||||
meta = (ToolTip = "Emotion → AnimSequence mapping with 3 intensity levels.\nThese drive the base facial expression (eyes, brows, cheeks).\nLip sync overrides the mouth area on top."))
|
||||
TMap<EElevenLabsEmotion, FElevenLabsEmotionPoseSet> EmotionPoses;
|
||||
};
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
// Copyright ASTERION. All Rights Reserved.
|
||||
|
||||
#include "ElevenLabsEmotionPoseMapFactory.h"
|
||||
#include "ElevenLabsEmotionPoseMap.h"
|
||||
#include "AssetTypeCategories.h"
|
||||
|
||||
/** Configures the factory to create new UElevenLabsEmotionPoseMap assets. */
UElevenLabsEmotionPoseMapFactory::UElevenLabsEmotionPoseMapFactory()
{
	// Create assets from scratch and request that they are opened for editing afterwards.
	bCreateNew = true;
	bEditAfterNew = true;

	// Asset class this factory produces.
	SupportedClass = UElevenLabsEmotionPoseMap::StaticClass();
}
|
||||
|
||||
/**
 * Instantiates a new emotion pose map asset.
 *
 * @param Class     Class to instantiate, forwarded to NewObject.
 * @param InParent  Outer object for the new asset.
 * @param Name      Object name for the new asset.
 * @param Flags     Object flags applied to the new asset.
 * @param Context   Unused.
 * @param Warn      Unused.
 * @return The newly created UElevenLabsEmotionPoseMap.
 */
UObject* UElevenLabsEmotionPoseMapFactory::FactoryCreateNew(
	UClass* Class, UObject* InParent, FName Name, EObjectFlags Flags,
	UObject* Context, FFeedbackContext* Warn)
{
	UElevenLabsEmotionPoseMap* const CreatedPoseMap =
		NewObject<UElevenLabsEmotionPoseMap>(InParent, Class, Name, Flags);
	return CreatedPoseMap;
}
|
||||
|
||||
/**
 * Display name for this factory's asset type.
 *
 * Uses NSLOCTEXT instead of FText::FromString so the user-facing label is
 * localizable text; the English source string is unchanged.
 *
 * @return Localizable display name "ElevenLabs Emotion Pose Map".
 */
FText UElevenLabsEmotionPoseMapFactory::GetDisplayName() const
{
	return NSLOCTEXT("ElevenLabsEmotionPoseMapFactory", "DisplayName", "ElevenLabs Emotion Pose Map");
}
|
||||
|
||||
/**
 * Asset category bitmask for this factory.
 *
 * @return EAssetTypeCategories::Misc (the "Miscellaneous" category).
 */
uint32 UElevenLabsEmotionPoseMapFactory::GetMenuCategories() const
{
	const uint32 CategoryMask = EAssetTypeCategories::Misc;
	return CategoryMask;
}
|
||||
@ -0,0 +1,27 @@
|
||||
// Copyright ASTERION. All Rights Reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "CoreMinimal.h"
|
||||
#include "Factories/Factory.h"
|
||||
#include "ElevenLabsEmotionPoseMapFactory.generated.h"
|
||||
|
||||
/**
|
||||
* Factory that lets users create ElevenLabsEmotionPoseMap assets
|
||||
* directly from the Content Browser (right-click → Miscellaneous).
|
||||
*/
|
||||
UCLASS()
|
||||
class UElevenLabsEmotionPoseMapFactory : public UFactory
|
||||
{
|
||||
GENERATED_BODY()
|
||||
|
||||
public:
|
||||
UElevenLabsEmotionPoseMapFactory();
|
||||
|
||||
virtual UObject* FactoryCreateNew(UClass* Class, UObject* InParent,
|
||||
FName Name, EObjectFlags Flags, UObject* Context,
|
||||
FFeedbackContext* Warn) override;
|
||||
|
||||
virtual FText GetDisplayName() const override;
|
||||
virtual uint32 GetMenuCategories() const override;
|
||||
};
|
||||
Loading…
x
Reference in New Issue
Block a user