WIP: Emotion facial expressions + client tool support

- ElevenLabs client tool call/result WebSocket support (set_emotion)
- EElevenLabsEmotion + EElevenLabsEmotionIntensity enums
- Emotion poses in PoseMap data asset (Normal/Medium/Extreme per emotion)
- Standalone ElevenLabsFacialExpressionComponent (separated from LipSync)
- Two-layer architecture: emotion base (eyes/brows/cheeks) + lip sync on top (mouth)
- Smooth emotion blending (~500ms configurable transitions)
- LipSync reads emotion curves from FacialExpressionComponent via GetCurrentEmotionCurves()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-02-24 18:08:23 +01:00
parent e57be0a1d9
commit f57bb65297
11 changed files with 675 additions and 1 deletions

View File

@ -179,6 +179,8 @@ void UElevenLabsConversationalAgentComponent::StartConversation()
&UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted);
WebSocketProxy->OnAgentResponsePart.AddDynamic(this,
&UElevenLabsConversationalAgentComponent::HandleAgentResponsePart);
WebSocketProxy->OnClientToolCall.AddDynamic(this,
&UElevenLabsConversationalAgentComponent::HandleClientToolCall);
}
// Pass configuration to the proxy before connecting.
@ -429,6 +431,8 @@ void UElevenLabsConversationalAgentComponent::HandleDisconnected(int32 StatusCod
GeneratingTickCount = 0;
TurnIndex = 0;
LastClosedTurnIndex = 0;
CurrentEmotion = EElevenLabsEmotion::Neutral;
CurrentEmotionIntensity = EElevenLabsEmotionIntensity::Medium;
{
FScopeLock Lock(&MicSendLock);
MicAccumulationBuffer.Reset();
@ -540,6 +544,77 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponsePart(const FStr
}
}
void UElevenLabsConversationalAgentComponent::HandleClientToolCall(const FElevenLabsClientToolCall& ToolCall)
{
// Built-in handler for the "set_emotion" tool: parse emotion + intensity, auto-respond, broadcast.
if (ToolCall.ToolName == TEXT("set_emotion"))
{
// Parse emotion
EElevenLabsEmotion NewEmotion = EElevenLabsEmotion::Neutral;
const FString* EmotionStr = ToolCall.Parameters.Find(TEXT("emotion"));
if (EmotionStr)
{
const FString Lower = EmotionStr->ToLower();
if (Lower == TEXT("joy") || Lower == TEXT("happy") || Lower == TEXT("happiness"))
NewEmotion = EElevenLabsEmotion::Joy;
else if (Lower == TEXT("sadness") || Lower == TEXT("sad"))
NewEmotion = EElevenLabsEmotion::Sadness;
else if (Lower == TEXT("anger") || Lower == TEXT("angry"))
NewEmotion = EElevenLabsEmotion::Anger;
else if (Lower == TEXT("surprise") || Lower == TEXT("surprised"))
NewEmotion = EElevenLabsEmotion::Surprise;
else if (Lower == TEXT("fear") || Lower == TEXT("afraid") || Lower == TEXT("scared"))
NewEmotion = EElevenLabsEmotion::Fear;
else if (Lower == TEXT("disgust") || Lower == TEXT("disgusted"))
NewEmotion = EElevenLabsEmotion::Disgust;
else if (Lower == TEXT("neutral"))
NewEmotion = EElevenLabsEmotion::Neutral;
else
UE_LOG(LogElevenLabsAgent, Warning, TEXT("Unknown emotion '%s', defaulting to Neutral."), **EmotionStr);
}
// Parse intensity (default: medium)
EElevenLabsEmotionIntensity NewIntensity = EElevenLabsEmotionIntensity::Medium;
const FString* IntensityStr = ToolCall.Parameters.Find(TEXT("intensity"));
if (IntensityStr)
{
const FString Lower = IntensityStr->ToLower();
if (Lower == TEXT("low") || Lower == TEXT("subtle") || Lower == TEXT("light"))
NewIntensity = EElevenLabsEmotionIntensity::Low;
else if (Lower == TEXT("medium") || Lower == TEXT("moderate") || Lower == TEXT("normal"))
NewIntensity = EElevenLabsEmotionIntensity::Medium;
else if (Lower == TEXT("high") || Lower == TEXT("strong") || Lower == TEXT("extreme") || Lower == TEXT("intense"))
NewIntensity = EElevenLabsEmotionIntensity::High;
else
UE_LOG(LogElevenLabsAgent, Warning, TEXT("Unknown intensity '%s', defaulting to Medium."), **IntensityStr);
}
CurrentEmotion = NewEmotion;
CurrentEmotionIntensity = NewIntensity;
const double T = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogElevenLabsAgent, Log, TEXT("[T+%.2fs] Agent emotion changed to: %s (%s)"),
T, *UEnum::GetValueAsString(NewEmotion), *UEnum::GetValueAsString(NewIntensity));
OnAgentEmotionChanged.Broadcast(NewEmotion, NewIntensity);
// Auto-respond to the tool call so the agent can continue.
if (WebSocketProxy)
{
WebSocketProxy->SendClientToolResult(
ToolCall.ToolCallId,
FString::Printf(TEXT("emotion set to %s (%s)"),
*UEnum::GetValueAsString(NewEmotion),
*UEnum::GetValueAsString(NewIntensity)),
false);
}
}
else
{
// Unknown tool — forward to Blueprint for custom handling.
OnAgentClientToolCall.Broadcast(ToolCall);
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Audio playback
// ─────────────────────────────────────────────────────────────────────────────

View File

@ -0,0 +1,239 @@
// Copyright ASTERION. All Rights Reserved.
#include "ElevenLabsFacialExpressionComponent.h"
#include "ElevenLabsConversationalAgentComponent.h"
#include "ElevenLabsLipSyncPoseMap.h"
#include "Animation/AnimSequence.h"
#include "Components/SkeletalMeshComponent.h"
DEFINE_LOG_CATEGORY_STATIC(LogElevenLabsFacialExpr, Log, All);
// ─────────────────────────────────────────────────────────────────────────────
// Construction
// ─────────────────────────────────────────────────────────────────────────────
UElevenLabsFacialExpressionComponent::UElevenLabsFacialExpressionComponent()
{
	// Tick every frame so emotion curves can be blended smoothly; the tick
	// runs in the pre-physics group.
	PrimaryComponentTick.bCanEverTick = true;
	PrimaryComponentTick.TickGroup = TG_PrePhysics;
}
// ─────────────────────────────────────────────────────────────────────────────
// BeginPlay / EndPlay
// ─────────────────────────────────────────────────────────────────────────────
void UElevenLabsFacialExpressionComponent::BeginPlay()
{
Super::BeginPlay();
// Find the agent component on the same actor
AActor* Owner = GetOwner();
if (!Owner)
{
UE_LOG(LogElevenLabsFacialExpr, Warning, TEXT("No owner actor — facial expressions disabled."));
return;
}
auto* Agent = Owner->FindComponentByClass<UElevenLabsConversationalAgentComponent>();
if (Agent)
{
AgentComponent = Agent;
Agent->OnAgentEmotionChanged.AddDynamic(
this, &UElevenLabsFacialExpressionComponent::OnEmotionChanged);
UE_LOG(LogElevenLabsFacialExpr, Log,
TEXT("Facial expression bound to agent component on %s."), *Owner->GetName());
}
else
{
UE_LOG(LogElevenLabsFacialExpr, Warning,
TEXT("No ElevenLabsConversationalAgentComponent found on %s — "
"facial expression will not respond to emotion changes."),
*Owner->GetName());
}
// Extract emotion curves from PoseMap
InitializeEmotionPoses();
}
void UElevenLabsFacialExpressionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
{
	// Unbind from the agent's emotion delegate if the agent component that we
	// bound in BeginPlay is still alive.
	if (UElevenLabsConversationalAgentComponent* Agent = AgentComponent.Get())
	{
		Agent->OnAgentEmotionChanged.RemoveDynamic(
			this, &UElevenLabsFacialExpressionComponent::OnEmotionChanged);
	}
	Super::EndPlay(EndPlayReason);
}
// ─────────────────────────────────────────────────────────────────────────────
// Emotion pose initialization
// ─────────────────────────────────────────────────────────────────────────────
void UElevenLabsFacialExpressionComponent::InitializeEmotionPoses()
{
EmotionCurveMap.Reset();
if (!PoseMap || PoseMap->EmotionPoses.Num() == 0)
{
UE_LOG(LogElevenLabsFacialExpr, Log,
TEXT("No emotion poses assigned in PoseMap — facial expressions disabled."));
return;
}
int32 EmotionCount = 0;
for (const auto& EmotionPair : PoseMap->EmotionPoses)
{
const EElevenLabsEmotion Emotion = EmotionPair.Key;
const FElevenLabsEmotionPoseSet& PoseSet = EmotionPair.Value;
auto& IntensityMap = EmotionCurveMap.FindOrAdd(Emotion);
if (PoseSet.Normal)
{
IntensityMap.Add(EElevenLabsEmotionIntensity::Low, ExtractCurvesFromAnim(PoseSet.Normal));
++EmotionCount;
}
if (PoseSet.Medium)
{
IntensityMap.Add(EElevenLabsEmotionIntensity::Medium, ExtractCurvesFromAnim(PoseSet.Medium));
++EmotionCount;
}
if (PoseSet.Extreme)
{
IntensityMap.Add(EElevenLabsEmotionIntensity::High, ExtractCurvesFromAnim(PoseSet.Extreme));
++EmotionCount;
}
}
UE_LOG(LogElevenLabsFacialExpr, Log,
TEXT("=== Emotion poses: %d emotions, %d total anim slots loaded ==="),
PoseMap->EmotionPoses.Num(), EmotionCount);
}
// Sample every float curve of the given AnimSequence at t=0 and return the
// non-zero values as a CurveName → Value map.
// NOTE(review): IAnimationDataModel / GetDataModel() is the editor-side
// animation data model in UE5 — confirm this path still returns curve data in
// packaged (non-editor) builds; otherwise emotion poses would silently come
// back empty at runtime.
TMap<FName, float> UElevenLabsFacialExpressionComponent::ExtractCurvesFromAnim(UAnimSequence* AnimSeq)
{
	TMap<FName, float> CurveValues;
	if (!AnimSeq) return CurveValues;
	const IAnimationDataModel* DataModel = AnimSeq->GetDataModel();
	if (!DataModel) return CurveValues;
	const TArray<FFloatCurve>& FloatCurves = DataModel->GetFloatCurves();
	for (const FFloatCurve& Curve : FloatCurves)
	{
		const FName CurveName = Curve.GetName();
		// Sampled at t=0 only — assumes each pose anim is a static (single-
		// frame) expression; TODO confirm for multi-frame assets.
		const float Value = Curve.FloatCurve.Eval(0.0f);
		// Drop near-zero curves to keep the per-emotion maps small.
		if (FMath::Abs(Value) < 0.001f) continue;
		CurveValues.Add(CurveName, Value);
	}
	UE_LOG(LogElevenLabsFacialExpr, Log,
		TEXT("Emotion anim '%s': Extracted %d non-zero curves."),
		*AnimSeq->GetName(), CurveValues.Num());
	return CurveValues;
}
// ─────────────────────────────────────────────────────────────────────────────
// Emotion change handler
// ─────────────────────────────────────────────────────────────────────────────
void UElevenLabsFacialExpressionComponent::OnEmotionChanged(
	EElevenLabsEmotion Emotion, EElevenLabsEmotionIntensity Intensity)
{
	// Ignore duplicate notifications.
	if (Emotion == ActiveEmotion && Intensity == ActiveEmotionIntensity)
		return;

	ActiveEmotion = Emotion;
	ActiveEmotionIntensity = Intensity;

	// Resolve the curve set for this emotion. The requested intensity is tried
	// first, then Medium → Low → High as fallbacks. If the emotion has no
	// poses at all, the target stays empty and the face blends back to rest.
	TargetEmotionCurves.Reset();
	if (const TMap<EElevenLabsEmotionIntensity, TMap<FName, float>>* IntensityMap = EmotionCurveMap.Find(Emotion))
	{
		const EElevenLabsEmotionIntensity Candidates[] = {
			Intensity,
			EElevenLabsEmotionIntensity::Medium,
			EElevenLabsEmotionIntensity::Low,
			EElevenLabsEmotionIntensity::High
		};
		for (EElevenLabsEmotionIntensity Candidate : Candidates)
		{
			if (const TMap<FName, float>* Curves = IntensityMap->Find(Candidate))
			{
				TargetEmotionCurves = *Curves;
				break;
			}
		}
	}

	// Restart the blend from whatever the face is currently showing.
	EmotionBlendAlpha = 0.0f;

	UE_LOG(LogElevenLabsFacialExpr, Log,
		TEXT("Emotion target set: %s (%s) — %d curves, blending over %.1fs..."),
		*UEnum::GetValueAsString(Emotion), *UEnum::GetValueAsString(Intensity),
		TargetEmotionCurves.Num(), EmotionBlendDuration);
}
// ─────────────────────────────────────────────────────────────────────────────
// Tick — smooth emotion blending
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Per-frame emotion blending.
 *
 * Advances the blend progress (0 → 1 over EmotionBlendDuration seconds) and
 * moves CurrentEmotionCurves toward TargetEmotionCurves.
 *
 * Fix: the previous implementation lerped the in-place-mutated current values
 * with the CUMULATIVE alpha, re-applying already-made progress every tick —
 * the transition converged much faster than the configured duration and was
 * nonlinear. Because Current is updated in place, the correct per-tick factor
 * is the renormalized step (aNew - aPrev) / (1 - aPrev), which yields an
 * overall linear blend and snaps exactly onto the target when alpha reaches 1.
 */
void UElevenLabsFacialExpressionComponent::TickComponent(
	float DeltaTime, ELevelTick TickType, FActorComponentTickFunction* ThisTickFunction)
{
	Super::TickComponent(DeltaTime, TickType, ThisTickFunction);

	if (EmotionCurveMap.Num() == 0)
		return; // No emotion data loaded

	// Steady state: the final blend step below lands exactly on the target,
	// so once alpha has reached 1 there is nothing left to do.
	if (EmotionBlendAlpha >= 1.0f)
		return;

	// Advance overall blend progress. EmotionBlendDuration is clamped away
	// from zero to avoid a division blow-up.
	const float PrevAlpha = EmotionBlendAlpha;
	const float BlendSpeed = 1.0f / FMath::Max(0.05f, EmotionBlendDuration);
	EmotionBlendAlpha = FMath::Min(1.0f, EmotionBlendAlpha + DeltaTime * BlendSpeed);

	// Renormalized per-tick lerp factor (PrevAlpha < 1 is guaranteed by the
	// early-out above, so the denominator is strictly positive). When
	// EmotionBlendAlpha hits 1 this evaluates to exactly 1 → hard snap.
	const float Step = (EmotionBlendAlpha - PrevAlpha) / (1.0f - PrevAlpha);

	// Union of curve names on both sides so curves can fade in AND out.
	TSet<FName> AllCurves;
	for (const auto& P : CurrentEmotionCurves) AllCurves.Add(P.Key);
	for (const auto& P : TargetEmotionCurves)  AllCurves.Add(P.Key);

	for (const FName& CurveName : AllCurves)
	{
		const float* CurrentPtr = CurrentEmotionCurves.Find(CurveName);
		const float* TargetPtr  = TargetEmotionCurves.Find(CurveName);
		const float Current = CurrentPtr ? *CurrentPtr : 0.0f;
		const float Target  = TargetPtr  ? *TargetPtr  : 0.0f;
		const float Blended = FMath::Lerp(Current, Target, Step);

		// Keep the map sparse: drop curves that have faded to ~zero.
		if (FMath::Abs(Blended) > 0.001f)
			CurrentEmotionCurves.FindOrAdd(CurveName) = Blended;
		else
			CurrentEmotionCurves.Remove(CurveName);
	}
}
// ─────────────────────────────────────────────────────────────────────────────
// Mouth curve classification
// ─────────────────────────────────────────────────────────────────────────────
bool UElevenLabsFacialExpressionComponent::IsMouthCurve(const FName& CurveName)
{
const FString Name = CurveName.ToString().ToLower();
return Name.Contains(TEXT("jaw"))
|| Name.Contains(TEXT("mouth"))
|| Name.Contains(TEXT("lips"))
|| Name.Contains(TEXT("tongue"))
|| Name.Contains(TEXT("cheekpuff"));
}

View File

@ -3,7 +3,7 @@
#include "ElevenLabsLipSyncComponent.h"
#include "ElevenLabsLipSyncPoseMap.h"
#include "ElevenLabsConversationalAgentComponent.h"
#include "ElevenLabsDefinitions.h"
#include "ElevenLabsFacialExpressionComponent.h"
#include "Components/SkeletalMeshComponent.h"
#include "Engine/SkeletalMesh.h"
#include "Animation/MorphTarget.h"
@ -539,6 +539,7 @@ void UElevenLabsLipSyncComponent::InitializePoseMappings()
UE_LOG(LogElevenLabsLipSync, Log,
TEXT("No phoneme pose AnimSequences assigned — using hardcoded ARKit mapping."));
}
}
void UElevenLabsLipSyncComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
@ -2274,6 +2275,44 @@ void UElevenLabsLipSyncComponent::MapVisemesToBlendshapes()
}
}
// ── Merge emotion base layer from FacialExpressionComponent ──────────
// Emotion provides the base expression (eyes, brows, cheeks).
// Lip sync overrides only mouth-area curves.
if (AActor* Owner = GetOwner())
{
if (auto* FaceExpr = Owner->FindComponentByClass<UElevenLabsFacialExpressionComponent>())
{
const TMap<FName, float>& EmotionCurves = FaceExpr->GetCurrentEmotionCurves();
if (EmotionCurves.Num() > 0)
{
// Collect which curves lip sync is actively driving (mouth area)
TSet<FName> LipSyncMouthCurves;
for (const auto& Pair : CurrentBlendshapes)
{
if (UElevenLabsFacialExpressionComponent::IsMouthCurve(Pair.Key) && Pair.Value > 0.01f)
LipSyncMouthCurves.Add(Pair.Key);
}
// Add non-mouth emotion curves (eyes, brows, cheeks, nose)
for (const auto& Pair : EmotionCurves)
{
if (!UElevenLabsFacialExpressionComponent::IsMouthCurve(Pair.Key))
{
// Emotion controls non-mouth curves exclusively
CurrentBlendshapes.FindOrAdd(Pair.Key) = Pair.Value;
}
else if (!LipSyncMouthCurves.Contains(Pair.Key))
{
// Mouth curves from emotion only if lip sync has nothing active there
// (e.g. during silence, the emotion's mouth pose shows through)
CurrentBlendshapes.FindOrAdd(Pair.Key) = Pair.Value;
}
// Otherwise: lip sync already has a value for this mouth curve — keep it
}
}
}
}
// Clamp all values. Use wider range for pose data (CTRL curves can exceed 1.0).
const float MaxClamp = bUsePoseMapping ? 2.0f : 1.0f;
for (auto& Pair : CurrentBlendshapes)

View File

@ -391,6 +391,10 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message)
// Silently ignore — corrected text after interruption.
UE_LOG(LogElevenLabsWS, Verbose, TEXT("agent_response_correction received (ignored)."));
}
else if (MsgType == ElevenLabsMessageType::ClientToolCall)
{
HandleClientToolCall(Root);
}
else if (MsgType == ElevenLabsMessageType::InterruptionEvent)
{
HandleInterruption(Root);
@ -658,6 +662,64 @@ void UElevenLabsWebSocketProxy::HandleInterruption(const TSharedPtr<FJsonObject>
OnInterrupted.Broadcast();
}
/**
 * Parses an incoming client_tool_call message and broadcasts it.
 *
 * Incoming: { "type": "client_tool_call", "client_tool_call": {
 *     "tool_name": "set_emotion", "tool_call_id": "abc123",
 *     "parameters": { "emotion": "surprise" } } }
 *
 * Parameters are flattened into string key/value pairs: strings, numbers and
 * booleans stringify via TryGetString. Nested objects/arrays cannot be
 * stringified by FJsonValue::AsString (it returns an empty string), so those
 * now log a warning instead of being dropped to "" silently.
 */
void UElevenLabsWebSocketProxy::HandleClientToolCall(const TSharedPtr<FJsonObject>& Root)
{
	const TSharedPtr<FJsonObject>* ToolCallObj = nullptr;
	if (!Root->TryGetObjectField(TEXT("client_tool_call"), ToolCallObj) || !ToolCallObj)
	{
		UE_LOG(LogElevenLabsWS, Warning, TEXT("client_tool_call: missing client_tool_call object."));
		return;
	}

	FElevenLabsClientToolCall ToolCall;
	(*ToolCallObj)->TryGetStringField(TEXT("tool_name"), ToolCall.ToolName);
	(*ToolCallObj)->TryGetStringField(TEXT("tool_call_id"), ToolCall.ToolCallId);

	// Flatten parameters into string key-value pairs.
	const TSharedPtr<FJsonObject>* ParamsObj = nullptr;
	if ((*ToolCallObj)->TryGetObjectField(TEXT("parameters"), ParamsObj) && ParamsObj)
	{
		for (const auto& Pair : (*ParamsObj)->Values)
		{
			FString Value;
			if (Pair.Value->TryGetString(Value))
			{
				// Scalars (string / number / bool) stringify cleanly.
				ToolCall.Parameters.Add(Pair.Key, Value);
			}
			else
			{
				// Object or array value: AsString cannot serialize these and
				// yields "". Keep the entry so the key stays visible to the
				// handler, but warn so tool authors know nested parameter
				// values are unsupported.
				UE_LOG(LogElevenLabsWS, Warning,
					TEXT("client_tool_call '%s': parameter '%s' has a non-scalar JSON value; passing empty string."),
					*ToolCall.ToolName, *Pair.Key);
				ToolCall.Parameters.Add(Pair.Key, Pair.Value->AsString());
			}
		}
	}

	const double T = FPlatformTime::Seconds() - SessionStartTime;
	UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] Client tool call: %s (id=%s, %d params)"),
		T, *ToolCall.ToolName, *ToolCall.ToolCallId, ToolCall.Parameters.Num());

	OnClientToolCall.Broadcast(ToolCall);
}
void UElevenLabsWebSocketProxy::SendClientToolResult(const FString& ToolCallId, const FString& Result, bool bIsError)
{
// Outgoing: { "type": "client_tool_result", "tool_call_id": "abc123",
// "result": "emotion set to surprise", "is_error": false }
TSharedPtr<FJsonObject> Msg = MakeShareable(new FJsonObject());
Msg->SetStringField(TEXT("type"), ElevenLabsMessageType::ClientToolResult);
Msg->SetStringField(TEXT("tool_call_id"), ToolCallId);
Msg->SetStringField(TEXT("result"), Result);
Msg->SetBoolField(TEXT("is_error"), bIsError);
SendJsonMessage(Msg);
const double T = FPlatformTime::Seconds() - SessionStartTime;
UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] Sent client_tool_result for %s: %s (error=%s)"),
T, *ToolCallId, *Result, bIsError ? TEXT("true") : TEXT("false"));
}
void UElevenLabsWebSocketProxy::HandlePing(const TSharedPtr<FJsonObject>& Root)
{
// Reply with a pong to keep the connection alive.

View File

@ -62,6 +62,23 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAgentPartialResponse,
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentResponseTimeout);
/**
* Fired when the agent sets an emotion via the "set_emotion" client tool.
* Use this to drive facial expressions on your character (MetaHuman blendshapes, etc.).
* The emotion changes BEFORE the corresponding audio arrives, giving time to blend.
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FOnAgentEmotionChanged,
EElevenLabsEmotion, Emotion,
EElevenLabsEmotionIntensity, Intensity);
/**
* Fired for any client tool call that is NOT automatically handled (i.e. not "set_emotion").
* Use this to implement custom client tools in Blueprint.
* You MUST call SendClientToolResult on the WebSocketProxy to acknowledge the call.
*/
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAgentClientToolCall,
const FElevenLabsClientToolCall&, ToolCall);
// Non-dynamic delegate for raw agent audio (high-frequency, C++ consumers only).
// Delivers PCM chunks as int16, 16kHz mono, little-endian.
DECLARE_MULTICAST_DELEGATE_OneParam(FOnAgentAudioData, const TArray<uint8>& /*PCMData*/);
@ -208,6 +225,24 @@ public:
meta = (ToolTip = "Fires if the server doesn't respond within ResponseTimeoutSeconds.\nUse to show 'try again' or re-open the mic automatically."))
FOnAgentResponseTimeout OnAgentResponseTimeout;
/** Fired when the agent changes emotion via the "set_emotion" client tool. The emotion is set BEFORE the corresponding audio arrives, giving you time to smoothly blend facial expressions. */
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
meta = (ToolTip = "Fires when the agent sets an emotion (joy, sadness, surprise, fear, anger, disgust).\nDriven by the 'set_emotion' client tool. Arrives before the audio."))
FOnAgentEmotionChanged OnAgentEmotionChanged;
/** Fired for client tool calls that are NOT automatically handled (i.e. not "set_emotion"). You must call GetWebSocketProxy()->SendClientToolResult() to respond. */
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
meta = (ToolTip = "Fires for custom client tool calls (not set_emotion).\nYou must respond via GetWebSocketProxy()->SendClientToolResult()."))
FOnAgentClientToolCall OnAgentClientToolCall;
/** The current emotion of the agent, as set by the "set_emotion" client tool. Defaults to Neutral. */
UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs")
EElevenLabsEmotion CurrentEmotion = EElevenLabsEmotion::Neutral;
/** The current emotion intensity. Defaults to Medium. */
UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs")
EElevenLabsEmotionIntensity CurrentEmotionIntensity = EElevenLabsEmotionIntensity::Medium;
// ── Raw audio data (C++ only, used by LipSync component) ────────────────
/** Raw PCM audio from the agent (int16, 16kHz mono). Fires for each WebSocket audio chunk.
* Used internally by UElevenLabsLipSyncComponent for spectral analysis. */
@ -312,6 +347,9 @@ private:
UFUNCTION()
void HandleAgentResponsePart(const FString& PartialText);
UFUNCTION()
void HandleClientToolCall(const FElevenLabsClientToolCall& ToolCall);
// ── Audio playback ────────────────────────────────────────────────────────
void InitAudioPlayback();
void EnqueueAgentAudio(const TArray<uint8>& PCMData);

View File

@ -108,3 +108,50 @@ struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsTranscriptSegment
UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs")
bool bIsFinal = false;
};
// ─────────────────────────────────────────────────────────────────────────────
// Agent emotion (driven by client tool "set_emotion" from the LLM)
// ─────────────────────────────────────────────────────────────────────────────

/** Basic emotion set the agent can express. Produced by parsing the
 * "set_emotion" client tool's "emotion" parameter (common synonyms such as
 * "happy" map to Joy; unknown strings fall back to Neutral). */
UENUM(BlueprintType)
enum class EElevenLabsEmotion : uint8
{
	Neutral UMETA(DisplayName = "Neutral"),
	Joy UMETA(DisplayName = "Joy"),
	Sadness UMETA(DisplayName = "Sadness"),
	Anger UMETA(DisplayName = "Anger"),
	Surprise UMETA(DisplayName = "Surprise"),
	Fear UMETA(DisplayName = "Fear"),
	Disgust UMETA(DisplayName = "Disgust"),
};
// ─────────────────────────────────────────────────────────────────────────────
// Emotion intensity (maps to Normal/Medium/Extreme pose variants)
// ─────────────────────────────────────────────────────────────────────────────

/** Intensity of an emotion; selects the Normal / Medium / Extreme pose
 * variant in FElevenLabsEmotionPoseSet. Unknown intensity strings fall back
 * to Medium. */
UENUM(BlueprintType)
enum class EElevenLabsEmotionIntensity : uint8
{
	Low UMETA(DisplayName = "Low (Normal)"),
	Medium UMETA(DisplayName = "Medium"),
	High UMETA(DisplayName = "High (Extreme)"),
};
// ─────────────────────────────────────────────────────────────────────────────
// Client tool call received from ElevenLabs server
// ─────────────────────────────────────────────────────────────────────────────

/** A tool invocation requested by the agent via a client_tool_call message.
 * Every call must be answered with SendClientToolResult (echoing ToolCallId). */
USTRUCT(BlueprintType)
struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsClientToolCall
{
	GENERATED_BODY()

	/** Name of the tool the agent wants to invoke (e.g. "set_emotion"). */
	UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs")
	FString ToolName;

	/** Unique ID for this tool invocation — must be echoed back in client_tool_result. */
	UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs")
	FString ToolCallId;

	/** Raw JSON parameters as key-value string pairs.
	 * NOTE(review): values appear to be stringified scalars only — nested
	 * objects/arrays come through empty; verify against the proxy's
	 * HandleClientToolCall flattening. */
	UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs")
	TMap<FString, FString> Parameters;
};

View File

@ -0,0 +1,116 @@
// Copyright ASTERION. All Rights Reserved.
#pragma once
#include "CoreMinimal.h"
#include "Components/ActorComponent.h"
#include "ElevenLabsDefinitions.h"
#include "ElevenLabsFacialExpressionComponent.generated.h"
class UElevenLabsConversationalAgentComponent;
class UElevenLabsLipSyncPoseMap;
class USkeletalMeshComponent;
// ─────────────────────────────────────────────────────────────────────────────
// UElevenLabsFacialExpressionComponent
//
// Drives emotion-based facial expressions on a MetaHuman (or any skeletal mesh)
// as a BASE layer. Lip sync (from ElevenLabsLipSyncComponent) modulates on top,
// overriding only mouth-area curves.
//
// Workflow:
// 1. Assign a PoseMap data asset with Emotion Poses filled in.
// 2. Assign the TargetMesh (same mesh as the LipSync component).
// 3. The component listens to OnAgentEmotionChanged from the agent component.
// 4. Emotion curves are smoothly blended (~500ms transitions).
// 5. The LipSync component reads GetCurrentEmotionCurves() to merge as base layer.
// ─────────────────────────────────────────────────────────────────────────────
UCLASS(ClassGroup = "ElevenLabs", meta = (BlueprintSpawnableComponent),
	DisplayName = "ElevenLabs Facial Expression")
class PS_AI_AGENT_ELEVENLABS_API UElevenLabsFacialExpressionComponent : public UActorComponent
{
	GENERATED_BODY()

public:
	UElevenLabsFacialExpressionComponent();

	// ── Configuration ─────────────────────────────────────────────────────────

	/** Pose map asset containing emotion AnimSequences (Normal / Medium / Extreme per emotion).
	 * Can be the same PoseMap asset used by the LipSync component. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression",
		meta = (ToolTip = "Pose map with Emotion Poses filled in.\nCan be the same asset as the LipSync component."))
	TObjectPtr<UElevenLabsLipSyncPoseMap> PoseMap;

	/** Skeletal mesh to apply emotion curves to.
	 * Should be the same mesh as the LipSync component's TargetMesh.
	 * NOTE(review): this property is not read anywhere in the visible
	 * implementation — curves are consumed by the LipSync component via
	 * GetCurrentEmotionCurves(); confirm whether TargetMesh is still needed. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression",
		meta = (ToolTip = "Skeletal mesh for emotion curves.\nShould match the LipSync component's TargetMesh."))
	TObjectPtr<USkeletalMeshComponent> TargetMesh;

	/** Emotion transition duration in seconds. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression",
		meta = (ClampMin = "0.1", ClampMax = "3.0",
			ToolTip = "How long (seconds) to blend between emotions.\n0.5 = snappy, 1.5 = smooth."))
	float EmotionBlendDuration = 0.5f;

	// ── Getters ───────────────────────────────────────────────────────────────

	/** Get the current smoothed emotion curves (for the LipSync component to merge).
	 * The returned reference is re-written every tick while a blend is active. */
	UFUNCTION(BlueprintCallable, Category = "ElevenLabs|FacialExpression")
	const TMap<FName, float>& GetCurrentEmotionCurves() const { return CurrentEmotionCurves; }

	/** Get the active emotion. */
	UFUNCTION(BlueprintPure, Category = "ElevenLabs|FacialExpression")
	EElevenLabsEmotion GetActiveEmotion() const { return ActiveEmotion; }

	/** Get the active emotion intensity. */
	UFUNCTION(BlueprintPure, Category = "ElevenLabs|FacialExpression")
	EElevenLabsEmotionIntensity GetActiveIntensity() const { return ActiveEmotionIntensity; }

	/** Check if a curve name belongs to the mouth area (overridden by lip sync). */
	UFUNCTION(BlueprintPure, Category = "ElevenLabs|FacialExpression")
	static bool IsMouthCurve(const FName& CurveName);

	// ── UActorComponent overrides ─────────────────────────────────────────────

	virtual void BeginPlay() override;
	virtual void EndPlay(const EEndPlayReason::Type EndPlayReason) override;
	virtual void TickComponent(float DeltaTime, ELevelTick TickType,
		FActorComponentTickFunction* ThisTickFunction) override;

private:
	// ── Event handlers ────────────────────────────────────────────────────────

	/** Called when the agent changes emotion via client tool. */
	UFUNCTION()
	void OnEmotionChanged(EElevenLabsEmotion Emotion, EElevenLabsEmotionIntensity Intensity);

	// ── Curve extraction ──────────────────────────────────────────────────────

	/** Extract curve values at t=0 from an AnimSequence. */
	TMap<FName, float> ExtractCurvesFromAnim(UAnimSequence* AnimSeq);

	/** Initialize emotion curve data from PoseMap at BeginPlay. */
	void InitializeEmotionPoses();

	// ── State ─────────────────────────────────────────────────────────────────

	/** Extracted curve data: Emotion → Intensity → { CurveName → Value }. */
	TMap<EElevenLabsEmotion, TMap<EElevenLabsEmotionIntensity, TMap<FName, float>>> EmotionCurveMap;

	/** Current smoothed emotion curves (blended each tick). */
	TMap<FName, float> CurrentEmotionCurves;

	/** Target emotion curves (set when emotion changes, blended toward). */
	TMap<FName, float> TargetEmotionCurves;

	/** Current blend progress (0 = old emotion, 1 = target emotion).
	 * Starts at 1 so no blend runs until the first emotion change. */
	float EmotionBlendAlpha = 1.0f;

	/** Active emotion (for change detection). */
	EElevenLabsEmotion ActiveEmotion = EElevenLabsEmotion::Neutral;
	// Active intensity, paired with ActiveEmotion for duplicate-change detection.
	EElevenLabsEmotionIntensity ActiveEmotionIntensity = EElevenLabsEmotionIntensity::Medium;

	/** Cached reference to the agent component on the same Actor. */
	TWeakObjectPtr<UElevenLabsConversationalAgentComponent> AgentComponent;
};

View File

@ -5,10 +5,35 @@
#include "CoreMinimal.h"
#include "Engine/DataAsset.h"
#include "Engine/AssetManager.h"
#include "ElevenLabsDefinitions.h"
#include "ElevenLabsLipSyncPoseMap.generated.h"
class UAnimSequence;
// ─────────────────────────────────────────────────────────────────────────────
// Emotion pose set: 3 intensity levels (Normal / Medium / Extreme)
// ─────────────────────────────────────────────────────────────────────────────

/** The three intensity variants of one emotion's facial pose. Any slot may be
 * left unassigned — the facial expression component falls back to another
 * intensity when the requested one is missing. */
USTRUCT(BlueprintType)
struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsEmotionPoseSet
{
	GENERATED_BODY()

	/** Low intensity expression (subtle). E.g. MHF_Happy_N */
	UPROPERTY(EditAnywhere, BlueprintReadWrite,
		meta = (ToolTip = "Low intensity (Normal). E.g. MHF_Happy_N"))
	TObjectPtr<UAnimSequence> Normal;

	/** Medium intensity expression. E.g. MHF_Happy_M */
	UPROPERTY(EditAnywhere, BlueprintReadWrite,
		meta = (ToolTip = "Medium intensity. E.g. MHF_Happy_M"))
	TObjectPtr<UAnimSequence> Medium;

	/** High intensity expression (extreme). E.g. MHF_Happy_E */
	UPROPERTY(EditAnywhere, BlueprintReadWrite,
		meta = (ToolTip = "High intensity (Extreme). E.g. MHF_Happy_E"))
	TObjectPtr<UAnimSequence> Extreme;
};
/**
* Reusable data asset that maps OVR visemes to phoneme pose AnimSequences.
*
@ -103,4 +128,17 @@ public:
UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Phoneme Poses",
meta = (ToolTip = "Close back vowel (OO). E.g. MHF_OU"))
TObjectPtr<UAnimSequence> PoseOU;
	// ── Emotion Poses ────────────────────────────────────────────────────────
	//
	// Facial expression animations for each emotion, with 3 intensity levels.
	// These are applied as a BASE layer (eyes, eyebrows, cheeks).
	// Lip sync MODULATES on top, overriding only mouth-area curves.

	/** Map of emotions to their pose sets (Normal / Medium / Extreme).
	 * Add entries for each emotion your agent uses (Joy, Sadness, Anger, Surprise, Fear, Disgust).
	 * Neutral is optional — its absence means no base expression. */
	UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Emotion Poses",
		meta = (ToolTip = "Emotion → AnimSequence mapping with 3 intensity levels.\nThese drive the base facial expression (eyes, brows, cheeks).\nLip sync overrides the mouth area on top."))
	TMap<EElevenLabsEmotion, FElevenLabsEmotionPoseSet> EmotionPoses;
};

View File

@ -48,6 +48,10 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnElevenLabsAgentResponseStarted);
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnElevenLabsAgentResponsePart,
const FString&, PartialText);
/** Fired when the server sends a client_tool_call — the agent wants the client to execute a tool. */
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnElevenLabsClientToolCall,
const FElevenLabsClientToolCall&, ToolCall);
// ─────────────────────────────────────────────────────────────────────────────
// WebSocket Proxy
@ -103,6 +107,10 @@ public:
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
FOnElevenLabsAgentResponsePart OnAgentResponsePart;
/** Fired when the agent invokes a client tool. Handle the call and reply with SendClientToolResult. */
UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
FOnElevenLabsClientToolCall OnClientToolCall;
// ── Lifecycle ─────────────────────────────────────────────────────────────
/**
@ -172,6 +180,17 @@ public:
UFUNCTION(BlueprintCallable, Category = "ElevenLabs")
void SendInterrupt();
/**
* Send the result of a client tool call back to ElevenLabs.
* Must be called after receiving a OnClientToolCall event.
*
* @param ToolCallId The tool_call_id from the original client_tool_call.
* @param Result A string result to return to the agent.
* @param bIsError True if the tool execution failed.
*/
UFUNCTION(BlueprintCallable, Category = "ElevenLabs")
void SendClientToolResult(const FString& ToolCallId, const FString& Result, bool bIsError = false);
// ── Info ──────────────────────────────────────────────────────────────────
UFUNCTION(BlueprintPure, Category = "ElevenLabs")
@ -193,6 +212,7 @@ private:
void HandleAgentResponse(const TSharedPtr<FJsonObject>& Payload);
void HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Payload);
void HandleInterruption(const TSharedPtr<FJsonObject>& Payload);
void HandleClientToolCall(const TSharedPtr<FJsonObject>& Payload);
void HandlePing(const TSharedPtr<FJsonObject>& Payload);
/** Build and send a JSON text frame to the server. */