diff --git a/Unreal/PS_AI_Agent/Content/Demo_VoiceOnly.umap b/Unreal/PS_AI_Agent/Content/Demo_VoiceOnly.umap index b00a002..a7094bb 100644 Binary files a/Unreal/PS_AI_Agent/Content/Demo_VoiceOnly.umap and b/Unreal/PS_AI_Agent/Content/Demo_VoiceOnly.umap differ diff --git a/Unreal/PS_AI_Agent/Content/MetaHumans/Taro/BP_Taro.uasset b/Unreal/PS_AI_Agent/Content/MetaHumans/Taro/BP_Taro.uasset index f17f98a..bf43278 100644 Binary files a/Unreal/PS_AI_Agent/Content/MetaHumans/Taro/BP_Taro.uasset and b/Unreal/PS_AI_Agent/Content/MetaHumans/Taro/BP_Taro.uasset differ diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp index 1ef314a..0adf3cf 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp @@ -179,6 +179,8 @@ void UElevenLabsConversationalAgentComponent::StartConversation() &UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted); WebSocketProxy->OnAgentResponsePart.AddDynamic(this, &UElevenLabsConversationalAgentComponent::HandleAgentResponsePart); + WebSocketProxy->OnClientToolCall.AddDynamic(this, + &UElevenLabsConversationalAgentComponent::HandleClientToolCall); } // Pass configuration to the proxy before connecting. 
@@ -429,6 +431,8 @@ void UElevenLabsConversationalAgentComponent::HandleDisconnected(int32 StatusCod GeneratingTickCount = 0; TurnIndex = 0; LastClosedTurnIndex = 0; + CurrentEmotion = EElevenLabsEmotion::Neutral; + CurrentEmotionIntensity = EElevenLabsEmotionIntensity::Medium; { FScopeLock Lock(&MicSendLock); MicAccumulationBuffer.Reset(); @@ -540,6 +544,77 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponsePart(const FStr } } +void UElevenLabsConversationalAgentComponent::HandleClientToolCall(const FElevenLabsClientToolCall& ToolCall) +{ + // Built-in handler for the "set_emotion" tool: parse emotion + intensity, auto-respond, broadcast. + if (ToolCall.ToolName == TEXT("set_emotion")) + { + // Parse emotion + EElevenLabsEmotion NewEmotion = EElevenLabsEmotion::Neutral; + const FString* EmotionStr = ToolCall.Parameters.Find(TEXT("emotion")); + if (EmotionStr) + { + const FString Lower = EmotionStr->ToLower(); + if (Lower == TEXT("joy") || Lower == TEXT("happy") || Lower == TEXT("happiness")) + NewEmotion = EElevenLabsEmotion::Joy; + else if (Lower == TEXT("sadness") || Lower == TEXT("sad")) + NewEmotion = EElevenLabsEmotion::Sadness; + else if (Lower == TEXT("anger") || Lower == TEXT("angry")) + NewEmotion = EElevenLabsEmotion::Anger; + else if (Lower == TEXT("surprise") || Lower == TEXT("surprised")) + NewEmotion = EElevenLabsEmotion::Surprise; + else if (Lower == TEXT("fear") || Lower == TEXT("afraid") || Lower == TEXT("scared")) + NewEmotion = EElevenLabsEmotion::Fear; + else if (Lower == TEXT("disgust") || Lower == TEXT("disgusted")) + NewEmotion = EElevenLabsEmotion::Disgust; + else if (Lower == TEXT("neutral")) + NewEmotion = EElevenLabsEmotion::Neutral; + else + UE_LOG(LogElevenLabsAgent, Warning, TEXT("Unknown emotion '%s', defaulting to Neutral."), **EmotionStr); + } + + // Parse intensity (default: medium) + EElevenLabsEmotionIntensity NewIntensity = EElevenLabsEmotionIntensity::Medium; + const FString* IntensityStr = 
ToolCall.Parameters.Find(TEXT("intensity")); + if (IntensityStr) + { + const FString Lower = IntensityStr->ToLower(); + if (Lower == TEXT("low") || Lower == TEXT("subtle") || Lower == TEXT("light")) + NewIntensity = EElevenLabsEmotionIntensity::Low; + else if (Lower == TEXT("medium") || Lower == TEXT("moderate") || Lower == TEXT("normal")) + NewIntensity = EElevenLabsEmotionIntensity::Medium; + else if (Lower == TEXT("high") || Lower == TEXT("strong") || Lower == TEXT("extreme") || Lower == TEXT("intense")) + NewIntensity = EElevenLabsEmotionIntensity::High; + else + UE_LOG(LogElevenLabsAgent, Warning, TEXT("Unknown intensity '%s', defaulting to Medium."), **IntensityStr); + } + + CurrentEmotion = NewEmotion; + CurrentEmotionIntensity = NewIntensity; + const double T = FPlatformTime::Seconds() - SessionStartTime; + UE_LOG(LogElevenLabsAgent, Log, TEXT("[T+%.2fs] Agent emotion changed to: %s (%s)"), + T, *UEnum::GetValueAsString(NewEmotion), *UEnum::GetValueAsString(NewIntensity)); + + OnAgentEmotionChanged.Broadcast(NewEmotion, NewIntensity); + + // Auto-respond to the tool call so the agent can continue. + if (WebSocketProxy) + { + WebSocketProxy->SendClientToolResult( + ToolCall.ToolCallId, + FString::Printf(TEXT("emotion set to %s (%s)"), + *UEnum::GetValueAsString(NewEmotion), + *UEnum::GetValueAsString(NewIntensity)), + false); + } + } + else + { + // Unknown tool — forward to Blueprint for custom handling. 
+ OnAgentClientToolCall.Broadcast(ToolCall); + } +} + // ───────────────────────────────────────────────────────────────────────────── // Audio playback // ───────────────────────────────────────────────────────────────────────────── diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsFacialExpressionComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsFacialExpressionComponent.cpp new file mode 100644 index 0000000..1213771 --- /dev/null +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsFacialExpressionComponent.cpp @@ -0,0 +1,239 @@ +// Copyright ASTERION. All Rights Reserved. + +#include "ElevenLabsFacialExpressionComponent.h" +#include "ElevenLabsConversationalAgentComponent.h" +#include "ElevenLabsLipSyncPoseMap.h" +#include "Animation/AnimSequence.h" +#include "Components/SkeletalMeshComponent.h" + +DEFINE_LOG_CATEGORY_STATIC(LogElevenLabsFacialExpr, Log, All); + +// ───────────────────────────────────────────────────────────────────────────── +// Construction +// ───────────────────────────────────────────────────────────────────────────── + +UElevenLabsFacialExpressionComponent::UElevenLabsFacialExpressionComponent() +{ + PrimaryComponentTick.bCanEverTick = true; + PrimaryComponentTick.TickGroup = TG_PrePhysics; +} + +// ───────────────────────────────────────────────────────────────────────────── +// BeginPlay / EndPlay +// ───────────────────────────────────────────────────────────────────────────── + +void UElevenLabsFacialExpressionComponent::BeginPlay() +{ + Super::BeginPlay(); + + // Find the agent component on the same actor + AActor* Owner = GetOwner(); + if (!Owner) + { + UE_LOG(LogElevenLabsFacialExpr, Warning, TEXT("No owner actor — facial expressions disabled.")); + return; + } + + auto* Agent = Owner->FindComponentByClass(); + if (Agent) + { + AgentComponent = Agent; + 
Agent->OnAgentEmotionChanged.AddDynamic( + this, &UElevenLabsFacialExpressionComponent::OnEmotionChanged); + + UE_LOG(LogElevenLabsFacialExpr, Log, + TEXT("Facial expression bound to agent component on %s."), *Owner->GetName()); + } + else + { + UE_LOG(LogElevenLabsFacialExpr, Warning, + TEXT("No ElevenLabsConversationalAgentComponent found on %s — " + "facial expression will not respond to emotion changes."), + *Owner->GetName()); + } + + // Extract emotion curves from PoseMap + InitializeEmotionPoses(); +} + +void UElevenLabsFacialExpressionComponent::EndPlay(const EEndPlayReason::Type EndPlayReason) +{ + if (AgentComponent.IsValid()) + { + AgentComponent->OnAgentEmotionChanged.RemoveDynamic( + this, &UElevenLabsFacialExpressionComponent::OnEmotionChanged); + } + + Super::EndPlay(EndPlayReason); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Emotion pose initialization +// ───────────────────────────────────────────────────────────────────────────── + +void UElevenLabsFacialExpressionComponent::InitializeEmotionPoses() +{ + EmotionCurveMap.Reset(); + + if (!PoseMap || PoseMap->EmotionPoses.Num() == 0) + { + UE_LOG(LogElevenLabsFacialExpr, Log, + TEXT("No emotion poses assigned in PoseMap — facial expressions disabled.")); + return; + } + + int32 EmotionCount = 0; + for (const auto& EmotionPair : PoseMap->EmotionPoses) + { + const EElevenLabsEmotion Emotion = EmotionPair.Key; + const FElevenLabsEmotionPoseSet& PoseSet = EmotionPair.Value; + + auto& IntensityMap = EmotionCurveMap.FindOrAdd(Emotion); + + if (PoseSet.Normal) + { + IntensityMap.Add(EElevenLabsEmotionIntensity::Low, ExtractCurvesFromAnim(PoseSet.Normal)); + ++EmotionCount; + } + if (PoseSet.Medium) + { + IntensityMap.Add(EElevenLabsEmotionIntensity::Medium, ExtractCurvesFromAnim(PoseSet.Medium)); + ++EmotionCount; + } + if (PoseSet.Extreme) + { + IntensityMap.Add(EElevenLabsEmotionIntensity::High, ExtractCurvesFromAnim(PoseSet.Extreme)); + ++EmotionCount; + } 
+ } + + UE_LOG(LogElevenLabsFacialExpr, Log, + TEXT("=== Emotion poses: %d emotions, %d total anim slots loaded ==="), + PoseMap->EmotionPoses.Num(), EmotionCount); +} + +TMap UElevenLabsFacialExpressionComponent::ExtractCurvesFromAnim(UAnimSequence* AnimSeq) +{ + TMap CurveValues; + if (!AnimSeq) return CurveValues; + + const IAnimationDataModel* DataModel = AnimSeq->GetDataModel(); + if (!DataModel) return CurveValues; + + const TArray& FloatCurves = DataModel->GetFloatCurves(); + for (const FFloatCurve& Curve : FloatCurves) + { + const FName CurveName = Curve.GetName(); + const float Value = Curve.FloatCurve.Eval(0.0f); + if (FMath::Abs(Value) < 0.001f) continue; + CurveValues.Add(CurveName, Value); + } + + UE_LOG(LogElevenLabsFacialExpr, Log, + TEXT("Emotion anim '%s': Extracted %d non-zero curves."), + *AnimSeq->GetName(), CurveValues.Num()); + return CurveValues; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Emotion change handler +// ───────────────────────────────────────────────────────────────────────────── + +void UElevenLabsFacialExpressionComponent::OnEmotionChanged( + EElevenLabsEmotion Emotion, EElevenLabsEmotionIntensity Intensity) +{ + if (Emotion == ActiveEmotion && Intensity == ActiveEmotionIntensity) + return; // No change + + ActiveEmotion = Emotion; + ActiveEmotionIntensity = Intensity; + + // Look up target emotion curves + TargetEmotionCurves.Reset(); + const auto* IntensityMap = EmotionCurveMap.Find(Emotion); + if (IntensityMap) + { + const auto* Curves = IntensityMap->Find(Intensity); + if (Curves) + { + TargetEmotionCurves = *Curves; + } + else + { + // Fallback: try Medium, then Low, then High + static const EElevenLabsEmotionIntensity Fallbacks[] = { + EElevenLabsEmotionIntensity::Medium, + EElevenLabsEmotionIntensity::Low, + EElevenLabsEmotionIntensity::High + }; + for (EElevenLabsEmotionIntensity Fb : Fallbacks) + { + Curves = IntensityMap->Find(Fb); + if (Curves) { TargetEmotionCurves = 
*Curves; break; } + } + } + } + + // Start blending from current to target + EmotionBlendAlpha = 0.0f; + + UE_LOG(LogElevenLabsFacialExpr, Log, + TEXT("Emotion target set: %s (%s) — %d curves, blending over %.1fs..."), + *UEnum::GetValueAsString(Emotion), *UEnum::GetValueAsString(Intensity), + TargetEmotionCurves.Num(), EmotionBlendDuration); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Tick — smooth emotion blending +// ───────────────────────────────────────────────────────────────────────────── + +void UElevenLabsFacialExpressionComponent::TickComponent( + float DeltaTime, ELevelTick TickType, FActorComponentTickFunction* ThisTickFunction) +{ + Super::TickComponent(DeltaTime, TickType, ThisTickFunction); + + if (EmotionCurveMap.Num() == 0) + return; // No emotion data loaded + + // Advance blend alpha + if (EmotionBlendAlpha < 1.0f) + { + const float BlendSpeed = 1.0f / FMath::Max(0.05f, EmotionBlendDuration); + EmotionBlendAlpha = FMath::Min(1.0f, EmotionBlendAlpha + DeltaTime * BlendSpeed); + } + + // Blend CurrentEmotionCurves toward TargetEmotionCurves + { + TSet AllCurves; + for (const auto& P : CurrentEmotionCurves) AllCurves.Add(P.Key); + for (const auto& P : TargetEmotionCurves) AllCurves.Add(P.Key); + + for (const FName& CurveName : AllCurves) + { + const float Current = CurrentEmotionCurves.Contains(CurveName) + ? CurrentEmotionCurves[CurveName] : 0.0f; + const float Target = TargetEmotionCurves.Contains(CurveName) + ? 
TargetEmotionCurves[CurveName] : 0.0f; + const float Blended = FMath::Lerp(Current, Target, EmotionBlendAlpha); + + if (FMath::Abs(Blended) > 0.001f) + CurrentEmotionCurves.FindOrAdd(CurveName) = Blended; + else + CurrentEmotionCurves.Remove(CurveName); + } + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Mouth curve classification +// ───────────────────────────────────────────────────────────────────────────── + +bool UElevenLabsFacialExpressionComponent::IsMouthCurve(const FName& CurveName) +{ + const FString Name = CurveName.ToString().ToLower(); + return Name.Contains(TEXT("jaw")) + || Name.Contains(TEXT("mouth")) + || Name.Contains(TEXT("lips")) + || Name.Contains(TEXT("tongue")) + || Name.Contains(TEXT("cheekpuff")); +} diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsLipSyncComponent.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsLipSyncComponent.cpp index a50f33d..b2cccf1 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsLipSyncComponent.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsLipSyncComponent.cpp @@ -3,7 +3,7 @@ #include "ElevenLabsLipSyncComponent.h" #include "ElevenLabsLipSyncPoseMap.h" #include "ElevenLabsConversationalAgentComponent.h" -#include "ElevenLabsDefinitions.h" +#include "ElevenLabsFacialExpressionComponent.h" #include "Components/SkeletalMeshComponent.h" #include "Engine/SkeletalMesh.h" #include "Animation/MorphTarget.h" @@ -539,6 +539,7 @@ void UElevenLabsLipSyncComponent::InitializePoseMappings() UE_LOG(LogElevenLabsLipSync, Log, TEXT("No phoneme pose AnimSequences assigned — using hardcoded ARKit mapping.")); } + } void UElevenLabsLipSyncComponent::EndPlay(const EEndPlayReason::Type EndPlayReason) @@ -2274,6 +2275,44 @@ void 
UElevenLabsLipSyncComponent::MapVisemesToBlendshapes() } } + // ── Merge emotion base layer from FacialExpressionComponent ────────── + // Emotion provides the base expression (eyes, brows, cheeks). + // Lip sync overrides only mouth-area curves. + if (AActor* Owner = GetOwner()) + { + if (auto* FaceExpr = Owner->FindComponentByClass()) + { + const TMap& EmotionCurves = FaceExpr->GetCurrentEmotionCurves(); + if (EmotionCurves.Num() > 0) + { + // Collect which curves lip sync is actively driving (mouth area) + TSet LipSyncMouthCurves; + for (const auto& Pair : CurrentBlendshapes) + { + if (UElevenLabsFacialExpressionComponent::IsMouthCurve(Pair.Key) && Pair.Value > 0.01f) + LipSyncMouthCurves.Add(Pair.Key); + } + + // Add non-mouth emotion curves (eyes, brows, cheeks, nose) + for (const auto& Pair : EmotionCurves) + { + if (!UElevenLabsFacialExpressionComponent::IsMouthCurve(Pair.Key)) + { + // Emotion controls non-mouth curves exclusively + CurrentBlendshapes.FindOrAdd(Pair.Key) = Pair.Value; + } + else if (!LipSyncMouthCurves.Contains(Pair.Key)) + { + // Mouth curves from emotion only if lip sync has nothing active there + // (e.g. during silence, the emotion's mouth pose shows through) + CurrentBlendshapes.FindOrAdd(Pair.Key) = Pair.Value; + } + // Otherwise: lip sync already has a value for this mouth curve — keep it + } + } + } + } + // Clamp all values. Use wider range for pose data (CTRL curves can exceed 1.0). const float MaxClamp = bUsePoseMapping ? 
2.0f : 1.0f; for (auto& Pair : CurrentBlendshapes) diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp index e0d32f0..cc8913f 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp @@ -391,6 +391,10 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message) // Silently ignore — corrected text after interruption. UE_LOG(LogElevenLabsWS, Verbose, TEXT("agent_response_correction received (ignored).")); } + else if (MsgType == ElevenLabsMessageType::ClientToolCall) + { + HandleClientToolCall(Root); + } else if (MsgType == ElevenLabsMessageType::InterruptionEvent) { HandleInterruption(Root); @@ -658,6 +662,64 @@ void UElevenLabsWebSocketProxy::HandleInterruption(const TSharedPtr OnInterrupted.Broadcast(); } +void UElevenLabsWebSocketProxy::HandleClientToolCall(const TSharedPtr& Root) +{ + // Incoming: { "type": "client_tool_call", "client_tool_call": { + // "tool_name": "set_emotion", "tool_call_id": "abc123", + // "parameters": { "emotion": "surprise" } } } + const TSharedPtr* ToolCallObj = nullptr; + if (!Root->TryGetObjectField(TEXT("client_tool_call"), ToolCallObj) || !ToolCallObj) + { + UE_LOG(LogElevenLabsWS, Warning, TEXT("client_tool_call: missing client_tool_call object.")); + return; + } + + FElevenLabsClientToolCall ToolCall; + (*ToolCallObj)->TryGetStringField(TEXT("tool_name"), ToolCall.ToolName); + (*ToolCallObj)->TryGetStringField(TEXT("tool_call_id"), ToolCall.ToolCallId); + + // Extract parameters as string key-value pairs + const TSharedPtr* ParamsObj = nullptr; + if ((*ToolCallObj)->TryGetObjectField(TEXT("parameters"), ParamsObj) && ParamsObj) + { + 
for (const auto& Pair : (*ParamsObj)->Values) + { + FString Value; + if (Pair.Value->TryGetString(Value)) + { + ToolCall.Parameters.Add(Pair.Key, Value); + } + else + { + // For non-string values, serialize to string + ToolCall.Parameters.Add(Pair.Key, Pair.Value->AsString()); + } + } + } + + const double T = FPlatformTime::Seconds() - SessionStartTime; + UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] Client tool call: %s (id=%s, %d params)"), + T, *ToolCall.ToolName, *ToolCall.ToolCallId, ToolCall.Parameters.Num()); + + OnClientToolCall.Broadcast(ToolCall); +} + +void UElevenLabsWebSocketProxy::SendClientToolResult(const FString& ToolCallId, const FString& Result, bool bIsError) +{ + // Outgoing: { "type": "client_tool_result", "tool_call_id": "abc123", + // "result": "emotion set to surprise", "is_error": false } + TSharedPtr Msg = MakeShareable(new FJsonObject()); + Msg->SetStringField(TEXT("type"), ElevenLabsMessageType::ClientToolResult); + Msg->SetStringField(TEXT("tool_call_id"), ToolCallId); + Msg->SetStringField(TEXT("result"), Result); + Msg->SetBoolField(TEXT("is_error"), bIsError); + SendJsonMessage(Msg); + + const double T = FPlatformTime::Seconds() - SessionStartTime; + UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] Sent client_tool_result for %s: %s (error=%s)"), + T, *ToolCallId, *Result, bIsError ? TEXT("true") : TEXT("false")); +} + void UElevenLabsWebSocketProxy::HandlePing(const TSharedPtr& Root) { // Reply with a pong to keep the connection alive. 
diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h index a1020ca..a43fe2e 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h @@ -62,6 +62,23 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAgentPartialResponse, */ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnAgentResponseTimeout); +/** + * Fired when the agent sets an emotion via the "set_emotion" client tool. + * Use this to drive facial expressions on your character (MetaHuman blendshapes, etc.). + * The emotion changes BEFORE the corresponding audio arrives, giving time to blend. + */ +DECLARE_DYNAMIC_MULTICAST_DELEGATE_TwoParams(FOnAgentEmotionChanged, + EElevenLabsEmotion, Emotion, + EElevenLabsEmotionIntensity, Intensity); + +/** + * Fired for any client tool call that is NOT automatically handled (i.e. not "set_emotion"). + * Use this to implement custom client tools in Blueprint. + * You MUST call SendClientToolResult on the WebSocketProxy to acknowledge the call. + */ +DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnAgentClientToolCall, + const FElevenLabsClientToolCall&, ToolCall); + // Non-dynamic delegate for raw agent audio (high-frequency, C++ consumers only). // Delivers PCM chunks as int16, 16kHz mono, little-endian. 
DECLARE_MULTICAST_DELEGATE_OneParam(FOnAgentAudioData, const TArray& /*PCMData*/); @@ -208,6 +225,24 @@ public: meta = (ToolTip = "Fires if the server doesn't respond within ResponseTimeoutSeconds.\nUse to show 'try again' or re-open the mic automatically.")) FOnAgentResponseTimeout OnAgentResponseTimeout; + /** Fired when the agent changes emotion via the "set_emotion" client tool. The emotion is set BEFORE the corresponding audio arrives, giving you time to smoothly blend facial expressions. */ + UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events", + meta = (ToolTip = "Fires when the agent sets an emotion (joy, sadness, surprise, fear, anger, disgust).\nDriven by the 'set_emotion' client tool. Arrives before the audio.")) + FOnAgentEmotionChanged OnAgentEmotionChanged; + + /** Fired for client tool calls that are NOT automatically handled (i.e. not "set_emotion"). You must call GetWebSocketProxy()->SendClientToolResult() to respond. */ + UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events", + meta = (ToolTip = "Fires for custom client tool calls (not set_emotion).\nYou must respond via GetWebSocketProxy()->SendClientToolResult().")) + FOnAgentClientToolCall OnAgentClientToolCall; + + /** The current emotion of the agent, as set by the "set_emotion" client tool. Defaults to Neutral. */ + UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs") + EElevenLabsEmotion CurrentEmotion = EElevenLabsEmotion::Neutral; + + /** The current emotion intensity. Defaults to Medium. */ + UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs") + EElevenLabsEmotionIntensity CurrentEmotionIntensity = EElevenLabsEmotionIntensity::Medium; + // ── Raw audio data (C++ only, used by LipSync component) ──────────────── /** Raw PCM audio from the agent (int16, 16kHz mono). Fires for each WebSocket audio chunk. * Used internally by UElevenLabsLipSyncComponent for spectral analysis. 
*/ @@ -312,6 +347,9 @@ private: UFUNCTION() void HandleAgentResponsePart(const FString& PartialText); + UFUNCTION() + void HandleClientToolCall(const FElevenLabsClientToolCall& ToolCall); + // ── Audio playback ──────────────────────────────────────────────────────── void InitAudioPlayback(); void EnqueueAgentAudio(const TArray& PCMData); diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsDefinitions.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsDefinitions.h index 09d7ba1..6cc796b 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsDefinitions.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsDefinitions.h @@ -108,3 +108,50 @@ struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsTranscriptSegment UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs") bool bIsFinal = false; }; + +// ───────────────────────────────────────────────────────────────────────────── +// Agent emotion (driven by client tool "set_emotion" from the LLM) +// ───────────────────────────────────────────────────────────────────────────── +UENUM(BlueprintType) +enum class EElevenLabsEmotion : uint8 +{ + Neutral UMETA(DisplayName = "Neutral"), + Joy UMETA(DisplayName = "Joy"), + Sadness UMETA(DisplayName = "Sadness"), + Anger UMETA(DisplayName = "Anger"), + Surprise UMETA(DisplayName = "Surprise"), + Fear UMETA(DisplayName = "Fear"), + Disgust UMETA(DisplayName = "Disgust"), +}; + +// ───────────────────────────────────────────────────────────────────────────── +// Emotion intensity (maps to Normal/Medium/Extreme pose variants) +// ───────────────────────────────────────────────────────────────────────────── +UENUM(BlueprintType) +enum class EElevenLabsEmotionIntensity : uint8 +{ + Low UMETA(DisplayName = "Low (Normal)"), + Medium UMETA(DisplayName = "Medium"), + High 
UMETA(DisplayName = "High (Extreme)"), +}; + +// ───────────────────────────────────────────────────────────────────────────── +// Client tool call received from ElevenLabs server +// ───────────────────────────────────────────────────────────────────────────── +USTRUCT(BlueprintType) +struct PS_AI_AGENT_ELEVENLABS_API FElevenLabsClientToolCall +{ + GENERATED_BODY() + + /** Name of the tool the agent wants to invoke (e.g. "set_emotion"). */ + UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs") + FString ToolName; + + /** Unique ID for this tool invocation — must be echoed back in client_tool_result. */ + UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs") + FString ToolCallId; + + /** Raw JSON parameters as key-value string pairs. */ + UPROPERTY(BlueprintReadOnly, Category = "ElevenLabs") + TMap Parameters; +}; diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsFacialExpressionComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsFacialExpressionComponent.h new file mode 100644 index 0000000..f01fa12 --- /dev/null +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsFacialExpressionComponent.h @@ -0,0 +1,116 @@ +// Copyright ASTERION. All Rights Reserved. + +#pragma once + +#include "CoreMinimal.h" +#include "Components/ActorComponent.h" +#include "ElevenLabsDefinitions.h" +#include "ElevenLabsFacialExpressionComponent.generated.h" + +class UElevenLabsConversationalAgentComponent; +class UElevenLabsLipSyncPoseMap; +class USkeletalMeshComponent; + +// ───────────────────────────────────────────────────────────────────────────── +// UElevenLabsFacialExpressionComponent +// +// Drives emotion-based facial expressions on a MetaHuman (or any skeletal mesh) +// as a BASE layer. Lip sync (from ElevenLabsLipSyncComponent) modulates on top, +// overriding only mouth-area curves. +// +// Workflow: +// 1. 
Assign a PoseMap data asset with Emotion Poses filled in. +// 2. Assign the TargetMesh (same mesh as the LipSync component). +// 3. The component listens to OnAgentEmotionChanged from the agent component. +// 4. Emotion curves are smoothly blended (~500ms transitions). +// 5. The LipSync component reads GetCurrentEmotionCurves() to merge as base layer. +// ───────────────────────────────────────────────────────────────────────────── +UCLASS(ClassGroup = "ElevenLabs", meta = (BlueprintSpawnableComponent), + DisplayName = "ElevenLabs Facial Expression") +class PS_AI_AGENT_ELEVENLABS_API UElevenLabsFacialExpressionComponent : public UActorComponent +{ + GENERATED_BODY() + +public: + UElevenLabsFacialExpressionComponent(); + + // ── Configuration ───────────────────────────────────────────────────────── + + /** Pose map asset containing emotion AnimSequences (Normal / Medium / Extreme per emotion). + * Can be the same PoseMap asset used by the LipSync component. */ + UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression", + meta = (ToolTip = "Pose map with Emotion Poses filled in.\nCan be the same asset as the LipSync component.")) + TObjectPtr PoseMap; + + /** Skeletal mesh to apply emotion curves to. + * Should be the same mesh as the LipSync component's TargetMesh. */ + UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression", + meta = (ToolTip = "Skeletal mesh for emotion curves.\nShould match the LipSync component's TargetMesh.")) + TObjectPtr TargetMesh; + + /** Emotion transition duration in seconds. 
*/ + UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|FacialExpression", + meta = (ClampMin = "0.1", ClampMax = "3.0", + ToolTip = "How long (seconds) to blend between emotions.\n0.5 = snappy, 1.5 = smooth.")) + float EmotionBlendDuration = 0.5f; + + // ── Getters ─────────────────────────────────────────────────────────────── + + /** Get the current smoothed emotion curves (for the LipSync component to merge). */ + UFUNCTION(BlueprintCallable, Category = "ElevenLabs|FacialExpression") + const TMap& GetCurrentEmotionCurves() const { return CurrentEmotionCurves; } + + /** Get the active emotion. */ + UFUNCTION(BlueprintPure, Category = "ElevenLabs|FacialExpression") + EElevenLabsEmotion GetActiveEmotion() const { return ActiveEmotion; } + + /** Get the active emotion intensity. */ + UFUNCTION(BlueprintPure, Category = "ElevenLabs|FacialExpression") + EElevenLabsEmotionIntensity GetActiveIntensity() const { return ActiveEmotionIntensity; } + + /** Check if a curve name belongs to the mouth area (overridden by lip sync). */ + UFUNCTION(BlueprintPure, Category = "ElevenLabs|FacialExpression") + static bool IsMouthCurve(const FName& CurveName); + + // ── UActorComponent overrides ───────────────────────────────────────────── + virtual void BeginPlay() override; + virtual void EndPlay(const EEndPlayReason::Type EndPlayReason) override; + virtual void TickComponent(float DeltaTime, ELevelTick TickType, + FActorComponentTickFunction* ThisTickFunction) override; + +private: + // ── Event handlers ──────────────────────────────────────────────────────── + + /** Called when the agent changes emotion via client tool. */ + UFUNCTION() + void OnEmotionChanged(EElevenLabsEmotion Emotion, EElevenLabsEmotionIntensity Intensity); + + // ── Curve extraction ────────────────────────────────────────────────────── + + /** Extract curve values at t=0 from an AnimSequence. 
*/ + TMap ExtractCurvesFromAnim(UAnimSequence* AnimSeq); + + /** Initialize emotion curve data from PoseMap at BeginPlay. */ + void InitializeEmotionPoses(); + + // ── State ───────────────────────────────────────────────────────────────── + + /** Extracted curve data: Emotion → Intensity → { CurveName → Value }. */ + TMap>> EmotionCurveMap; + + /** Current smoothed emotion curves (blended each tick). */ + TMap CurrentEmotionCurves; + + /** Target emotion curves (set when emotion changes, blended toward). */ + TMap TargetEmotionCurves; + + /** Current blend progress (0 = old emotion, 1 = target emotion). */ + float EmotionBlendAlpha = 1.0f; + + /** Active emotion (for change detection). */ + EElevenLabsEmotion ActiveEmotion = EElevenLabsEmotion::Neutral; + EElevenLabsEmotionIntensity ActiveEmotionIntensity = EElevenLabsEmotionIntensity::Medium; + + /** Cached reference to the agent component on the same Actor. */ + TWeakObjectPtr AgentComponent; +}; diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsLipSyncPoseMap.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsLipSyncPoseMap.h index 6bacbd1..34fecb6 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsLipSyncPoseMap.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsLipSyncPoseMap.h @@ -5,10 +5,35 @@ #include "CoreMinimal.h" #include "Engine/DataAsset.h" #include "Engine/AssetManager.h" +#include "ElevenLabsDefinitions.h" #include "ElevenLabsLipSyncPoseMap.generated.h" class UAnimSequence; +// ───────────────────────────────────────────────────────────────────────────── +// Emotion pose set: 3 intensity levels (Normal / Medium / Extreme) +// ───────────────────────────────────────────────────────────────────────────── +USTRUCT(BlueprintType) +struct PS_AI_AGENT_ELEVENLABS_API 
FElevenLabsEmotionPoseSet +{ + GENERATED_BODY() + + /** Low intensity expression (subtle). E.g. MHF_Happy_N */ + UPROPERTY(EditAnywhere, BlueprintReadWrite, + meta = (ToolTip = "Low intensity (Normal). E.g. MHF_Happy_N")) + TObjectPtr<UAnimSequence> Normal; + + /** Medium intensity expression. E.g. MHF_Happy_M */ + UPROPERTY(EditAnywhere, BlueprintReadWrite, + meta = (ToolTip = "Medium intensity. E.g. MHF_Happy_M")) + TObjectPtr<UAnimSequence> Medium; + + /** High intensity expression (extreme). E.g. MHF_Happy_E */ + UPROPERTY(EditAnywhere, BlueprintReadWrite, + meta = (ToolTip = "High intensity (Extreme). E.g. MHF_Happy_E")) + TObjectPtr<UAnimSequence> Extreme; +}; + /** * Reusable data asset that maps OVR visemes to phoneme pose AnimSequences. * @@ -103,4 +128,17 @@ public: UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Phoneme Poses", meta = (ToolTip = "Close back vowel (OO). E.g. MHF_OU")) TObjectPtr<UAnimSequence> PoseOU; + + // ── Emotion Poses ──────────────────────────────────────────────────────── + // + // Facial expression animations for each emotion, with 3 intensity levels. + // These are applied as a BASE layer (eyes, eyebrows, cheeks). + // Lip sync MODULATES on top, overriding only mouth-area curves. + + /** Map of emotions to their pose sets (Normal / Medium / Extreme). + * Add entries for each emotion your agent uses (Joy, Sadness, Anger, Surprise, Fear, Disgust). + * Neutral is optional — absence means no base expression. 
*/ + UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "Emotion Poses", + meta = (ToolTip = "Emotion → AnimSequence mapping with 3 intensity levels.\nThese drive the base facial expression (eyes, brows, cheeks).\nLip sync overrides the mouth area on top.")) + TMap<EElevenLabsEmotion, FElevenLabsEmotionPoseSet> EmotionPoses; }; diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h index c1a3cb7..e183c5d 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h @@ -48,6 +48,10 @@ DECLARE_DYNAMIC_MULTICAST_DELEGATE(FOnElevenLabsAgentResponseStarted); DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnElevenLabsAgentResponsePart, const FString&, PartialText); +/** Fired when the server sends a client_tool_call — the agent wants the client to execute a tool. */ +DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnElevenLabsClientToolCall, + const FElevenLabsClientToolCall&, ToolCall); + // ───────────────────────────────────────────────────────────────────────────── // WebSocket Proxy @@ -103,6 +107,10 @@ public: UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events") FOnElevenLabsAgentResponsePart OnAgentResponsePart; + /** Fired when the agent invokes a client tool. Handle the call and reply with SendClientToolResult. */ + UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events") + FOnElevenLabsClientToolCall OnClientToolCall; + // ── Lifecycle ───────────────────────────────────────────────────────────── /** @@ -172,6 +180,17 @@ public: UFUNCTION(BlueprintCallable, Category = "ElevenLabs") void SendInterrupt(); + /** + * Send the result of a client tool call back to ElevenLabs. 
+ * Must be called after receiving an OnClientToolCall event. + * + * @param ToolCallId The tool_call_id from the original client_tool_call. + * @param Result A string result to return to the agent. + * @param bIsError True if the tool execution failed. + */ + UFUNCTION(BlueprintCallable, Category = "ElevenLabs") + void SendClientToolResult(const FString& ToolCallId, const FString& Result, bool bIsError = false); + // ── Info ────────────────────────────────────────────────────────────────── UFUNCTION(BlueprintPure, Category = "ElevenLabs") @@ -193,6 +212,7 @@ private: void HandleAgentResponse(const TSharedPtr<FJsonObject>& Payload); void HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Payload); void HandleInterruption(const TSharedPtr<FJsonObject>& Payload); + void HandleClientToolCall(const TSharedPtr<FJsonObject>& Payload); void HandlePing(const TSharedPtr<FJsonObject>& Payload); /** Build and send a JSON text frame to the server. */