diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h
index ecd5923..9a16804 100644
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsConversationalAgentComponent.h
@@ -86,151 +86,113 @@ public:
 
     // ── Configuration ─────────────────────────────────────────────────────────
 
-    /**
-     * ElevenLabs Agent ID. Overrides the project-level default in Project Settings.
-     * Leave empty to use the project default.
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs")
+    /** ElevenLabs Agent ID used for this conversation. Leave empty to use the default from Project Settings > ElevenLabs. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs",
+              meta = (ToolTip = "ElevenLabs Agent ID. Leave empty to use the project default from Project Settings."))
     FString AgentID;
 
-    /**
-     * Turn mode:
-     * - Server VAD: ElevenLabs detects end-of-speech automatically (recommended).
-     * - Client Controlled: you call StartListening/StopListening manually (push-to-talk).
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs")
+    /** How turn-taking is managed between the user and the agent.\n- Server VAD (recommended): ElevenLabs automatically detects when the user stops speaking.\n- Client Controlled: You manually call StartListening/StopListening (push-to-talk with a key). */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs",
+              meta = (ToolTip = "Turn-taking mode.\n- Server VAD: ElevenLabs detects end-of-speech automatically (hands-free).\n- Client Controlled: You call StartListening/StopListening manually (push-to-talk)."))
     EElevenLabsTurnMode TurnMode = EElevenLabsTurnMode::Server;
 
-    /**
-     * Automatically start listening (microphone capture) once the WebSocket is
-     * connected and the conversation is initiated.
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs")
+    /** Automatically open the microphone as soon as the WebSocket connection is established. Only applies in Server VAD mode. In Client (push-to-talk) mode, you must call StartListening manually. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs",
+              meta = (ToolTip = "Auto-open the microphone when the conversation starts.\nOnly applies in Server VAD mode. In push-to-talk mode, call StartListening() manually."))
     bool bAutoStartListening = true;
 
-    /**
-     * Enable speculative turn: the LLM starts generating a response during
-     * silence before the VAD is fully confident the user has finished speaking.
-     * Reduces latency by 200-500ms but caused the server to silently stop
-     * processing user audio after 2 turns when combined with a short turn_timeout.
-     * Disabled by default until ElevenLabs confirms stability in multi-turn sessions.
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Latency")
+    /** Let the LLM start generating a response during silence, before the VAD is fully confident the user has finished speaking. Saves 200-500ms of latency but may be unstable in long multi-turn sessions. Disabled by default. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Latency",
+              meta = (ToolTip = "Speculative turn: the LLM begins generating during silence before full turn-end confidence.\nReduces latency by 200-500ms. May be unstable in long sessions — test before enabling in production."))
     bool bSpeculativeTurn = false;
 
-    /**
-     * Duration in milliseconds of each microphone audio chunk sent to ElevenLabs.
-     * WASAPI captures audio every ~5ms, but sending tiny chunks degrades VAD/STT
-     * accuracy. We accumulate audio and send once this duration is reached.
-     * - Lower values (50-80ms): less latency, but VAD may be less reliable.
-     * - Higher values (150-250ms): more reliable VAD, but adds latency.
-     * Default: 100ms (3200 bytes at 16kHz 16-bit mono).
-     */
+    /** How many milliseconds of microphone audio to accumulate before sending a chunk to ElevenLabs. Lower values reduce latency but may degrade voice detection accuracy. Higher values are more reliable but add delay. */
     UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Latency",
-             meta = (ClampMin = "20", ClampMax = "500", Units = "ms"))
+             meta = (ClampMin = "20", ClampMax = "500", Units = "ms",
+                     ToolTip = "Mic audio chunk duration sent to ElevenLabs.\n- 50-80ms: lower latency, less reliable voice detection.\n- 100ms (default): good balance.\n- 150-250ms: more reliable, higher latency."))
     int32 MicChunkDurationMs = 100;
 
-    /**
-     * Allow the user to interrupt the agent while it is playing audio (speaking).
-     * When true, calling StartListening() while the agent is audibly speaking automatically
-     * sends an interruption signal to the server and opens the mic — no Blueprint nodes needed.
-     * When false, StartListening() is silently ignored until the agent finishes speaking.
-     *
-     * NOTE: interruption only applies during the audio-playback phase (bAgentSpeaking).
-     * While the agent is generating but has not yet started speaking, StartListening() is
-     * always blocked regardless of this flag — this prevents Blueprint's OnAgentStartedGenerating
-     * handler (which often calls StartListening for bookkeeping) from accidentally cancelling
-     * the response before any audio plays.
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs")
+    /** Allow the user to interrupt the agent while it is speaking.\n- In Server VAD mode: the microphone stays open during agent speech and the server detects interruptions automatically.\n- In Client (push-to-talk) mode: pressing the talk key while the agent speaks sends an interrupt signal.\n- When disabled: the user must wait for the agent to finish speaking before talking. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs",
+              meta = (ToolTip = "Allow the user to interrupt the agent while it speaks.\n- Server VAD: mic stays open, server detects user voice automatically.\n- Push-to-talk: pressing the talk key interrupts the agent.\n- Disabled: user must wait for the agent to finish."))
     bool bAllowInterruption = true;
 
-    /**
-     * Forward user speech transcripts (user_transcript events) to the
-     * OnAgentTranscript delegate. Disable to reduce overhead if you don't
-     * need to display what the user said.
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events")
+    /** Enable the OnAgentTranscript event, which provides real-time speech-to-text of what the user is saying. Disable if you don't need to display user speech to reduce processing overhead. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fire OnAgentTranscript with real-time speech-to-text of user speech.\nDisable if you don't need to display what the user said."))
     bool bEnableUserTranscript = true;
 
-    /**
-     * Forward agent text responses (agent_response events) to the
-     * OnAgentTextResponse delegate. Disable if you only need audio output.
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events")
+    /** Enable the OnAgentTextResponse event, which provides the agent's complete text response once fully generated. Disable if you only need the audio output. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fire OnAgentTextResponse with the agent's complete text once fully generated.\nDisable if you only need the audio output."))
     bool bEnableAgentTextResponse = true;
 
-    /**
-     * Forward streaming text parts (agent_chat_response_part events) to the
-     * OnAgentPartialResponse delegate. Each part is a text fragment as the LLM
-     * generates it — use this for real-time subtitles that appear while the agent
-     * speaks, instead of waiting for the full text (OnAgentTextResponse).
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events")
+    /** Enable the OnAgentPartialResponse event, which streams the agent's text word-by-word as the LLM generates it. Use this for real-time subtitles that appear while the agent speaks, rather than waiting for the full response. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fire OnAgentPartialResponse with streaming text fragments as the LLM generates them.\nIdeal for real-time subtitles. Each event gives one text chunk, not the accumulated text."))
     bool bEnableAgentPartialResponse = false;
 
-    /**
-     * How many seconds to wait for the server to start generating a response
-     * after the user stops speaking (StopListening) before firing OnAgentResponseTimeout.
-     * Set to 0 to disable. Default: 10 seconds.
-     *
-     * A typical healthy round-trip is 0.1–0.8s to first agent_chat_response_part.
-     * Values above 10s are extremely unusual and almost always indicate a server issue.
-     */
-    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs", meta = (ClampMin = "0.0"))
+    /** Safety timeout: if the server does not start generating a response within this many seconds after the user stops speaking, fire OnAgentResponseTimeout. Set to 0 to disable. A normal response starts within 0.1-0.8s. */
+    UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs",
+              meta = (ClampMin = "0.0",
+                      ToolTip = "Seconds to wait for a server response after the user stops speaking.\nFires OnAgentResponseTimeout if exceeded. Normal latency is 0.1-0.8s.\nSet to 0 to disable. Default: 10s."))
     float ResponseTimeoutSeconds = 10.0f;
 
     // ── Events ────────────────────────────────────────────────────────────────
 
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired when the WebSocket connection is established and the conversation session is ready. Provides the ConversationID and AgentID. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires when the connection to ElevenLabs is established and the conversation is ready to begin."))
     FOnAgentConnected OnAgentConnected;
 
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired when the WebSocket connection is closed (gracefully or due to an error). Provides the status code and reason. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires when the connection to ElevenLabs is closed. Check StatusCode and Reason for details."))
     FOnAgentDisconnected OnAgentDisconnected;
 
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired on any connection or protocol error. The error message describes what went wrong. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires on connection or protocol errors. The ErrorMessage describes the issue."))
    FOnAgentError OnAgentError;
 
-    /** Fired for every transcript segment (user speech or agent speech, tentative and final). */
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired with real-time speech-to-text of the user's voice. Includes both tentative (in-progress) and final transcripts. Requires bEnableUserTranscript to be true. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Real-time speech-to-text of the user's voice.\nIncludes tentative and final transcripts. Enable with bEnableUserTranscript."))
     FOnAgentTranscript OnAgentTranscript;
 
-    /** Final text response produced by the agent (mirrors the audio). */
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired once when the agent's complete text response is available. This is the full text that corresponds to the audio the agent speaks. Requires bEnableAgentTextResponse to be true. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "The agent's complete text response (matches the spoken audio).\nFires once when the full text is ready. Enable with bEnableAgentTextResponse."))
     FOnAgentTextResponse OnAgentTextResponse;
 
-    /**
-     * Streaming text fragments as the LLM generates them.
-     * Fires for every agent_chat_response_part — each call gives one text chunk.
-     * Enable with bEnableAgentPartialResponse.
-     */
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired repeatedly as the LLM generates text, providing one word/fragment at a time. Use for real-time subtitles. Each call gives a new fragment, NOT the accumulated text. Requires bEnableAgentPartialResponse to be true. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Streaming text fragments as the LLM generates them (word by word).\nIdeal for real-time subtitles. Enable with bEnableAgentPartialResponse."))
     FOnAgentPartialResponse OnAgentPartialResponse;
 
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired when the agent begins playing audio (first audio chunk received). Use this to trigger speech animations or UI indicators. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires when the agent starts speaking (first audio chunk). Use for lip-sync or UI feedback."))
     FOnAgentStartedSpeaking OnAgentStartedSpeaking;
 
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired when the agent finishes playing all audio. Use this to re-open the microphone (in Server VAD mode without interruption) or update UI. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires when the agent finishes speaking. Use to re-open the mic or update UI."))
     FOnAgentStoppedSpeaking OnAgentStoppedSpeaking;
 
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired when the agent's speech is interrupted (either by the user speaking over it, or by a manual InterruptAgent call). The audio playback is automatically stopped. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires when the agent is interrupted mid-speech. Audio is automatically stopped."))
     FOnAgentInterrupted OnAgentInterrupted;
 
-    /**
-     * Fired when the server starts generating a response (before audio).
-     * The component automatically stops the microphone when this fires while listening,
-     * so the Blueprint doesn't need to handle this manually for push-to-talk.
-     * Bind here if you need UI feedback ("agent is thinking...").
-     */
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired when the server starts generating a response (before any audio arrives). Use this for "thinking..." UI feedback. In push-to-talk mode, the microphone is automatically closed when this fires. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires when the server starts generating (before audio arrives).\nUse for 'thinking...' UI. Mic is auto-closed in push-to-talk mode."))
     FOnAgentStartedGenerating OnAgentStartedGenerating;
 
-    /**
-     * Fired when the server has not started generating within ResponseTimeoutSeconds
-     * after StopListening was called. Bind here to give the user feedback such as
-     * "I didn't get a response, please try again" or to automatically re-open the mic.
-     */
-    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events")
+    /** Fired if the server does not start generating a response within ResponseTimeoutSeconds after the user stops speaking. Use this to show a "try again" message or automatically re-open the microphone. */
+    UPROPERTY(BlueprintAssignable, Category = "ElevenLabs|Events",
+              meta = (ToolTip = "Fires if the server doesn't respond within ResponseTimeoutSeconds.\nUse to show 'try again' or re-open the mic automatically."))
     FOnAgentResponseTimeout OnAgentResponseTimeout;
 
     // ── Control ───────────────────────────────────────────────────────────────
diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsMicrophoneCaptureComponent.h b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsMicrophoneCaptureComponent.h
index de58348..4fd2cb1 100644
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsMicrophoneCaptureComponent.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsMicrophoneCaptureComponent.h
@@ -28,9 +28,10 @@ class PS_AI_AGENT_ELEVENLABS_API UElevenLabsMicrophoneCaptureComponent : public
 public:
     UElevenLabsMicrophoneCaptureComponent();
 
-    /** Volume multiplier applied to captured samples before forwarding. */
+    /** Multiplier applied to the microphone input volume before sending to ElevenLabs. Increase if the agent has trouble hearing you, decrease if your audio is clipping. Default: 1.0 (no change). */
     UPROPERTY(EditAnywhere, BlueprintReadWrite, Category = "ElevenLabs|Microphone",
-             meta = (ClampMin = "0.0", ClampMax = "4.0"))
+             meta = (ClampMin = "0.0", ClampMax = "4.0",
+                     ToolTip = "Microphone volume multiplier.\n1.0 = no change. Increase if the agent can't hear you, decrease if audio clips."))
     float VolumeMultiplier = 1.0f;
 
     /**