Roll back to a more functional, though still imperfect, version

2026-02-21 19:49:26 +01:00 · 2026-02-21 19:49:26 +01:00 · d8957625f8
commit d8957625f8
parent 1b883f532f
4 changed files with 4 additions and 71 deletions
--- a/Unreal/PS_AI_Agent/Content/test_AI_Actor.uasset
+++ b/Unreal/PS_AI_Agent/Content/test_AI_Actor.uasset
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsConversationalAgentComponent.cpp
@@ -459,9 +459,7 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
 		// bAgentGenerating guard). Flushing would send audio to a server that is mid-generation,
 		// causing it to re-enter "user speaking" state and stall — both sides stuck.
 		//
-		// Do NOT send an interrupt here: the ElevenLabs server does not always send the
-		// interruption ack, which would leave bIgnoreIncomingContent=true and silently
-		// discard all subsequent content. Instead, let the server's response play out:
+		// Do NOT send an interrupt here — just let the server's response play out:
 		//   - If audio arrives → EnqueueAgentAudio sets bAgentSpeaking, response plays normally.
 		//   - If audio never arrives → generating timeout (10s) clears bAgentGenerating.
 		// Either way the state machine recovers and Blueprint can reopen the mic.
--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp
@@ -158,20 +158,6 @@ void UElevenLabsWebSocketProxy::SendUserTurnEnd()
 	// in a loop: part arrives → event → StopListening → SendUserTurnEnd → flag reset → part arrives → loop.
 	// The flag is only reset in SendUserTurnStart() at the beginning of a new user turn.

-	// Clear the interrupt-ignore flag if it was never cleared by an "interruption" server ack.
-	// The ElevenLabs server does not always send the "interruption" acknowledgement reliably.
-	// By the time the user has spoken a full new turn (seconds of audio), any in-flight content
-	// from the previously interrupted generation has long since arrived — it is safe to resume
-	// normal content processing so the server's response to this new turn is not silently discarded.
-	if (bIgnoreIncomingContent)
-	{
-		bIgnoreIncomingContent = false;
-		const double T = UserTurnEndTime - SessionStartTime;
-		UE_LOG(LogElevenLabsWS, Log,
-			TEXT("[T+%.2fs] Cleared interrupt-ignore flag at turn end (server 'interruption' ack was not received — resuming content processing)."),
-			T);
-	}
-
 	const double T = UserTurnEndTime - SessionStartTime;
 	UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] User turn ended — server VAD silence detection started (turn_timeout=1s)."), T);
 }
@@ -196,12 +182,7 @@ void UElevenLabsWebSocketProxy::SendInterrupt()
 {
 	if (!IsConnected()) return;

-	// Immediately start discarding in-flight audio and chat response parts from
-	// the generation we are about to interrupt. The server may still send several
-	// frames before it processes our interrupt. We stop ignoring once the server
-	// sends its "interruption" acknowledgement (HandleInterruption).
-	bIgnoreIncomingContent = true;
-	UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt — ignoring incoming content until server acks."));
+	UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt."));

 	TSharedPtr<FJsonObject> Msg = MakeShareable(new FJsonObject());
 	Msg->SetStringField(TEXT("type"), ElevenLabsMessageType::Interrupt);
@@ -467,17 +448,9 @@ void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size,
 		}

 		// Broadcast raw PCM bytes directly to the audio queue.
-		// Discard if we are waiting for an interruption ack (same logic as HandleAudioResponse).
 		TArray<uint8> PCMData = MoveTemp(BinaryFrameBuffer);
 		BinaryFrameBuffer.Reset();
-		if (!bIgnoreIncomingContent)
-		{
-			OnAudioReceived.Broadcast(PCMData);
-		}
-		else
-		{
-			UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding binary audio frame (interrupt pending server ack)."));
-		}
+		OnAudioReceived.Broadcast(PCMData);
 	}
 }

@@ -507,15 +480,6 @@ void UElevenLabsWebSocketProxy::HandleConversationInitiation(const TSharedPtr<FJ

 void UElevenLabsWebSocketProxy::HandleAudioResponse(const TSharedPtr<FJsonObject>& Root)
 {
-	// Discard audio that belongs to an interrupted generation.
-	// The server may send several more audio frames after we sent "interrupt" —
-	// they must not restart the speaking state on the client side.
-	if (bIgnoreIncomingContent)
-	{
-		UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding audio frame (interrupt pending server ack)."));
-		return;
-	}
-
 	// Expected structure:
 	// { "type": "audio",
 	//   "audio_event": { "audio_base_64": "<base64 PCM>", "event_id": 1 }
@@ -569,16 +533,6 @@ void UElevenLabsWebSocketProxy::HandleTranscript(const TSharedPtr<FJsonObject>&

 void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject>& Root)
 {
-	// ISSUE-19: discard agent_response that belongs to an interrupted generation.
-	// A stale agent_response from the cancelled turn would set bAgentResponseReceived=true
-	// on the component, allowing the silence-detection Tick to fire OnAgentStoppedSpeaking
-	// at the wrong time (no audio is currently playing for the new turn yet).
-	if (bIgnoreIncomingContent)
-	{
-		UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_response (interrupt pending server ack)."));
-		return;
-	}
-
 	// ISSUE-22: reset bAgentResponseStartedFired so OnAgentResponseStarted fires again on
 	// the next turn. In Server VAD mode SendUserTurnStart() is never called — it is the only
 	// other place that resets this flag — so without this reset, OnAgentResponseStarted fires
@@ -604,16 +558,6 @@ void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject

 void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Root)
 {
-	// Ignore response parts that belong to a generation we have already interrupted.
-	// Without this guard, old parts arriving after SendInterrupt() would re-trigger
-	// OnAgentResponseStarted (bAgentResponseStartedFired was reset in SendUserTurnStart),
-	// causing the component to stop the newly-opened microphone — creating an infinite loop.
-	if (bIgnoreIncomingContent)
-	{
-		UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_chat_response_part (interrupt pending server ack)."));
-		return;
-	}
-
 	// agent_chat_response_part = the server is actively generating a response (LLM token stream).
 	// Fire OnAgentResponseStarted once per turn so the component can auto-stop the microphone
 	// if the Blueprint restarted listening before the server finished processing the previous turn.
@@ -647,10 +591,7 @@ void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart(const TSharedPtr<FJs

 void UElevenLabsWebSocketProxy::HandleInterruption(const TSharedPtr<FJsonObject>& Root)
 {
-	// Server has acknowledged the interruption — the old generation is fully stopped.
-	// Resume accepting incoming audio and chat response parts (for the next turn).
-	bIgnoreIncomingContent = false;
-	UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack received — resuming content processing)."));
+	UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack received)."));
 	OnInterrupted.Broadcast();
 }

--- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h
+++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Public/ElevenLabsWebSocketProxy.h
@@ -226,12 +226,6 @@ private:
 	// Used to compute [T+Xs] session-relative timestamps in all log messages.
 	double SessionStartTime = 0.0;

-	// Set to true in SendInterrupt() so that in-flight audio frames and
-	// agent_chat_response_part messages from the interrupted generation are silently
-	// discarded instead of re-triggering the speaking/generating state.
-	// Cleared when the server sends its "interruption" acknowledgement.
-	bool bIgnoreIncomingContent = false;
-
 public:
 	// Set by UElevenLabsConversationalAgentComponent before calling Connect().
 	// Controls turn_timeout in conversation_initiation_client_data.