Roll back to a more functional, though still imperfect, version
This commit is contained in:
parent
1b883f532f
commit
d8957625f8
Binary file not shown.
@ -459,9 +459,7 @@ void UElevenLabsConversationalAgentComponent::HandleAgentResponseStarted()
|
||||
// bAgentGenerating guard). Flushing would send audio to a server that is mid-generation,
|
||||
// causing it to re-enter "user speaking" state and stall — both sides stuck.
|
||||
//
|
||||
// Do NOT send an interrupt here: the ElevenLabs server does not always send the
|
||||
// interruption ack, which would leave bIgnoreIncomingContent=true and silently
|
||||
// discard all subsequent content. Instead, let the server's response play out:
|
||||
// Do NOT send an interrupt here — just let the server's response play out:
|
||||
// - If audio arrives → EnqueueAgentAudio sets bAgentSpeaking, response plays normally.
|
||||
// - If audio never arrives → generating timeout (10s) clears bAgentGenerating.
|
||||
// Either way the state machine recovers and Blueprint can reopen the mic.
|
||||
|
||||
@ -158,20 +158,6 @@ void UElevenLabsWebSocketProxy::SendUserTurnEnd()
|
||||
// in a loop: part arrives → event → StopListening → SendUserTurnEnd → flag reset → part arrives → loop.
|
||||
// The flag is only reset in SendUserTurnStart() at the beginning of a new user turn.
|
||||
|
||||
// Clear the interrupt-ignore flag if it was never cleared by an "interruption" server ack.
|
||||
// The ElevenLabs server does not always send the "interruption" acknowledgement reliably.
|
||||
// By the time the user has spoken a full new turn (seconds of audio), any in-flight content
|
||||
// from the previously interrupted generation has long since arrived — it is safe to resume
|
||||
// normal content processing so the server's response to this new turn is not silently discarded.
|
||||
if (bIgnoreIncomingContent)
|
||||
{
|
||||
bIgnoreIncomingContent = false;
|
||||
const double T = UserTurnEndTime - SessionStartTime;
|
||||
UE_LOG(LogElevenLabsWS, Log,
|
||||
TEXT("[T+%.2fs] Cleared interrupt-ignore flag at turn end (server 'interruption' ack was not received — resuming content processing)."),
|
||||
T);
|
||||
}
|
||||
|
||||
const double T = UserTurnEndTime - SessionStartTime;
|
||||
UE_LOG(LogElevenLabsWS, Log, TEXT("[T+%.2fs] User turn ended — server VAD silence detection started (turn_timeout=1s)."), T);
|
||||
}
|
||||
@ -196,12 +182,7 @@ void UElevenLabsWebSocketProxy::SendInterrupt()
|
||||
{
|
||||
if (!IsConnected()) return;
|
||||
|
||||
// Immediately start discarding in-flight audio and chat response parts from
|
||||
// the generation we are about to interrupt. The server may still send several
|
||||
// frames before it processes our interrupt. We stop ignoring once the server
|
||||
// sends its "interruption" acknowledgement (HandleInterruption).
|
||||
bIgnoreIncomingContent = true;
|
||||
UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt — ignoring incoming content until server acks."));
|
||||
UE_LOG(LogElevenLabsWS, Log, TEXT("Sending interrupt."));
|
||||
|
||||
TSharedPtr<FJsonObject> Msg = MakeShareable(new FJsonObject());
|
||||
Msg->SetStringField(TEXT("type"), ElevenLabsMessageType::Interrupt);
|
||||
@ -467,17 +448,9 @@ void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size,
|
||||
}
|
||||
|
||||
// Broadcast raw PCM bytes directly to the audio queue.
|
||||
// Discard if we are waiting for an interruption ack (same logic as HandleAudioResponse).
|
||||
TArray<uint8> PCMData = MoveTemp(BinaryFrameBuffer);
|
||||
BinaryFrameBuffer.Reset();
|
||||
if (!bIgnoreIncomingContent)
|
||||
{
|
||||
OnAudioReceived.Broadcast(PCMData);
|
||||
}
|
||||
else
|
||||
{
|
||||
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding binary audio frame (interrupt pending server ack)."));
|
||||
}
|
||||
OnAudioReceived.Broadcast(PCMData);
|
||||
}
|
||||
}
|
||||
|
||||
@ -507,15 +480,6 @@ void UElevenLabsWebSocketProxy::HandleConversationInitiation(const TSharedPtr<FJ
|
||||
|
||||
void UElevenLabsWebSocketProxy::HandleAudioResponse(const TSharedPtr<FJsonObject>& Root)
|
||||
{
|
||||
// Discard audio that belongs to an interrupted generation.
|
||||
// The server may send several more audio frames after we sent "interrupt" —
|
||||
// they must not restart the speaking state on the client side.
|
||||
if (bIgnoreIncomingContent)
|
||||
{
|
||||
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding audio frame (interrupt pending server ack)."));
|
||||
return;
|
||||
}
|
||||
|
||||
// Expected structure:
|
||||
// { "type": "audio",
|
||||
// "audio_event": { "audio_base_64": "<base64 PCM>", "event_id": 1 }
|
||||
@ -569,16 +533,6 @@ void UElevenLabsWebSocketProxy::HandleTranscript(const TSharedPtr<FJsonObject>&
|
||||
|
||||
void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject>& Root)
|
||||
{
|
||||
// ISSUE-19: discard agent_response that belongs to an interrupted generation.
|
||||
// A stale agent_response from the cancelled turn would set bAgentResponseReceived=true
|
||||
// on the component, allowing the silence-detection Tick to fire OnAgentStoppedSpeaking
|
||||
// at the wrong time (no audio is currently playing for the new turn yet).
|
||||
if (bIgnoreIncomingContent)
|
||||
{
|
||||
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_response (interrupt pending server ack)."));
|
||||
return;
|
||||
}
|
||||
|
||||
// ISSUE-22: reset bAgentResponseStartedFired so OnAgentResponseStarted fires again on
|
||||
// the next turn. In Server VAD mode SendUserTurnStart() is never called — it is the only
|
||||
// other place that resets this flag — so without this reset, OnAgentResponseStarted fires
|
||||
@ -604,16 +558,6 @@ void UElevenLabsWebSocketProxy::HandleAgentResponse(const TSharedPtr<FJsonObject
|
||||
|
||||
void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart(const TSharedPtr<FJsonObject>& Root)
|
||||
{
|
||||
// Ignore response parts that belong to a generation we have already interrupted.
|
||||
// Without this guard, old parts arriving after SendInterrupt() would re-trigger
|
||||
// OnAgentResponseStarted (bAgentResponseStartedFired was reset in SendUserTurnStart),
|
||||
// causing the component to stop the newly-opened microphone — creating an infinite loop.
|
||||
if (bIgnoreIncomingContent)
|
||||
{
|
||||
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Discarding agent_chat_response_part (interrupt pending server ack)."));
|
||||
return;
|
||||
}
|
||||
|
||||
// agent_chat_response_part = the server is actively generating a response (LLM token stream).
|
||||
// Fire OnAgentResponseStarted once per turn so the component can auto-stop the microphone
|
||||
// if the Blueprint restarted listening before the server finished processing the previous turn.
|
||||
@ -647,10 +591,7 @@ void UElevenLabsWebSocketProxy::HandleAgentChatResponsePart(const TSharedPtr<FJs
|
||||
|
||||
void UElevenLabsWebSocketProxy::HandleInterruption(const TSharedPtr<FJsonObject>& Root)
|
||||
{
|
||||
// Server has acknowledged the interruption — the old generation is fully stopped.
|
||||
// Resume accepting incoming audio and chat response parts (for the next turn).
|
||||
bIgnoreIncomingContent = false;
|
||||
UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack received — resuming content processing)."));
|
||||
UE_LOG(LogElevenLabsWS, Log, TEXT("Agent interrupted (server ack received)."));
|
||||
OnInterrupted.Broadcast();
|
||||
}
|
||||
|
||||
|
||||
@ -226,12 +226,6 @@ private:
|
||||
// Used to compute [T+Xs] session-relative timestamps in all log messages.
|
||||
double SessionStartTime = 0.0;
|
||||
|
||||
// Set to true in SendInterrupt() so that in-flight audio frames and
|
||||
// agent_chat_response_part messages from the interrupted generation are silently
|
||||
// discarded instead of re-triggering the speaking/generating state.
|
||||
// Cleared when the server sends its "interruption" acknowledgement.
|
||||
bool bIgnoreIncomingContent = false;
|
||||
|
||||
public:
|
||||
// Set by UElevenLabsConversationalAgentComponent before calling Connect().
|
||||
// Controls turn_timeout in conversation_initiation_client_data.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user