Fix: distinguish binary audio frames from binary JSON frames

ElevenLabs sends two kinds of binary WebSocket frames:
  1. JSON control messages (start with '{') — decode as UTF-8, route to OnWsMessage
  2. Raw PCM audio (binary, does not start with '{') — broadcast directly as audio

Previously all binary frames were decoded as UTF-8 JSON, causing
"Failed to parse WebSocket message as JSON" for every audio frame.

Fix: peek at first byte of assembled frame buffer:
  - '{' → UTF-8 JSON path (null-terminated, routed to existing message handler)
  - anything else → raw PCM path (broadcast directly to OnAudioReceived)

Also: improved the "Failed to parse JSON" log to show the first 80 chars of the message,
and added a verbose-only hex dump of up to the first 8 bytes of each binary audio frame for diagnostics.
Compiles cleanly on UE 5.5 Win64.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
j.foucher 2026-02-19 13:54:34 +01:00
parent 669c503d06
commit 483456728d

View File

@ -186,7 +186,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message)
TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(Message); TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(Message);
if (!FJsonSerializer::Deserialize(Reader, Root) || !Root.IsValid()) if (!FJsonSerializer::Deserialize(Reader, Root) || !Root.IsValid())
{ {
UE_LOG(LogElevenLabsWS, Warning, TEXT("Failed to parse WebSocket message as JSON.")); UE_LOG(LogElevenLabsWS, Warning, TEXT("Failed to parse WebSocket message as JSON (first 80 chars): %.80s"), *Message);
return; return;
} }
@ -237,9 +237,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message)
void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size, SIZE_T BytesRemaining) void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size, SIZE_T BytesRemaining)
{ {
// ElevenLabs sends its JSON messages as binary WebSocket frames (not text frames). // Accumulate fragments until BytesRemaining == 0.
// Accumulate fragments until BytesRemaining == 0, then parse the complete message.
const uint8* Bytes = static_cast<const uint8*>(Data); const uint8* Bytes = static_cast<const uint8*>(Data);
BinaryFrameBuffer.Append(Bytes, Size); BinaryFrameBuffer.Append(Bytes, Size);
@ -249,15 +247,49 @@ void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size,
return; return;
} }
// Full message received — interpret as UTF-8 JSON const int32 TotalSize = BinaryFrameBuffer.Num();
// Peek at first byte to distinguish JSON (starts with '{') from raw binary audio.
const bool bLooksLikeJson = (TotalSize > 0 && BinaryFrameBuffer[0] == '{');
if (bLooksLikeJson)
{
// Null-terminate safely then decode as UTF-8 JSON
BinaryFrameBuffer.Add(0);
const FString JsonString = FString(UTF8_TO_TCHAR( const FString JsonString = FString(UTF8_TO_TCHAR(
reinterpret_cast<const char*>(BinaryFrameBuffer.GetData()))); reinterpret_cast<const char*>(BinaryFrameBuffer.GetData())));
BinaryFrameBuffer.Reset(); BinaryFrameBuffer.Reset();
// Route through the existing text message handler const UElevenLabsSettings* Settings = FPS_AI_Agent_ElevenLabsModule::Get().GetSettings();
if (Settings->bVerboseLogging)
{
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Binary JSON frame (%d bytes): %.120s"), TotalSize, *JsonString);
}
OnWsMessage(JsonString); OnWsMessage(JsonString);
} }
else
{
// Raw binary audio frame — PCM bytes sent directly without Base64/JSON wrapper.
// Log first few bytes as hex to help diagnose the format.
const UElevenLabsSettings* Settings = FPS_AI_Agent_ElevenLabsModule::Get().GetSettings();
if (Settings->bVerboseLogging)
{
FString HexPreview;
const int32 PreviewBytes = FMath::Min(TotalSize, 8);
for (int32 i = 0; i < PreviewBytes; i++)
{
HexPreview += FString::Printf(TEXT("%02X "), BinaryFrameBuffer[i]);
}
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Binary audio frame: %d bytes | first bytes: %s"), TotalSize, *HexPreview);
}
// Broadcast raw PCM bytes directly to the audio queue.
TArray<uint8> PCMData = MoveTemp(BinaryFrameBuffer);
BinaryFrameBuffer.Reset();
OnAudioReceived.Broadcast(PCMData);
}
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// Message handlers // Message handlers