Fix: distinguish binary audio frames from binary JSON frames
ElevenLabs sends two kinds of binary WebSocket frames:
1. JSON control messages (first byte is '{') — decoded as UTF-8 and routed to OnWsMessage
2. Raw PCM audio (first byte is not '{') — broadcast directly as audio
Previously all binary frames were decoded as UTF-8 JSON, causing
"Failed to parse WebSocket message as JSON" for every audio frame.
Fix: peek at first byte of assembled frame buffer:
- '{' → UTF-8 JSON path (null-terminated, routed to existing message handler)
- anything else → raw PCM path (broadcast directly to OnAudioReceived)
Also: improved the "Failed to parse WebSocket message as JSON" log to show the first 80 chars of the message,
and added a verbose-only hex dump of up to the first 8 bytes of each binary audio frame for diagnostics.
Compiles cleanly on UE 5.5 Win64.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
669c503d06
commit
483456728d
@ -186,7 +186,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message)
|
||||
TSharedRef<TJsonReader<>> Reader = TJsonReaderFactory<>::Create(Message);
|
||||
if (!FJsonSerializer::Deserialize(Reader, Root) || !Root.IsValid())
|
||||
{
|
||||
UE_LOG(LogElevenLabsWS, Warning, TEXT("Failed to parse WebSocket message as JSON."));
|
||||
UE_LOG(LogElevenLabsWS, Warning, TEXT("Failed to parse WebSocket message as JSON (first 80 chars): %.80s"), *Message);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -237,9 +237,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message)
|
||||
|
||||
void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size, SIZE_T BytesRemaining)
|
||||
{
|
||||
// ElevenLabs sends its JSON messages as binary WebSocket frames (not text frames).
|
||||
// Accumulate fragments until BytesRemaining == 0, then parse the complete message.
|
||||
|
||||
// Accumulate fragments until BytesRemaining == 0.
|
||||
const uint8* Bytes = static_cast<const uint8*>(Data);
|
||||
BinaryFrameBuffer.Append(Bytes, Size);
|
||||
|
||||
@ -249,14 +247,48 @@ void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size,
|
||||
return;
|
||||
}
|
||||
|
||||
// Full message received — interpret as UTF-8 JSON
|
||||
const FString JsonString = FString(UTF8_TO_TCHAR(
|
||||
reinterpret_cast<const char*>(BinaryFrameBuffer.GetData())));
|
||||
const int32 TotalSize = BinaryFrameBuffer.Num();
|
||||
|
||||
BinaryFrameBuffer.Reset();
|
||||
// Peek at first byte to distinguish JSON (starts with '{') from raw binary audio.
|
||||
const bool bLooksLikeJson = (TotalSize > 0 && BinaryFrameBuffer[0] == '{');
|
||||
|
||||
// Route through the existing text message handler
|
||||
OnWsMessage(JsonString);
|
||||
if (bLooksLikeJson)
|
||||
{
|
||||
// Null-terminate safely then decode as UTF-8 JSON
|
||||
BinaryFrameBuffer.Add(0);
|
||||
const FString JsonString = FString(UTF8_TO_TCHAR(
|
||||
reinterpret_cast<const char*>(BinaryFrameBuffer.GetData())));
|
||||
BinaryFrameBuffer.Reset();
|
||||
|
||||
const UElevenLabsSettings* Settings = FPS_AI_Agent_ElevenLabsModule::Get().GetSettings();
|
||||
if (Settings->bVerboseLogging)
|
||||
{
|
||||
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Binary JSON frame (%d bytes): %.120s"), TotalSize, *JsonString);
|
||||
}
|
||||
|
||||
OnWsMessage(JsonString);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Raw binary audio frame — PCM bytes sent directly without Base64/JSON wrapper.
|
||||
// Log first few bytes as hex to help diagnose the format.
|
||||
const UElevenLabsSettings* Settings = FPS_AI_Agent_ElevenLabsModule::Get().GetSettings();
|
||||
if (Settings->bVerboseLogging)
|
||||
{
|
||||
FString HexPreview;
|
||||
const int32 PreviewBytes = FMath::Min(TotalSize, 8);
|
||||
for (int32 i = 0; i < PreviewBytes; i++)
|
||||
{
|
||||
HexPreview += FString::Printf(TEXT("%02X "), BinaryFrameBuffer[i]);
|
||||
}
|
||||
UE_LOG(LogElevenLabsWS, Verbose, TEXT("Binary audio frame: %d bytes | first bytes: %s"), TotalSize, *HexPreview);
|
||||
}
|
||||
|
||||
// Broadcast raw PCM bytes directly to the audio queue.
|
||||
TArray<uint8> PCMData = MoveTemp(BinaryFrameBuffer);
|
||||
BinaryFrameBuffer.Reset();
|
||||
OnAudioReceived.Broadcast(PCMData);
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user