diff --git a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp index 356de64..6fa399c 100644 --- a/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp +++ b/Unreal/PS_AI_Agent/Plugins/PS_AI_Agent_ElevenLabs/Source/PS_AI_Agent_ElevenLabs/Private/ElevenLabsWebSocketProxy.cpp @@ -186,7 +186,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message) TSharedRef> Reader = TJsonReaderFactory<>::Create(Message); if (!FJsonSerializer::Deserialize(Reader, Root) || !Root.IsValid()) { - UE_LOG(LogElevenLabsWS, Warning, TEXT("Failed to parse WebSocket message as JSON.")); + UE_LOG(LogElevenLabsWS, Warning, TEXT("Failed to parse WebSocket message as JSON (first 80 chars): %.80s"), *Message); return; } @@ -237,9 +237,7 @@ void UElevenLabsWebSocketProxy::OnWsMessage(const FString& Message) void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size, SIZE_T BytesRemaining) { - // ElevenLabs sends its JSON messages as binary WebSocket frames (not text frames). - // Accumulate fragments until BytesRemaining == 0, then parse the complete message. - + // Accumulate fragments until BytesRemaining == 0. const uint8* Bytes = static_cast(Data); BinaryFrameBuffer.Append(Bytes, Size); @@ -249,14 +247,48 @@ void UElevenLabsWebSocketProxy::OnWsBinaryMessage(const void* Data, SIZE_T Size, return; } - // Full message received — interpret as UTF-8 JSON - const FString JsonString = FString(UTF8_TO_TCHAR( - reinterpret_cast(BinaryFrameBuffer.GetData()))); + const int32 TotalSize = BinaryFrameBuffer.Num(); - BinaryFrameBuffer.Reset(); + // Peek at first byte to distinguish JSON (starts with '{') from raw binary audio. + const bool bLooksLikeJson = (TotalSize > 0 && BinaryFrameBuffer[0] == '{'); - // Route through the existing text message handler - OnWsMessage(JsonString); + if (bLooksLikeJson) + { + // Null-terminate safely then decode as UTF-8 JSON + BinaryFrameBuffer.Add(0); + const FString JsonString = FString(UTF8_TO_TCHAR( + reinterpret_cast(BinaryFrameBuffer.GetData()))); + BinaryFrameBuffer.Reset(); + + const UElevenLabsSettings* Settings = FPS_AI_Agent_ElevenLabsModule::Get().GetSettings(); + if (Settings->bVerboseLogging) + { + UE_LOG(LogElevenLabsWS, Verbose, TEXT("Binary JSON frame (%d bytes): %.120s"), TotalSize, *JsonString); + } + + OnWsMessage(JsonString); + } + else + { + // Raw binary audio frame — PCM bytes sent directly without Base64/JSON wrapper. + // Log first few bytes as hex to help diagnose the format. + const UElevenLabsSettings* Settings = FPS_AI_Agent_ElevenLabsModule::Get().GetSettings(); + if (Settings->bVerboseLogging) + { + FString HexPreview; + const int32 PreviewBytes = FMath::Min(TotalSize, 8); + for (int32 i = 0; i < PreviewBytes; i++) + { + HexPreview += FString::Printf(TEXT("%02X "), BinaryFrameBuffer[i]); + } + UE_LOG(LogElevenLabsWS, Verbose, TEXT("Binary audio frame: %d bytes | first bytes: %s"), TotalSize, *HexPreview); + } + + // Broadcast raw PCM bytes directly to the audio queue. + TArray PCMData = MoveTemp(BinaryFrameBuffer); + BinaryFrameBuffer.Reset(); + OnAudioReceived.Broadcast(PCMData); + } } // ─────────────────────────────────────────────────────────────────────────────