From 97096b21db81b1e0aef9dc9cb65e3c693c786749 Mon Sep 17 00:00:00 2001 From: Adrian Niculescu <15037449+adrian-niculescu@users.noreply.github.com> Date: Mon, 15 Jun 2026 16:06:51 +0300 Subject: [PATCH 1/2] Make V8 string to NSString conversions UTF-16 faithful V8 strings are UTF-16, but several bridge points round-tripped them through UTF-8 before building an NSString (and one reverse direction did too). That path corrupted lone surrogates, which were replaced with U+FFFD, and where it used C-string APIs (stringWithUTF8String:, [NSString UTF8String] folded into a std::string) it truncated at an embedded NUL. Rework tns::ToUtf16String to read the V8 string's native two-byte buffer directly, which also drops the deprecated std::codecvt_utf8_utf16. Switch the DictionaryAdapter, Interop and ArgConverter string sites to ToUtf16String + stringWithCharacters:length:, and pass NSString straight to ToV8String instead of going through a UTF8String C string. Add TestRunner cases asserting lone surrogates survive the JS to NSString bridge: each reads the bridged string's first UTF-16 code unit straight out of its buffer and checks it is unchanged (high U+D834 and low U+DC00), whereas the old UTF-8 round trip would have turned either into U+FFFD. 
--- NativeScript/runtime/ArgConverter.mm | 7 ++--- NativeScript/runtime/DictionaryAdapter.mm | 13 ++++----- NativeScript/runtime/Helpers.mm | 25 ++++++++++++----- NativeScript/runtime/Interop.mm | 6 +++-- TestRunner/app/tests/ApiTests.js | 33 +++++++++++++++++++++++ 5 files changed, 67 insertions(+), 17 deletions(-) diff --git a/NativeScript/runtime/ArgConverter.mm b/NativeScript/runtime/ArgConverter.mm index bd6956c6..16600f96 100644 --- a/NativeScript/runtime/ArgConverter.mm +++ b/NativeScript/runtime/ArgConverter.mm @@ -294,7 +294,8 @@ } else if (value->IsString()) { if (type == BinaryTypeEncodingType::IdEncoding || type == BinaryTypeEncodingType::InterfaceDeclarationReference) { - id data = tns::ToNSString(isolate, value); + std::u16string strValue = tns::ToUtf16String(isolate, value); + id data = [NSString stringWithCharacters:(const unichar*)strValue.data() length:strValue.size()]; // this feels wrong but follows the other CFBridgingRetain calls // and also solves a leak auto ref = CFBridgingRetain(data); @@ -929,8 +930,8 @@ } if ([obj isKindOfClass:[NSString class]]) { - const char* str = [obj UTF8String]; - args.GetReturnValue().Set(tns::ToV8String(isolate, str)); + NSString* nativeStr = (NSString*)obj; + args.GetReturnValue().Set(tns::ToV8String(isolate, nativeStr)); return; } diff --git a/NativeScript/runtime/DictionaryAdapter.mm b/NativeScript/runtime/DictionaryAdapter.mm index f6e85ce6..1dbd2c48 100644 --- a/NativeScript/runtime/DictionaryAdapter.mm +++ b/NativeScript/runtime/DictionaryAdapter.mm @@ -48,7 +48,8 @@ - (id)nextObject { bool success = array->Get(context, self->index_).ToLocal(&key); tns::Assert(success, isolate); self->index_ += 2; - NSString* result = tns::ToNSString(isolate, key); + std::u16string keyStr = tns::ToUtf16String(isolate, key); + NSString* result = [NSString stringWithCharacters:(const unichar*)keyStr.data() length:keyStr.length()]; return result; } @@ -116,8 +117,8 @@ - (id)nextObject { bool success = properties->Get(context, 
(uint)self->index_).ToLocal(&value); tns::Assert(success, isolate); self->index_++; - std::string result = tns::ToString(isolate, value); - return [NSString stringWithUTF8String:result.c_str()]; + std::u16string result = tns::ToUtf16String(isolate, value); + return [NSString stringWithCharacters:(const unichar*)result.data() length:result.size()]; } return nil; @@ -139,8 +140,8 @@ - (NSArray*)allObjects { Local value; bool success = properties->Get(context, i).ToLocal(&value); tns::Assert(success, isolate); - std::string result = tns::ToString(isolate, value); - [array addObject:[NSString stringWithUTF8String:result.c_str()]]; + std::u16string result = tns::ToUtf16String(isolate, value); + [array addObject:[NSString stringWithCharacters:(const unichar*)result.data() length:result.size()]]; } return array; @@ -214,7 +215,7 @@ - (id)objectForKey:(id)aKey { bool success = obj->Get(context, key).ToLocal(&value); tns::Assert(success, isolate); } else if ([aKey isKindOfClass:[NSString class]]) { - const char* key = [aKey UTF8String]; + NSString* key = (NSString*)aKey; Local keyV8Str = tns::ToV8String(isolate, key); if (obj->IsMap()) { diff --git a/NativeScript/runtime/Helpers.mm b/NativeScript/runtime/Helpers.mm index d3174360..6d3cab9d 100644 --- a/NativeScript/runtime/Helpers.mm +++ b/NativeScript/runtime/Helpers.mm @@ -24,13 +24,26 @@ } // namespace std::u16string tns::ToUtf16String(Isolate* isolate, const Local& value) { - std::string valueStr = tns::ToString(isolate, value); -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - // FIXME: std::codecvt_utf8_utf16 is deprecated - std::wstring_convert, char16_t> convert; - std::u16string value16 = convert.from_bytes(valueStr); + // Read the V8 string's native UTF-16 buffer directly instead of round-tripping + // through UTF-8, which corrupts lone surrogates (replaced with U+FFFD) and is + // slower. This also drops the deprecated std::codecvt_utf8_utf16. 
+ if (value.IsEmpty()) { + return std::u16string(); + } + + if (value->IsStringObject()) { + Local obj = value.As()->ValueOf(); + return tns::ToUtf16String(isolate, obj); + } + + v8::String::Value result(isolate, value); + + uint16_t* val = *result; + if (val == nullptr) { + return std::u16string(); + } - return value16; + return std::u16string((char16_t*)val, result.length()); } std::vector tns::ToVector(const std::string& value) { diff --git a/NativeScript/runtime/Interop.mm b/NativeScript/runtime/Interop.mm index 23bc4647..f84292c3 100644 --- a/NativeScript/runtime/Interop.mm +++ b/NativeScript/runtime/Interop.mm @@ -324,7 +324,8 @@ inline bool isBool() { } else if (argHelper.isString() && (typeEncoding->type == BinaryTypeEncodingType::InterfaceDeclarationReference || typeEncoding->type == BinaryTypeEncodingType::IdEncoding)) { - NSString* result = tns::ToNSString(isolate, arg); + std::u16string str = tns::ToUtf16String(isolate, arg); + NSString* result = [NSString stringWithCharacters:(const unichar*)str.data() length:str.size()]; Interop::SetValue(dest, result); } else if (Interop::IsNumbericType(typeEncoding->type) || tns::IsNumber(arg)) { double value = tns::ToNumber(isolate, arg); @@ -686,7 +687,8 @@ inline bool isBool() { if (arg.IsEmpty() || arg->IsNullOrUndefined()) { return nil; } else if (tns::IsString(arg)) { - NSString* result = tns::ToNSString(isolate, arg); + std::u16string value = tns::ToUtf16String(isolate, arg); + NSString* result = [NSString stringWithCharacters:(const unichar*)value.data() length:value.size()]; return result; } else if (tns::IsNumber(arg)) { double value = tns::ToNumber(isolate, arg); diff --git a/TestRunner/app/tests/ApiTests.js b/TestRunner/app/tests/ApiTests.js index 84834d17..d4c144df 100644 --- a/TestRunner/app/tests/ApiTests.js +++ b/TestRunner/app/tests/ApiTests.js @@ -12,6 +12,39 @@ describe(module.id, function () { expect(object.hash).toBe(3); }); + it("preserves a lone high surrogate when bridging a JS string to 
NSString", function () { + // A lone high surrogate (U+D834, range U+D800-U+DBFF) is a valid JS string + // code unit but has no UTF-8 encoding. The old UTF-8 round-trip replaced it + // with U+FFFD; faithful UTF-16 bridging keeps it. Read the code unit straight + // out of the bridged string's UTF-16 buffer as a number: reading it back as a + // JS string would re-corrupt a lone surrogate, and converting it to UTF-8 to + // measure it is not reliable across OS versions. + var ns = NSString.stringWithString("\uD834"); + expect(ns.length).toBe(1); + + var buffer = interop.alloc(interop.sizeof(interop.types.uint16)); + ns.getCharactersRange(buffer, NSMakeRange(0, 1)); + var codeUnit = new interop.Reference(interop.types.uint16, buffer).value; + interop.free(buffer); + + expect(codeUnit).toBe(0xD834); // 0xFFFD (65533) after a lossy UTF-8 round-trip + }); + + it("preserves a lone low surrogate when bridging a JS string to NSString", function () { + // The low surrogate range (U+DC00-U+DFFF) is a different bit pattern that also + // has no UTF-8 encoding and must survive the bridge intact; observed the same + // way as the high-surrogate case above. 
+ var ns = NSString.stringWithString("\uDC00"); + expect(ns.length).toBe(1); + + var buffer = interop.alloc(interop.sizeof(interop.types.uint16)); + ns.getCharactersRange(buffer, NSMakeRange(0, 1)); + var codeUnit = new interop.Reference(interop.types.uint16, buffer).value; + interop.free(buffer); + + expect(codeUnit).toBe(0xDC00); // 0xFFFD (65533) after a lossy UTF-8 round-trip + }); + it("NSArray from native (uncached) array access", function () { const res = TNSObjCTypes.new().getNSArrayOfNSURLs(); console.log(res); From cc3ef531d1d5a513f55918f0e334c84d3e849d9c Mon Sep 17 00:00:00 2001 From: Adrian Niculescu <15037449+adrian-niculescu@users.noreply.github.com> Date: Mon, 15 Jun 2026 16:06:51 +0300 Subject: [PATCH 2/2] Routed JS-to-NSString bridge through a UTF-16-faithful ToNSString Made tns::ToNSString read the V8 string's UTF-16 buffer directly and pointed the six bridge sites at it, instead of building a std::u16string and copying it into NSString at each one. That drops the extra copy and the repeated stringWithCharacters boilerplate, keeps the conversion in one place, and brings Interop back to matching vanilla. Kept the ToUtf16String rewrite as the general accessor. Added an embedded-NUL test next to the lone-surrogate ones, since the bridge now has to preserve embedded NUL code units too. 
--- NativeScript/runtime/ArgConverter.mm | 3 +-- NativeScript/runtime/DictionaryAdapter.mm | 9 +++------ NativeScript/runtime/Helpers.h | 13 ++++++------- NativeScript/runtime/Interop.mm | 6 ++---- TestRunner/app/tests/ApiTests.js | 17 +++++++++++++++++ 5 files changed, 29 insertions(+), 19 deletions(-) diff --git a/NativeScript/runtime/ArgConverter.mm b/NativeScript/runtime/ArgConverter.mm index 16600f96..2197c820 100644 --- a/NativeScript/runtime/ArgConverter.mm +++ b/NativeScript/runtime/ArgConverter.mm @@ -294,8 +294,7 @@ } else if (value->IsString()) { if (type == BinaryTypeEncodingType::IdEncoding || type == BinaryTypeEncodingType::InterfaceDeclarationReference) { - std::u16string strValue = tns::ToUtf16String(isolate, value); - id data = [NSString stringWithCharacters:(const unichar*)strValue.data() length:strValue.size()]; + id data = tns::ToNSString(isolate, value); // this feels wrong but follows the other CFBridgingRetain calls // and also solves a leak auto ref = CFBridgingRetain(data); diff --git a/NativeScript/runtime/DictionaryAdapter.mm b/NativeScript/runtime/DictionaryAdapter.mm index 1dbd2c48..1c548163 100644 --- a/NativeScript/runtime/DictionaryAdapter.mm +++ b/NativeScript/runtime/DictionaryAdapter.mm @@ -48,8 +48,7 @@ - (id)nextObject { bool success = array->Get(context, self->index_).ToLocal(&key); tns::Assert(success, isolate); self->index_ += 2; - std::u16string keyStr = tns::ToUtf16String(isolate, key); - NSString* result = [NSString stringWithCharacters:(const unichar*)keyStr.data() length:keyStr.length()]; + NSString* result = tns::ToNSString(isolate, key); return result; } @@ -117,8 +116,7 @@ - (id)nextObject { bool success = properties->Get(context, (uint)self->index_).ToLocal(&value); tns::Assert(success, isolate); self->index_++; - std::u16string result = tns::ToUtf16String(isolate, value); - return [NSString stringWithCharacters:(const unichar*)result.data() length:result.size()]; + return tns::ToNSString(isolate, value); } return 
nil; @@ -140,8 +138,7 @@ - (NSArray*)allObjects { Local value; bool success = properties->Get(context, i).ToLocal(&value); tns::Assert(success, isolate); - std::u16string result = tns::ToUtf16String(isolate, value); - [array addObject:[NSString stringWithCharacters:(const unichar*)result.data() length:result.size()]]; + [array addObject:tns::ToNSString(isolate, value)]; } return array; diff --git a/NativeScript/runtime/Helpers.h b/NativeScript/runtime/Helpers.h index 41b725d6..359ec9cc 100644 --- a/NativeScript/runtime/Helpers.h +++ b/NativeScript/runtime/Helpers.h @@ -106,8 +106,8 @@ inline NSString* ToNSString(const std::string& v) { length:v.length() encoding:NSUTF8StringEncoding] S_AUTORELEASE]; } -// this method is a copy of ToString to avoid needless std::string<->NSString -// conversions +// Reads the V8 string's native UTF-16 buffer directly so lone surrogates and +// embedded NUL survive the bridge; a UTF-8 round-trip loses both. inline NSString* ToNSString(v8::Isolate* isolate, const v8::Local& value) { if (value.IsEmpty()) { @@ -119,16 +119,15 @@ inline NSString* ToNSString(v8::Isolate* isolate, return ToNSString(isolate, obj); } - v8::String::Utf8Value result(isolate, value); + v8::String::Value result(isolate, value); - const char* val = *result; + const uint16_t* val = *result; if (val == nullptr) { return @""; } - return [[[NSString alloc] initWithBytes:*result - length:result.length() - encoding:NSUTF8StringEncoding] S_AUTORELEASE]; + return [NSString stringWithCharacters:(const unichar*)val + length:result.length()]; } #endif std::u16string ToUtf16String(v8::Isolate* isolate, diff --git a/NativeScript/runtime/Interop.mm b/NativeScript/runtime/Interop.mm index f84292c3..23bc4647 100644 --- a/NativeScript/runtime/Interop.mm +++ b/NativeScript/runtime/Interop.mm @@ -324,8 +324,7 @@ inline bool isBool() { } else if (argHelper.isString() && (typeEncoding->type == BinaryTypeEncodingType::InterfaceDeclarationReference || typeEncoding->type == 
BinaryTypeEncodingType::IdEncoding)) { - std::u16string str = tns::ToUtf16String(isolate, arg); - NSString* result = [NSString stringWithCharacters:(const unichar*)str.data() length:str.size()]; + NSString* result = tns::ToNSString(isolate, arg); Interop::SetValue(dest, result); } else if (Interop::IsNumbericType(typeEncoding->type) || tns::IsNumber(arg)) { double value = tns::ToNumber(isolate, arg); @@ -687,8 +686,7 @@ inline bool isBool() { if (arg.IsEmpty() || arg->IsNullOrUndefined()) { return nil; } else if (tns::IsString(arg)) { - std::u16string value = tns::ToUtf16String(isolate, arg); - NSString* result = [NSString stringWithCharacters:(const unichar*)value.data() length:value.size()]; + NSString* result = tns::ToNSString(isolate, arg); return result; } else if (tns::IsNumber(arg)) { double value = tns::ToNumber(isolate, arg); diff --git a/TestRunner/app/tests/ApiTests.js b/TestRunner/app/tests/ApiTests.js index d4c144df..676b8f92 100644 --- a/TestRunner/app/tests/ApiTests.js +++ b/TestRunner/app/tests/ApiTests.js @@ -45,6 +45,23 @@ describe(module.id, function () { expect(codeUnit).toBe(0xDC00); // 0xFFFD (65533) after a lossy UTF-8 round-trip }); + it("preserves an embedded NUL when bridging a JS string to NSString", function () { + // U+0000 is a valid JS code unit but terminates a C string, so a bridge + // that went through char* would cut "a\0b" down to "a". Faithful UTF-16 + // bridging keeps all three units. Read the NUL unit straight out of the + // NSString's buffer so the check does not lean on a native-to-JS conversion. 
+ var withNul = "a" + String.fromCharCode(0) + "b"; + var ns = NSString.stringWithString(withNul); + expect(ns.length).toBe(3); + + var buffer = interop.alloc(interop.sizeof(interop.types.uint16)); + ns.getCharactersRange(buffer, NSMakeRange(1, 1)); + var codeUnit = new interop.Reference(interop.types.uint16, buffer).value; + interop.free(buffer); + + expect(codeUnit).toBe(0x0000); // a char* bridge would have stopped before this + }); + it("NSArray from native (uncached) array access", function () { const res = TNSObjCTypes.new().getNSArrayOfNSURLs(); console.log(res);