%PDF- %PDF-
| Direktori : /home/vacivi36/vittasync.vacivitta.com.br/vittasync/node/deps/v8/src/strings/ |
| Current File : /home/vacivi36/vittasync.vacivitta.com.br/vittasync/node/deps/v8/src/strings/unicode-decoder.cc |
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/strings/unicode-decoder.h"
#include "src/strings/unicode-inl.h"
#include "src/utils/memcopy.h"
#if V8_ENABLE_WEBASSEMBLY
#include "src/third_party/utf8-decoder/generalized-utf8-decoder.h"
#endif
namespace v8 {
namespace internal {
namespace {
// Per-decoder policy bundle consumed by Utf8DecoderBase in this file. The
// primary template is only declared; each supported decoder supplies an
// explicit specialization providing:
//   - IsInvalidSurrogatePair(lead, trail): whether two consecutively decoded
//     code points form a pair that must be rejected,
//   - kAllowIncompleteSequences: whether malformed input is tolerated,
//   - DfaDecoder: the byte-wise state machine used for decoding.
template <class Decoder>
struct DecoderTraits;
// Policy for Utf8Decoder: malformed input is tolerated (the decoding code in
// this file substitutes unibrow::Utf8::kBadChar for it) and no pair of decoded
// code points is ever treated as invalid.
template <>
struct DecoderTraits<Utf8Decoder> {
  // State machine that consumes the input one byte at a time.
  using DfaDecoder = Utf8DfaDecoder;

  // Rejected or truncated sequences do not invalidate the whole input.
  static const bool kAllowIncompleteSequences = true;

  // Always answers "valid". |lead| is not inspected.
  static bool IsInvalidSurrogatePair(uint32_t lead, uint32_t trail) {
    // Only Unicode scalar values come out of the DfaDecoder, and every
    // sequence of scalar values is valid; a surrogate here would be a bug.
    DCHECK(!unibrow::Utf16::IsLeadSurrogate(trail));
    DCHECK(!unibrow::Utf16::IsTrailSurrogate(trail));
    return false;
  }
};
#if V8_ENABLE_WEBASSEMBLY
// Policy for Wtf8Decoder: malformed input invalidates the data, and two
// consecutively decoded code points are rejected whenever
// unibrow::Utf16::IsSurrogatePair() holds for them.
template <>
struct DecoderTraits<Wtf8Decoder> {
  // State machine that consumes the input one byte at a time.
  using DfaDecoder = GeneralizedUtf8DfaDecoder;

  // Rejected or truncated sequences make the whole input invalid.
  static const bool kAllowIncompleteSequences = false;

  // Returns true when (|lead|, |trail|) is reported as a surrogate pair by
  // unibrow::Utf16, i.e. when the two code points must not appear
  // back-to-back in the input.
  static bool IsInvalidSurrogatePair(uint32_t lead, uint32_t trail) {
    const bool forms_surrogate_pair =
        unibrow::Utf16::IsSurrogatePair(lead, trail);
    return forms_surrogate_pair;
  }
};
// Policy for StrictUtf8Decoder: same state machine as Utf8Decoder, but any
// rejected or truncated sequence makes the whole input invalid.
template <>
struct DecoderTraits<StrictUtf8Decoder> {
  // State machine that consumes the input one byte at a time.
  using DfaDecoder = Utf8DfaDecoder;

  // Malformed input is not tolerated.
  static const bool kAllowIncompleteSequences = false;

  // Always answers "valid". |lead| is not inspected.
  static bool IsInvalidSurrogatePair(uint32_t lead, uint32_t trail) {
    // Only Unicode scalar values come out of the DfaDecoder, and every
    // sequence of scalar values is valid; a surrogate here would be a bug.
    DCHECK(!unibrow::Utf16::IsLeadSurrogate(trail));
    DCHECK(!unibrow::Utf16::IsTrailSurrogate(trail));
    return false;
  }
};
#endif // V8_ENABLE_WEBASSEMBLY
} // namespace
// Scans |data| once, without producing output, in order to
//   - classify it (encoding_: kAscii, kLatin1, kUtf16 or kInvalid), and
//   - count the UTF-16 code units its decoded form occupies (utf16_length_).
//
// The leading run reported by NonAsciiStart() is accounted for up front; the
// loop below only walks the remainder. How malformed input is treated depends
// on DecoderTraits<Decoder>::kAllowIncompleteSequences: when true each
// malformed sequence counts as one unibrow::Utf8::kBadChar, when false the
// input is marked kInvalid and scanning stops (utf16_length_ is then only a
// partial count).
template <class Decoder>
Utf8DecoderBase<Decoder>::Utf8DecoderBase(base::Vector<const uint8_t> data)
    : encoding_(Encoding::kAscii),
      non_ascii_start_(NonAsciiStart(data.begin(), data.length())),
      utf16_length_(non_ascii_start_) {
  using Traits = DecoderTraits<Decoder>;

  // Nothing beyond the leading run: encoding_ stays kAscii.
  if (non_ascii_start_ == data.length()) return;

  bool is_one_byte = true;
  auto state = Traits::DfaDecoder::kAccept;
  uint32_t current = 0;   // Code point being assembled by the DFA.
  uint32_t previous = 0;  // Last completed code point (for pair checks).
  const uint8_t* cursor = data.begin() + non_ascii_start_;
  const uint8_t* end = data.begin() + data.length();

  while (cursor < end) {
    // Fast path: a single-byte character while not inside a multi-byte
    // sequence contributes exactly one code unit.
    if (V8_LIKELY(*cursor <= unibrow::Utf8::kMaxOneByteChar &&
                  state == Traits::DfaDecoder::kAccept)) {
      DCHECK_EQ(0u, current);
      DCHECK(!Traits::IsInvalidSurrogatePair(previous, *cursor));
      previous = *cursor;
      utf16_length_++;
      cursor++;
      continue;
    }

    auto previous_state = state;
    // Feed one byte to the DFA; it updates both the state and the partially
    // assembled code point in place.
    Traits::DfaDecoder::Decode(*cursor, &state, &current);
    if (state < Traits::DfaDecoder::kAccept) {
      DCHECK_EQ(state, Traits::DfaDecoder::kReject);
      if (Traits::kAllowIncompleteSequences) {
        // Count the malformed sequence as one kBadChar, which does not fit in
        // Latin-1, and restart the DFA.
        state = Traits::DfaDecoder::kAccept;
        static_assert(unibrow::Utf8::kBadChar > unibrow::Latin1::kMaxChar);
        is_one_byte = false;
        utf16_length_++;
        previous = unibrow::Utf8::kBadChar;
        current = 0;
        // If we were trying to continue a multibyte sequence, try this byte
        // again.
        if (previous_state != Traits::DfaDecoder::kAccept) continue;
      } else {
        encoding_ = Encoding::kInvalid;
        return;
      }
    } else if (state == Traits::DfaDecoder::kAccept) {
      // A full code point has been decoded.
      if (Traits::IsInvalidSurrogatePair(previous, current)) {
        encoding_ = Encoding::kInvalid;
        return;
      }
      is_one_byte = is_one_byte && current <= unibrow::Latin1::kMaxChar;
      utf16_length_++;
      // Code points above kMaxNonSurrogateCharCode need a second code unit.
      if (current > unibrow::Utf16::kMaxNonSurrogateCharCode) utf16_length_++;
      previous = current;
      current = 0;
    }
    cursor++;
  }

  if (state == Traits::DfaDecoder::kAccept) {
    encoding_ = is_one_byte ? Encoding::kLatin1 : Encoding::kUtf16;
  } else if (Traits::kAllowIncompleteSequences) {
    // Input ended inside a multi-byte sequence: it becomes one kBadChar.
    static_assert(unibrow::Utf8::kBadChar > unibrow::Latin1::kMaxChar);
    encoding_ = Encoding::kUtf16;
    utf16_length_++;
  } else {
    encoding_ = Encoding::kInvalid;
  }
}
// Decodes |data| into |out| as a sequence of Char code units.
//
// Must only be called when the constructor did not mark the input invalid
// (DCHECKed). The first non_ascii_start_ bytes are copied verbatim; the rest
// is run through the DFA. With sizeof(Char) == 1 every decoded code point is
// stored as a single unit; otherwise code points above
// unibrow::Utf16::kMaxNonSurrogateCharCode are written as a lead/trail
// surrogate pair.
//
// NOTE(review): no bounds are checked here; |out| is presumably sized from
// the length computed by the constructor -- confirm against callers.
template <class Decoder>
template <typename Char>
void Utf8DecoderBase<Decoder>::Decode(Char* out,
                                      base::Vector<const uint8_t> data) {
  using Traits = DecoderTraits<Decoder>;
  DCHECK(!is_invalid());

  // Bulk-copy the leading run that needs no decoding.
  CopyChars(out, data.begin(), non_ascii_start_);
  out += non_ascii_start_;

  auto state = Traits::DfaDecoder::kAccept;
  uint32_t current = 0;  // Code point being assembled by the DFA.
  const uint8_t* cursor = data.begin() + non_ascii_start_;
  const uint8_t* end = data.begin() + data.length();

  while (cursor < end) {
    // Fast path: a single-byte character while not inside a multi-byte
    // sequence is emitted as-is.
    if (V8_LIKELY(*cursor <= unibrow::Utf8::kMaxOneByteChar &&
                  state == Traits::DfaDecoder::kAccept)) {
      DCHECK_EQ(0u, current);
      *(out++) = static_cast<Char>(*cursor);
      cursor++;
      continue;
    }

    auto previous_state = state;
    // Feed one byte to the DFA; it updates both the state and the partially
    // assembled code point in place.
    Traits::DfaDecoder::Decode(*cursor, &state, &current);
    if (Traits::kAllowIncompleteSequences &&
        state < Traits::DfaDecoder::kAccept) {
      // Malformed sequence: emit one kBadChar and restart the DFA.
      state = Traits::DfaDecoder::kAccept;
      *(out++) = static_cast<Char>(unibrow::Utf8::kBadChar);
      current = 0;
      // If we were trying to continue a multibyte sequence, try this byte
      // again.
      if (previous_state != Traits::DfaDecoder::kAccept) continue;
    } else if (state == Traits::DfaDecoder::kAccept) {
      // A full code point has been decoded.
      if (sizeof(Char) == 1 ||
          current <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
        *(out++) = static_cast<Char>(current);
      } else {
        *(out++) = unibrow::Utf16::LeadSurrogate(current);
        *(out++) = unibrow::Utf16::TrailSurrogate(current);
      }
      current = 0;
    }
    cursor++;
  }

  if (Traits::kAllowIncompleteSequences &&
      state != Traits::DfaDecoder::kAccept) {
    // Input ended inside a multi-byte sequence: emit a final kBadChar.
    *out = static_cast<Char>(unibrow::Utf8::kBadChar);
  } else {
    DCHECK_EQ(state, Traits::DfaDecoder::kAccept);
  }
}
// The member templates above are defined in this .cc file, so every decoder
// that is used elsewhere needs explicit instantiations here. For a given
// Decoder the macro instantiates the constructor and both Decode() overloads
// (uint8_t and uint16_t output). The macro deliberately omits the trailing
// semicolon; it is supplied at each use site.
#define DEFINE_UNICODE_DECODER(Decoder) \
template V8_EXPORT_PRIVATE Utf8DecoderBase<Decoder>::Utf8DecoderBase( \
base::Vector<const uint8_t> data); \
template V8_EXPORT_PRIVATE void Utf8DecoderBase<Decoder>::Decode( \
uint8_t* out, base::Vector<const uint8_t> data); \
template V8_EXPORT_PRIVATE void Utf8DecoderBase<Decoder>::Decode( \
uint16_t* out, base::Vector<const uint8_t> data)
// Always available.
DEFINE_UNICODE_DECODER(Utf8Decoder);
// Only instantiated when WebAssembly support is compiled in, matching the
// guard around the corresponding DecoderTraits specializations.
#if V8_ENABLE_WEBASSEMBLY
DEFINE_UNICODE_DECODER(Wtf8Decoder);
DEFINE_UNICODE_DECODER(StrictUtf8Decoder);
#endif // V8_ENABLE_WEBASSEMBLY
// The helper macro is local to this file.
#undef DEFINE_UNICODE_DECODER
} // namespace internal
} // namespace v8