validate_utf8.js 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  'use strict';

  // Byte ranges from the well-formed UTF-8 table (RFC 3629, section 4).
  // Any byte outside these ranges can never start or continue a sequence.
  const ASCII_MAX = 0x7f;
  const CONTINUATION_MIN = 0x80;
  const CONTINUATION_MAX = 0xbf;
  // 0xC0 and 0xC1 are excluded: they could only encode overlong ASCII.
  const TWO_BYTE_LEAD_MIN = 0xc2;
  const TWO_BYTE_LEAD_MAX = 0xdf;
  const THREE_BYTE_LEAD_MIN = 0xe0;
  const THREE_BYTE_LEAD_MAX = 0xef;
  // 0xF5 and above would encode code points beyond U+10FFFF.
  const FOUR_BYTE_LEAD_MIN = 0xf0;
  const FOUR_BYTE_LEAD_MAX = 0xf4;

  /**
   * Determines whether a range of bytes is well-formed UTF-8.
   *
   * Besides checking the lead/continuation structure, this rejects overlong
   * encodings, encoded UTF-16 surrogates (U+D800..U+DFFF) and code points
   * above U+10FFFF, all of which are ill-formed according to RFC 3629.
   *
   * @param {Buffer|Uint8Array} bytes An array of 8-bit bytes. Must be indexable and have a length property
   * @param {Number} [start=0] The index of the first byte to validate (inclusive)
   * @param {Number} [end=bytes.length] The index at which to stop validating (exclusive)
   * @returns {boolean} True if the range holds only complete, well-formed UTF-8 sequences
   */
  function validateUtf8(bytes, start = 0, end = bytes.length) {
    // Clamp the window to the buffer so out-of-range indexes are never read.
    const first = Math.max(start, 0);
    const last = Math.min(end, bytes.length);

    // Number of continuation bytes still owed by the current sequence.
    let remaining = 0;
    // Allowed range for the next continuation byte. It is narrowed for the
    // byte right after certain lead bytes, then reset to the generic range.
    let low = CONTINUATION_MIN;
    let high = CONTINUATION_MAX;

    for (let i = first; i < last; i += 1) {
      const byte = bytes[i];

      if (remaining > 0) {
        // Written as a negated conjunction so non-numeric values also fail.
        if (!(byte >= low && byte <= high)) {
          return false;
        }
        remaining -= 1;
        low = CONTINUATION_MIN;
        high = CONTINUATION_MAX;
        continue;
      }

      if (byte >= 0 && byte <= ASCII_MAX) {
        continue;
      }

      if (byte >= TWO_BYTE_LEAD_MIN && byte <= TWO_BYTE_LEAD_MAX) {
        remaining = 1;
      } else if (byte >= THREE_BYTE_LEAD_MIN && byte <= THREE_BYTE_LEAD_MAX) {
        remaining = 2;
        if (byte === 0xe0) {
          // E0 80..9F would be an overlong encoding of U+0000..U+07FF.
          low = 0xa0;
        } else if (byte === 0xed) {
          // ED A0..BF would encode the surrogates U+D800..U+DFFF.
          high = 0x9f;
        }
      } else if (byte >= FOUR_BYTE_LEAD_MIN && byte <= FOUR_BYTE_LEAD_MAX) {
        remaining = 3;
        if (byte === 0xf0) {
          // F0 80..8F would be an overlong encoding of U+0000..U+FFFF.
          low = 0x90;
        } else if (byte === 0xf4) {
          // F4 90..BF would encode code points above U+10FFFF.
          high = 0x8f;
        }
      } else {
        // Stray continuation byte or a byte that is never legal in UTF-8.
        return false;
      }
    }

    // A sequence cut off by the end of the range is not valid.
    return remaining === 0;
  }
  // Public API of this module: the UTF-8 validator.
  module.exports.validateUtf8 = validateUtf8;