// generate-identifier-regex.js (1.5 KB)

  // Note: run `npm install unicode-7.0.0` first.
  // Which Unicode version should be used?
  var version = '7.0.0';

  // Code points carrying the ID_Start property, with the ASCII range (<= 127) dropped.
  var start = require('unicode-' + version + '/properties/ID_Start/code-points')
    .filter(function(ch) { return ch > 127; });

  // Membership table for `start`, built once so the filter below does a single
  // property lookup per code point instead of rescanning `start` with indexOf
  // (which is quadratic over the size of the Unicode tables).
  var isStart = Object.create(null);
  start.forEach(function(ch) { isStart[ch] = true; });

  // U+200C (ZERO WIDTH NON-JOINER) and U+200D (ZERO WIDTH JOINER) are always
  // included -- presumably because ECMAScript accepts them inside identifiers --
  // followed by every non-ASCII ID_Continue code point that is not already in `start`.
  var cont = [0x200c, 0x200d].concat(require('unicode-' + version + '/properties/ID_Continue/code-points')
    .filter(function(ch) { return ch > 127 && !isStart[ch]; }));
  /**
   * Left-pads a string with "0" characters until it is at least `width` long.
   *
   * @param {string} str - Text to pad (here: a hexadecimal number).
   * @param {number} width - Minimum length of the result.
   * @returns {string} `str` unchanged when it is already long enough, otherwise
   *   `str` prefixed with as many zeros as needed.
   */
  function pad(str, width) {
    var padded = str;
    while (padded.length < width) {
      padded = "0" + padded;
    }
    return padded;
  }
  /**
   * Renders a code point as the source text of a JavaScript escape sequence.
   *
   * The returned string starts with a literal backslash, so it can be pasted
   * into generated source code: values whose hex form has at most two digits
   * become `\xHH`, everything else becomes `\uHHHH`.
   *
   * Values above 0xffff have more than four hex digits and would not form a
   * valid four-digit `\u` escape; callers are expected to pass only values up
   * to 0xffff.
   *
   * @param {number} code - Code point to escape (0 to 0xffff).
   * @returns {string} The escape sequence text.
   */
  function esc(code) {
    var hex = code.toString(16);
    if (hex.length <= 2) {
      return "\\x" + pad(hex, 2);
    }
    return "\\u" + pad(hex, 4);
  }
  /**
   * Compresses a list of code points into the two representations printed by
   * this script.
   *
   * Consecutive code points that are adjacent in `chars` are collapsed into
   * runs. A run is then stored in one of two ways:
   *   - up to 0xffff: appended to a string of regexp character-class text
   *     (`\xHH` / `\uHHHH` escapes, with `-` for ranges);
   *   - above 0xffff: appended to a flat number array as a pair
   *     [gap from the end of the previous astral run (initially 0x10000),
   *      run length minus one].
   *
   * NOTE(review): the gap encoding only stays non-negative when `chars` is in
   * ascending order -- confirm that the input tables are sorted.
   *
   * @param {number[]} chars - Code points to encode.
   * @returns {{nonASCII: string, astral: number[]}} Character-class text for
   *   the code points up to 0xffff and delta-encoded runs for the rest.
   */
  function generate(chars) {
    var astral = [];
    var re = "";
    // End of the previous astral run; astral gaps are measured from here.
    var at = 0x10000;
    for (var i = 0; i < chars.length; i++) {
      var from = chars[i];
      var to = from;
      // Grow the run while the next entry is the direct successor. A run is
      // never extended past 0xffff: one that straddled the boundary would be
      // sent to the astral branch as a whole, dropping its members below
      // 0x10000 from the string and producing a negative gap.
      while (i < chars.length - 1 && chars[i + 1] === to + 1 && to !== 0xffff) {
        i++;
        to++;
      }
      if (to <= 0xffff) {
        if (from === to) {
          re += esc(from);
        } else if (from + 1 === to) {
          // Two neighbours: listing both is shorter than writing a range.
          re += esc(from) + esc(to);
        } else {
          re += esc(from) + "-" + esc(to);
        }
      } else {
        astral.push(from - at, to - from);
        at = to;
      }
    }
    return {nonASCII: re, astral: astral};
  }
  // Encode both code point lists, then print four `var` declarations to stdout:
  // the character-class text for identifier-start and identifier-continue
  // characters up to 0xffff, and the number arrays for those above 0xffff.
  var startData = generate(start);
  var contData = generate(cont);
  console.log(" var nonASCIIidentifierStartChars = \"" + startData.nonASCII + "\";");
  console.log(" var nonASCIIidentifierChars = \"" + contData.nonASCII + "\";");
  console.log(" var astralIdentifierStartCodes = " + JSON.stringify(startData.astral) + ";");
  console.log(" var astralIdentifierCodes = " + JSON.stringify(contData.astral) + ";");