9#define IS_INVALID_CHAR(enc, ptr, n) (0)
12#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
15 return XML_TOK_PARTIAL_CHAR; \
16 if (IS_INVALID_CHAR(enc, ptr, n)) { \
17 *(nextTokPtr) = (ptr); \
18 return XML_TOK_INVALID; \
23#define INVALID_CASES(ptr, nextTokPtr) \
24 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
25 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
26 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
30 *(nextTokPtr) = (ptr); \
31 return XML_TOK_INVALID;
33#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
36 return XML_TOK_PARTIAL_CHAR; \
37 if (!IS_NAME_CHAR(enc, ptr, n)) { \
39 return XML_TOK_INVALID; \
44#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
46 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
48 return XML_TOK_INVALID; \
57 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
58 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
59 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
61#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
64 return XML_TOK_PARTIAL_CHAR; \
65 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
67 return XML_TOK_INVALID; \
72#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
74 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
76 return XML_TOK_INVALID; \
82 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
83 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
84 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
87#define PREFIX(ident) ident
94 const char *end,
const char **nextTokPtr)
104 INVALID_CASES(ptr, nextTokPtr)
106 if ((ptr +=
MINBPC(enc)) == end)
109 if ((ptr +=
MINBPC(enc)) == end)
115 *nextTokPtr = ptr +
MINBPC(enc);
132 const char *end,
const char **nextTokPtr)
138 return PREFIX(scanComment)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
140 *nextTokPtr = ptr +
MINBPC(enc);
153 if (ptr +
MINBPC(enc) == end)
179 const char *end,
int *tokPtr)
183 if (end - ptr !=
MINBPC(enc)*3)
224 const char *end,
const char **nextTokPtr)
227 const char *target = ptr;
231 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
238 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
240 if (!
PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
247 INVALID_CASES(ptr, nextTokPtr)
253 *nextTokPtr = ptr +
MINBPC(enc);
264 if (!
PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
272 *nextTokPtr = ptr +
MINBPC(enc);
286 const char *end,
const char **nextTokPtr)
292 if (end - ptr < 6 *
MINBPC(enc))
294 for (i = 0; i < 6; i++, ptr +=
MINBPC(enc)) {
306 const char *end,
const char **nextTokPtr)
311 size_t n = end - ptr;
312 if (n & (
MINBPC(enc) - 1)) {
333 *nextTokPtr = ptr +
MINBPC(enc);
344 *nextTokPtr = ptr +
MINBPC(enc);
346 INVALID_CASES(ptr, nextTokPtr)
353#define LEAD_CASE(n) \
355 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
357 return XML_TOK_DATA_CHARS; \
361 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
384 const char *end,
const char **nextTokPtr)
389 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
396 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
398 for (ptr +=
MINBPC(enc); ptr != end; ptr +=
MINBPC(enc)) {
403 *nextTokPtr = ptr +
MINBPC(enc);
419 *nextTokPtr = ptr +
MINBPC(enc);
433 const char *end,
const char **nextTokPtr)
444 for (ptr +=
MINBPC(enc); ptr != end; ptr +=
MINBPC(enc)) {
450 *nextTokPtr = ptr +
MINBPC(enc);
465 const char *end,
const char **nextTokPtr)
469 return PREFIX(scanHexCharRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
477 for (ptr +=
MINBPC(enc); ptr != end; ptr +=
MINBPC(enc)) {
482 *nextTokPtr = ptr +
MINBPC(enc);
496PREFIX(scanRef)(
const ENCODING *enc,
const char *ptr,
const char *end,
497 const char **nextTokPtr)
502 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
504 return PREFIX(scanCharRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
511 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
513 *nextTokPtr = ptr +
MINBPC(enc);
526PREFIX(scanAtts)(
const ENCODING *enc,
const char *ptr,
const char *end,
527 const char **nextTokPtr)
534 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
546 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
607 INVALID_CASES(ptr, nextTokPtr)
610 int tok =
PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, &ptr);
648 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
653 *nextTokPtr = ptr +
MINBPC(enc);
664 *nextTokPtr = ptr +
MINBPC(enc);
685PREFIX(scanLt)(
const ENCODING *enc,
const char *ptr,
const char *end,
686 const char **nextTokPtr)
694 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
696 if ((ptr +=
MINBPC(enc)) == end)
700 return PREFIX(scanComment)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
708 return PREFIX(scanPi)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
710 return PREFIX(scanEndTag)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
721 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
733 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
745 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
757 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
763 *nextTokPtr = ptr +
MINBPC(enc);
774 *nextTokPtr = ptr +
MINBPC(enc);
785PREFIX(contentTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
786 const char **nextTokPtr)
791 size_t n = end - ptr;
792 if (n & (
MINBPC(enc) - 1)) {
801 return PREFIX(scanLt)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
803 return PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
813 *nextTokPtr = ptr +
MINBPC(enc);
830 INVALID_CASES(ptr, nextTokPtr)
837#define LEAD_CASE(n) \
839 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
841 return XML_TOK_DATA_CHARS; \
845 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
848 if (ptr +
MINBPC(enc) != end) {
853 if (ptr + 2*
MINBPC(enc) != end) {
858 *nextTokPtr = ptr + 2*
MINBPC(enc);
884PREFIX(scanPercent)(
const ENCODING *enc,
const char *ptr,
const char *end,
885 const char **nextTokPtr)
890 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
900 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
902 *nextTokPtr = ptr +
MINBPC(enc);
913PREFIX(scanPoundName)(
const ENCODING *enc,
const char *ptr,
const char *end,
914 const char **nextTokPtr)
919 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
926 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941 const char *ptr,
const char *end,
942 const char **nextTokPtr)
947 INVALID_CASES(ptr, nextTokPtr)
972PREFIX(prologTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
973 const char **nextTokPtr)
979 size_t n = end - ptr;
980 if (n & (
MINBPC(enc) - 1)) {
999 return PREFIX(scanDecl)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1001 return PREFIX(scanPi)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1008 *nextTokPtr = ptr -
MINBPC(enc);
1015 if (ptr +
MINBPC(enc) == end) {
1031 if (ptr +
MINBPC(enc) != end)
1042 return PREFIX(scanPercent)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1044 *nextTokPtr = ptr +
MINBPC(enc);
1047 *nextTokPtr = ptr +
MINBPC(enc);
1054 if (ptr +
MINBPC(enc) == end)
1057 *nextTokPtr = ptr + 2*
MINBPC(enc);
1064 *nextTokPtr = ptr +
MINBPC(enc);
1072 *nextTokPtr = ptr +
MINBPC(enc);
1075 *nextTokPtr = ptr +
MINBPC(enc);
1078 *nextTokPtr = ptr +
MINBPC(enc);
1089 *nextTokPtr = ptr +
MINBPC(enc);
1092 *nextTokPtr = ptr +
MINBPC(enc);
1095 return PREFIX(scanPoundName)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1096#define LEAD_CASE(n) \
1097 case BT_LEAD ## n: \
1098 if (end - ptr < n) \
1099 return XML_TOK_PARTIAL_CHAR; \
1100 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1102 tok = XML_TOK_NAME; \
1105 if (IS_NAME_CHAR(enc, ptr, n)) { \
1107 tok = XML_TOK_NMTOKEN; \
1110 *nextTokPtr = ptr; \
1111 return XML_TOK_INVALID;
1112 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1144 while (ptr != end) {
1146 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1161 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1178 *nextTokPtr = ptr +
MINBPC(enc);
1185 *nextTokPtr = ptr +
MINBPC(enc);
1192 *nextTokPtr = ptr +
MINBPC(enc);
1204 const char *end,
const char **nextTokPtr)
1210 while (ptr != end) {
1212#define LEAD_CASE(n) \
1213 case BT_LEAD ## n: ptr += n; break;
1214 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1218 return PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1227 *nextTokPtr = ptr +
MINBPC(enc);
1246 *nextTokPtr = ptr +
MINBPC(enc);
1262 const char *end,
const char **nextTokPtr)
1268 while (ptr != end) {
1270#define LEAD_CASE(n) \
1271 case BT_LEAD ## n: ptr += n; break;
1272 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1276 return PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1289 *nextTokPtr = ptr +
MINBPC(enc);
1319 const char *end,
const char **nextTokPtr)
1323 size_t n = end - ptr;
1324 if (n & (
MINBPC(enc) - 1)) {
1329 while (ptr != end) {
1331 INVALID_CASES(ptr, nextTokPtr)
1333 if ((ptr +=
MINBPC(enc)) == end)
1336 if ((ptr +=
MINBPC(enc)) == end)
1345 if ((ptr +=
MINBPC(enc)) == end)
1348 if ((ptr +=
MINBPC(enc)) == end)
1354 return XML_TOK_IGNORE_SECT;
1371PREFIX(isPublicId)(
const ENCODING *enc,
const char *ptr,
const char *end,
1372 const char **badPtr)
1376 for (; ptr != end; ptr +=
MINBPC(enc)) {
1434 enum { other, inName, inValue } state = inName;
1442 if (state == other) { \
1443 if (nAtts < attsMax) { \
1444 atts[nAtts].name = ptr; \
1445 atts[nAtts].normalized = 1; \
1449#define LEAD_CASE(n) \
1450 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1451 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1460 if (state != inValue) {
1461 if (nAtts < attsMax)
1468 if (nAtts < attsMax)
1469 atts[nAtts].valueEnd = ptr;
1474 if (state != inValue) {
1475 if (nAtts < attsMax)
1482 if (nAtts < attsMax)
1483 atts[nAtts].valueEnd = ptr;
1488 if (nAtts < attsMax)
1489 atts[nAtts].normalized = 0;
1492 if (state == inName)
1494 else if (state == inValue
1496 &&
atts[nAtts].normalized
1497 && (ptr ==
atts[nAtts].valuePtr
1501 atts[nAtts].normalized = 0;
1506 if (state == inName)
1508 else if (state == inValue && nAtts < attsMax)
1509 atts[nAtts].normalized = 0;
1513 if (state != inValue)
1551 if (result >= 0x110000)
1560 if (result >= 0x110000)
1564 return checkCharRefNumber(result);
1568PREFIX(predefinedEntityName)(
const ENCODING *enc,
const char *ptr,
1571 switch ((end - ptr)/
MINBPC(enc)) {
1622PREFIX(sameName)(
const ENCODING *enc,
const char *ptr1,
const char *ptr2)
1626#define LEAD_CASE(n) \
1627 case BT_LEAD ## n: \
1628 if (*ptr1++ != *ptr2++) \
1630 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1633 if (*ptr1++ != *ptr2++)
1645 if (*ptr2++ != *ptr1++)
1648 if (*ptr2++ != *ptr1++)
1651 if (*ptr2++ != *ptr1++)
1654 if (*ptr2++ != *ptr1++)
1661 if (
MINBPC(enc) == 1 && *ptr1 == *ptr2)
1687 const char *end1,
const char *ptr2)
1689 for (; *ptr2; ptr1 +=
MINBPC(enc), ptr2++) {
1695 return ptr1 == end1;
1701 const char *start = ptr;
1704#define LEAD_CASE(n) \
1705 case BT_LEAD ## n: ptr += n; break;
1706 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1720 return (
int)(ptr - start);
1747 while (ptr != end) {
1749#define LEAD_CASE(n) \
1750 case BT_LEAD ## n: \
1753 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1771 pos->columnNumber++;
1776#undef MULTIBYTE_CASES
1778#undef CHECK_NAME_CASE
1779#undef CHECK_NAME_CASES
1780#undef CHECK_NMSTRT_CASE
1781#undef CHECK_NMSTRT_CASES
#define CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR_MINBPC(enc, p)
#define BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p)
#define IS_NMSTRT_CHAR_MINBPC(enc, p)
#define XML_TOK_CLOSE_PAREN_ASTERISK
#define XML_TOK_DECL_OPEN
#define XML_TOK_PREFIXED_NAME
#define XML_TOK_OPEN_PAREN
#define XML_TOK_OPEN_BRACKET
#define XML_TOK_CLOSE_PAREN_PLUS
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS
#define XML_TOK_ENTITY_REF
#define XML_TOK_CLOSE_PAREN
#define XML_TOK_NAME_ASTERISK
#define XML_TOK_DATA_NEWLINE
#define XML_TOK_DECL_CLOSE
#define XML_TOK_NAME_PLUS
#define XML_TOK_NAME_QUESTION
#define XML_TOK_COND_SECT_CLOSE
#define XML_TOK_START_TAG_WITH_ATTS
#define XML_TOK_CDATA_SECT_CLOSE
#define XML_TOK_DATA_CHARS
#define XML_TOK_COND_SECT_OPEN
#define XML_TOK_POUND_NAME
#define XML_TOK_PARAM_ENTITY_REF
#define XML_TOK_CLOSE_BRACKET
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS
#define XML_TOK_TRAILING_RSQB
#define XML_TOK_ATTRIBUTE_VALUE_S
#define XML_TOK_START_TAG_NO_ATTS
#define XML_TOK_TRAILING_CR
#define XML_TOK_CDATA_SECT_OPEN
#define XML_TOK_INSTANCE_START
#define XML_TOK_CLOSE_PAREN_QUESTION