'use strict'; const util = require('./util'); const buildOptions = require('./util').buildOptions; const xmlNode = require('./xmlNode'); const regx = '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)' .replace(/NAME/g, util.nameRegexp); //const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g"); //const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g"); //polyfill if (!Number.parseInt && window.parseInt) { Number.parseInt = window.parseInt; } if (!Number.parseFloat && window.parseFloat) { Number.parseFloat = window.parseFloat; } const defaultOptions = { attributeNamePrefix: '@_', attrNodeName: false, textNodeName: '#text', ignoreAttributes: true, ignoreNameSpace: false, allowBooleanAttributes: false, //a tag can have attributes without any value //ignoreRootElement : false, parseNodeValue: true, parseAttributeValue: false, arrayMode: false, trimValues: true, //Trim string values of tag and attributes cdataTagName: false, cdataPositionChar: '\\c', tagValueProcessor: function(a, tagName) { return a; }, attrValueProcessor: function(a, attrName) { return a; }, stopNodes: [] //decodeStrict: false, }; exports.defaultOptions = defaultOptions; const props = [ 'attributeNamePrefix', 'attrNodeName', 'textNodeName', 'ignoreAttributes', 'ignoreNameSpace', 'allowBooleanAttributes', 'parseNodeValue', 'parseAttributeValue', 'arrayMode', 'trimValues', 'cdataTagName', 'cdataPositionChar', 'tagValueProcessor', 'attrValueProcessor', 'parseTrueNumberOnly', 'stopNodes' ]; exports.props = props; /** * Trim -> valueProcessor -> parse value * @param {string} tagName * @param {string} val * @param {object} options */ function processTagValue(tagName, val, options) { if (val) { if (options.trimValues) { val = val.trim(); } val = options.tagValueProcessor(val, tagName); val = parseValue(val, options.parseNodeValue, options.parseTrueNumberOnly); } return val; } function resolveNameSpace(tagname, options) { if (options.ignoreNameSpace) { const tags = tagname.split(':'); const prefix = tagname.charAt(0) === '/' ? '/' : ''; if (tags[0] === 'xmlns') { return ''; } if (tags.length === 2) { tagname = prefix + tags[1]; } } return tagname; } function parseValue(val, shouldParse, parseTrueNumberOnly) { if (shouldParse && typeof val === 'string') { let parsed; if (val.trim() === '' || isNaN(val)) { parsed = val === 'true' ? true : val === 'false' ? false : val; } else { if (val.indexOf('0x') !== -1) { //support hexa decimal parsed = Number.parseInt(val, 16); } else if (val.indexOf('.') !== -1) { parsed = Number.parseFloat(val); val = val.replace(/\.?0+$/, ""); } else { parsed = Number.parseInt(val, 10); } if (parseTrueNumberOnly) { parsed = String(parsed) === val ? parsed : val; } } return parsed; } else { if (util.isExist(val)) { return val; } else { return ''; } } } //TODO: change regex to capture NS //const attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm"); const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])(.*?)\\3)?', 'g'); function buildAttributesMap(attrStr, options) { if (!options.ignoreAttributes && typeof attrStr === 'string') { attrStr = attrStr.replace(/\r?\n/g, ' '); //attrStr = attrStr || attrStr.trim(); const matches = util.getAllMatches(attrStr, attrsRegx); const len = matches.length; //don't make it inline const attrs = {}; for (let i = 0; i < len; i++) { const attrName = resolveNameSpace(matches[i][1], options); if (attrName.length) { if (matches[i][4] !== undefined) { if (options.trimValues) { matches[i][4] = matches[i][4].trim(); } matches[i][4] = options.attrValueProcessor(matches[i][4], attrName); attrs[options.attributeNamePrefix + attrName] = parseValue( matches[i][4], options.parseAttributeValue, options.parseTrueNumberOnly ); } else if (options.allowBooleanAttributes) { attrs[options.attributeNamePrefix + attrName] = true; } } } if (!Object.keys(attrs).length) { return; } if (options.attrNodeName) { const attrCollection = {}; attrCollection[options.attrNodeName] = attrs; return attrCollection; } return attrs; } } const getTraversalObj = function(xmlData, options) { xmlData = xmlData.replace(/(\r\n)|\n/, " "); options = buildOptions(options, defaultOptions, props); const xmlObj = new xmlNode('!xml'); let currentNode = xmlObj; let textData = ""; //function match(xmlData){ for(let i=0; i< xmlData.length; i++){ const ch = xmlData[i]; if(ch === '<'){ if( xmlData[i+1] === '/') {//Closing Tag const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.") let tagName = xmlData.substring(i+2,closeIndex).trim(); if(options.ignoreNameSpace){ const colonIndex = tagName.indexOf(":"); if(colonIndex !== -1){ tagName = tagName.substr(colonIndex+1); } } /* if (currentNode.parent) { currentNode.parent.val = util.getValue(currentNode.parent.val) + '' + processTagValue2(tagName, textData , options); } */ if(currentNode){ if(currentNode.val){ currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(tagName, textData , options); }else{ currentNode.val = processTagValue(tagName, textData , options); } } if (options.stopNodes.length && options.stopNodes.includes(currentNode.tagname)) { currentNode.child = [] if (currentNode.attrsMap == undefined) { currentNode.attrsMap = {}} currentNode.val = xmlData.substr(currentNode.startIndex + 1, i - currentNode.startIndex - 1) } currentNode = currentNode.parent; textData = ""; i = closeIndex; } else if( xmlData[i+1] === '?') { i = findClosingIndex(xmlData, "?>", i, "Pi Tag is not closed.") } else if(xmlData.substr(i + 1, 3) === '!--') { i = findClosingIndex(xmlData, "-->", i, "Comment is not closed.") } else if( xmlData.substr(i + 1, 2) === '!D') { const closeIndex = findClosingIndex(xmlData, ">", i, "DOCTYPE is not closed.") const tagExp = xmlData.substring(i, closeIndex); if(tagExp.indexOf("[") >= 0){ i = xmlData.indexOf("]>", i) + 1; }else{ i = closeIndex; } }else if(xmlData.substr(i + 1, 2) === '![') { const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2 const tagExp = xmlData.substring(i + 9,closeIndex); //considerations //1. CDATA will always have parent node //2. A tag with CDATA is not a leaf node so it's value would be string type. if(textData){ currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(currentNode.tagname, textData , options); textData = ""; } if (options.cdataTagName) { //add cdata node const childNode = new xmlNode(options.cdataTagName, currentNode, tagExp); currentNode.addChild(childNode); //for backtracking currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar; //add rest value to parent node if (tagExp) { childNode.val = tagExp; } } else { currentNode.val = (currentNode.val || '') + (tagExp || ''); } i = closeIndex + 2; }else {//Opening tag const result = closingIndexForOpeningTag(xmlData, i+1) let tagExp = result.data; const closeIndex = result.index; const separatorIndex = tagExp.indexOf(" "); let tagName = tagExp; if(separatorIndex !== -1){ tagName = tagExp.substr(0, separatorIndex).trimRight(); tagExp = tagExp.substr(separatorIndex + 1); } if(options.ignoreNameSpace){ const colonIndex = tagName.indexOf(":"); if(colonIndex !== -1){ tagName = tagName.substr(colonIndex+1); } } //save text to parent node if (currentNode && textData) { if(currentNode.tagname !== '!xml'){ currentNode.val = util.getValue(currentNode.val) + '' + processTagValue( currentNode.tagname, textData, options); } } if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){//selfClosing tag if(tagName[tagName.length - 1] === "/"){ //remove trailing '/' tagName = tagName.substr(0, tagName.length - 1); tagExp = tagName; }else{ tagExp = tagExp.substr(0, tagExp.length - 1); } const childNode = new xmlNode(tagName, currentNode, ''); if(tagName !== tagExp){ childNode.attrsMap = buildAttributesMap(tagExp, options); } currentNode.addChild(childNode); }else{//opening tag const childNode = new xmlNode( tagName, currentNode ); if (options.stopNodes.length && options.stopNodes.includes(childNode.tagname)) { childNode.startIndex=closeIndex; } if(tagName !== tagExp){ childNode.attrsMap = buildAttributesMap(tagExp, options); } currentNode.addChild(childNode); currentNode = childNode; } textData = ""; i = closeIndex; } }else{ textData += xmlData[i]; } } return xmlObj; } function closingIndexForOpeningTag(data, i){ let attrBoundary; let tagExp = ""; for (let index = i; index < data.length; index++) { let ch = data[index]; if (attrBoundary) { if (ch === attrBoundary) attrBoundary = "";//reset } else if (ch === '"' || ch === "'") { attrBoundary = ch; } else if (ch === '>') { return { data: tagExp, index: index } } else if (ch === '\t') { ch = " " } tagExp += ch; } } function findClosingIndex(xmlData, str, i, errMsg){ const closingIndex = xmlData.indexOf(str, i); if(closingIndex === -1){ throw new Error(errMsg) }else{ return closingIndex + str.length - 1; } } exports.getTraversalObj = getTraversalObj;