342 lines
10 KiB
JavaScript
342 lines
10 KiB
JavaScript
'use strict';
|
|
|
|
const util = require('./util');
|
|
const buildOptions = require('./util').buildOptions;
|
|
const xmlNode = require('./xmlNode');
|
|
// Text of a tag-matching pattern. The template uses NAME as a placeholder;
// every occurrence is substituted with util.nameRegexp.
// NOTE(review): nothing in the visible part of this file reads `regx` -
// confirm whether it is still needed.
const tagPatternTemplate =
  '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)';
const regx = tagPatternTemplate.replace(/NAME/g, util.nameRegexp);
|
|
|
|
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
|
|
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
|
|
|
|
// Polyfill for engines that lack Number.parseInt / Number.parseFloat.
// The global parseInt / parseFloat functions are used as the fallback instead
// of window.parseInt / window.parseFloat: they are the same functions in a
// browser, and referencing `window` throws a ReferenceError in environments
// that have no such global (e.g. workers or non-browser runtimes).
if (!Number.parseInt && typeof parseInt === 'function') {
  Number.parseInt = parseInt;
}
if (!Number.parseFloat && typeof parseFloat === 'function') {
  Number.parseFloat = parseFloat;
}
|
|
|
|
// Default values for the options read by this module. getTraversalObj hands
// this object, the caller's options and `props` to buildOptions.
const defaultOptions = {
  attributeNamePrefix: '@_', //prepended to every attribute key
  attrNodeName: false, //when set, attributes are grouped under this key
  textNodeName: '#text',
  ignoreAttributes: true, //when true, no attribute map is built
  ignoreNameSpace: false, //when true, "ns:" prefixes are stripped from names
  allowBooleanAttributes: false, //a tag can have attributes without any value
  //ignoreRootElement : false,
  parseNodeValue: true, //convert tag text to number / boolean
  parseAttributeValue: false, //convert attribute values to number / boolean
  arrayMode: false,
  trimValues: true, //Trim string values of tag and attributes
  cdataTagName: false, //when set, CDATA sections become child nodes with this name
  cdataPositionChar: '\\c', //appended to the parent's value where a CDATA node was found
  //called with the (optionally trimmed) tag text before it is parsed
  tagValueProcessor: function(a, tagName) {
    return a;
  },
  //called with the (optionally trimmed) attribute value before it is parsed
  attrValueProcessor: function(a, attrName) {
    return a;
  },
  //listed in `props` and read by the value parser; spelled out here so the
  //default is explicit instead of an implicit `undefined`
  parseTrueNumberOnly: false,
  stopNodes: [] //tag names whose inner content is kept as a raw string
  //decodeStrict: false,
};
|
|
|
|
exports.defaultOptions = defaultOptions;
|
|
|
|
// Names of the options this module understands; passed to buildOptions
// together with the caller's options and defaultOptions.
const props = [
  // naming
  'attributeNamePrefix', 'attrNodeName', 'textNodeName',
  // what to keep
  'ignoreAttributes', 'ignoreNameSpace', 'allowBooleanAttributes',
  // value conversion
  'parseNodeValue', 'parseAttributeValue',
  'arrayMode', 'trimValues',
  // CDATA handling
  'cdataTagName', 'cdataPositionChar',
  // user hooks
  'tagValueProcessor', 'attrValueProcessor',
  'parseTrueNumberOnly',
  'stopNodes'
];
|
|
exports.props = props;
|
|
|
|
/**
 * Prepare the raw text of a tag in three steps:
 * trim (when options.trimValues is set) -> options.tagValueProcessor -> parseValue.
 * A falsy value (empty string, undefined, null, ...) is handed back untouched
 * and none of the steps run.
 * @param {string} tagName name passed as second argument to tagValueProcessor
 * @param {string} val raw text collected for the tag
 * @param {object} options reads trimValues, tagValueProcessor, parseNodeValue, parseTrueNumberOnly
 * @returns {*} whatever parseValue produced, or `val` itself when it was falsy
 */
function processTagValue(tagName, val, options) {
  if (!val) {
    return val;
  }

  const text = options.trimValues ? val.trim() : val;
  const processed = options.tagValueProcessor(text, tagName);
  return parseValue(processed, options.parseNodeValue, options.parseTrueNumberOnly);
}
|
|
|
|
/**
 * Strip the namespace prefix from a name when options.ignoreNameSpace is set.
 *  - names whose first ':'-separated part is "xmlns" resolve to '' (callers
 *    in this file skip empty names)
 *  - "prefix:local" becomes "local"; a leading '/' on the name is carried over
 *  - anything else (no colon, or more than one colon) is returned unchanged
 * @param {string} tagname
 * @param {object} options reads ignoreNameSpace
 * @returns {string}
 */
function resolveNameSpace(tagname, options) {
  if (!options.ignoreNameSpace) {
    return tagname;
  }

  const parts = tagname.split(':');
  if (parts[0] === 'xmlns') {
    return '';
  }
  if (parts.length !== 2) {
    return tagname;
  }

  const leadingSlash = tagname.charAt(0) === '/' ? '/' : '';
  return leadingSlash + parts[1];
}
|
|
|
|
/**
 * Convert a string to a boolean or number when it looks like one.
 *
 * When `shouldParse` is falsy or `val` is not a string, nothing is parsed:
 * `val` is returned if util.isExist(val) is truthy, otherwise ''.
 *
 * Otherwise:
 *  - blank or non-numeric text is returned as is, except that the exact
 *    strings 'true' / 'false' become booleans;
 *  - numeric text is converted with Number(), so decimal, hexadecimal
 *    ("0x1A" / "0X1A") and exponent ("1e3") notations are all honoured;
 *  - text that converts to a non-finite number (e.g. "Infinity") stays a string;
 *  - with `parseTrueNumberOnly`, the number is only returned when printing it
 *    gives the input back (trailing zeros of a decimal fraction are ignored
 *    for this comparison); otherwise the ORIGINAL string is returned, so
 *    values such as "007" or "01.50" are preserved untouched.
 *
 * @param {*} val value to convert
 * @param {boolean} shouldParse whether conversion is enabled
 * @param {boolean} [parseTrueNumberOnly] keep strings that do not round-trip
 * @returns {*} boolean, number or the (unmodified) input; '' for a missing value
 */
function parseValue(val, shouldParse, parseTrueNumberOnly) {
  if (!shouldParse || typeof val !== 'string') {
    return util.isExist(val) ? val : '';
  }

  if (val.trim() === '' || isNaN(val)) {
    if (val === 'true') {
      return true;
    }
    if (val === 'false') {
      return false;
    }
    return val;
  }

  // isNaN(val) was false, so Number(val) is a real number here. Number() is
  // used for every notation: parseInt(val, 10) would silently truncate
  // exponent ("1e3" -> 1) and upper-case hex ("0X1A" -> 0) inputs.
  const parsed = Number(val);

  // "Infinity" and overflowing literals pass the isNaN gate; keep them as text.
  if (!isFinite(parsed)) {
    return val;
  }

  if (parseTrueNumberOnly) {
    // Compare against a copy so that a rejected value is returned unchanged.
    const comparable = val.indexOf('.') !== -1 ? val.replace(/\.?0+$/, '') : val;
    return String(parsed) === comparable ? parsed : val;
  }

  return parsed;
}
|
|
|
|
//TODO: change regex to capture NS
//previous variant: new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm")
// Matches one attribute: group 1 = name, group 3 = quote character,
// group 4 = value. The "= value" part (group 2) is optional, so a bare
// attribute name also matches with groups 2-4 undefined.
// The pattern is global (stateful lastIndex) and shared by the whole module.
const attrsRegx = /([^\s=]+)\s*(=\s*(['"])(.*?)\3)?/g;
|
|
|
|
/**
 * Turn the attribute part of a tag (everything after the tag name) into an
 * object keyed by options.attributeNamePrefix + attribute name.
 *
 * Each value goes through: trim (options.trimValues) ->
 * options.attrValueProcessor -> parseValue. An attribute without a value is
 * stored as `true` only when options.allowBooleanAttributes is set; names
 * that resolveNameSpace reduces to '' are skipped.
 *
 * @param {string} attrStr raw attribute text
 * @param {object} options parser options
 * @returns {object|undefined} undefined when attributes are ignored, attrStr
 *   is not a string or nothing was collected; otherwise the attribute map,
 *   wrapped as { [options.attrNodeName]: map } when attrNodeName is set
 */
function buildAttributesMap(attrStr, options) {
  if (options.ignoreAttributes || typeof attrStr !== 'string') {
    return;
  }

  // line breaks between attributes are treated as plain spaces
  const flattened = attrStr.replace(/\r?\n/g, ' ');
  //attrStr = attrStr || attrStr.trim();

  const found = util.getAllMatches(flattened, attrsRegx);
  const total = found.length; //don't make it inline
  const attrs = {};

  for (let k = 0; k < total; k++) {
    const match = found[k];
    const attrName = resolveNameSpace(match[1], options);
    if (!attrName.length) {
      continue;
    }

    let rawValue = match[4];
    if (rawValue === undefined) {
      // attribute written without "= value"
      if (options.allowBooleanAttributes) {
        attrs[options.attributeNamePrefix + attrName] = true;
      }
      continue;
    }

    if (options.trimValues) {
      rawValue = rawValue.trim();
    }
    rawValue = options.attrValueProcessor(rawValue, attrName);
    attrs[options.attributeNamePrefix + attrName] = parseValue(
      rawValue,
      options.parseAttributeValue,
      options.parseTrueNumberOnly
    );
  }

  if (Object.keys(attrs).length === 0) {
    return;
  }
  if (!options.attrNodeName) {
    return attrs;
  }

  const wrapper = {};
  wrapper[options.attrNodeName] = attrs;
  return wrapper;
}
|
|
|
|
/**
 * Scan an XML string once, character by character, and build a tree of
 * xmlNode objects under a synthetic root node named '!xml'.
 *
 * Handled constructs: closing tags, processing instructions (skipped),
 * comments (skipped), DOCTYPE declarations (skipped, including an internal
 * subset in [...]), "<![" sections (treated as CDATA), and opening /
 * self-closing tags. Characters outside of tags are accumulated as text and
 * attached to the current node when the next tag is met.
 *
 * @param {string} xmlData XML document text
 * @param {object} [options] parser options, merged with defaultOptions via buildOptions
 * @returns {xmlNode} the '!xml' root node
 * @throws {Error} when a closing tag, processing instruction, comment,
 *   DOCTYPE or CDATA section is not terminated
 */
const getTraversalObj = function(xmlData, options) {
  // NOTE(review): the pattern has no `g` flag, so only the FIRST line break
  // is replaced by a space. Left as is because changing it would alter text
  // values - confirm the intent.
  xmlData = xmlData.replace(/(\r\n)|\n/, " ");
  options = buildOptions(options, defaultOptions, props);
  const xmlObj = new xmlNode('!xml');
  let currentNode = xmlObj;
  let textData = "";

  for (let i = 0; i < xmlData.length; i++) {
    const ch = xmlData[i];
    if (ch === '<') {
      if (xmlData[i + 1] === '/') { //Closing Tag
        const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.");
        let tagName = xmlData.substring(i + 2, closeIndex).trim();

        if (options.ignoreNameSpace) {
          const colonIndex = tagName.indexOf(":");
          if (colonIndex !== -1) {
            tagName = tagName.substr(colonIndex + 1);
          }
        }

        // attach the text collected since the previous tag to the node being closed
        if (currentNode) {
          if (currentNode.val) {
            currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(tagName, textData, options);
          } else {
            currentNode.val = processTagValue(tagName, textData, options);
          }
        }

        // stop node: drop the parsed children and keep the raw text between
        // the '>' of its opening tag (startIndex) and the '<' of this closing tag
        if (options.stopNodes.length && options.stopNodes.includes(currentNode.tagname)) {
          currentNode.child = [];
          if (currentNode.attrsMap == undefined) { currentNode.attrsMap = {}; }
          currentNode.val = xmlData.substr(currentNode.startIndex + 1, i - currentNode.startIndex - 1);
        }
        currentNode = currentNode.parent;
        textData = "";
        i = closeIndex;
      } else if (xmlData[i + 1] === '?') {
        // processing instruction: skipped entirely
        i = findClosingIndex(xmlData, "?>", i, "Pi Tag is not closed.");
      } else if (xmlData.substr(i + 1, 3) === '!--') {
        // comment: skipped entirely
        i = findClosingIndex(xmlData, "-->", i, "Comment is not closed.");
      } else if (xmlData.substr(i + 1, 2) === '!D') {
        const closeIndex = findClosingIndex(xmlData, ">", i, "DOCTYPE is not closed.");
        const tagExp = xmlData.substring(i, closeIndex);
        if (tagExp.indexOf("[") >= 0) {
          // Internal subset: skip to the '>' of the terminating "]>".
          // findClosingIndex yields the same index as indexOf("]>") + 1 but
          // throws when "]>" is missing; a bare indexOf would give -1 + 1 = 0
          // and restart the scan from the beginning (possibly forever).
          i = findClosingIndex(xmlData, "]>", i, "DOCTYPE is not closed.");
        } else {
          i = closeIndex;
        }
      } else if (xmlData.substr(i + 1, 2) === '![') {
        // findClosingIndex returns the index of the final '>' of "]]>";
        // minus 2 gives the index of its first ']'. i + 9 skips "<![CDATA[".
        const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
        const tagExp = xmlData.substring(i + 9, closeIndex);

        //considerations
        //1. CDATA will always have parent node
        //2. A tag with CDATA is not a leaf node so it's value would be string type.
        if (textData) {
          currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(currentNode.tagname, textData, options);
          textData = "";
        }

        if (options.cdataTagName) {
          //add cdata node
          const childNode = new xmlNode(options.cdataTagName, currentNode, tagExp);
          currentNode.addChild(childNode);
          //for backtracking
          currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar;
          //add rest value to parent node
          if (tagExp) {
            childNode.val = tagExp;
          }
        } else {
          // no dedicated CDATA node: append the CDATA text to the parent's value
          currentNode.val = (currentNode.val || '') + (tagExp || '');
        }

        i = closeIndex + 2;
      } else { //Opening tag
        const result = closingIndexForOpeningTag(xmlData, i + 1);
        let tagExp = result.data;
        const closeIndex = result.index;
        // the first space separates the tag name from the attribute text
        const separatorIndex = tagExp.indexOf(" ");
        let tagName = tagExp;
        if (separatorIndex !== -1) {
          tagName = tagExp.substr(0, separatorIndex).trimRight();
          tagExp = tagExp.substr(separatorIndex + 1);
        }

        if (options.ignoreNameSpace) {
          const colonIndex = tagName.indexOf(":");
          if (colonIndex !== -1) {
            tagName = tagName.substr(colonIndex + 1);
          }
        }

        //save text to parent node (text directly under the '!xml' root is dropped)
        if (currentNode && textData) {
          if (currentNode.tagname !== '!xml') {
            currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(currentNode.tagname, textData, options);
          }
        }

        if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) { //selfClosing tag
          if (tagName[tagName.length - 1] === "/") { //remove trailing '/'
            tagName = tagName.substr(0, tagName.length - 1);
            tagExp = tagName;
          } else {
            tagExp = tagExp.substr(0, tagExp.length - 1);
          }

          const childNode = new xmlNode(tagName, currentNode, '');
          // tagExp equals tagName when the tag carried no attribute text
          if (tagName !== tagExp) {
            childNode.attrsMap = buildAttributesMap(tagExp, options);
          }
          currentNode.addChild(childNode);
        } else { //opening tag
          const childNode = new xmlNode(tagName, currentNode);
          if (options.stopNodes.length && options.stopNodes.includes(childNode.tagname)) {
            // remember where the raw content of a stop node begins
            childNode.startIndex = closeIndex;
          }
          if (tagName !== tagExp) {
            childNode.attrsMap = buildAttributesMap(tagExp, options);
          }
          currentNode.addChild(childNode);
          currentNode = childNode;
        }
        textData = "";
        i = closeIndex;
      }
    } else {
      textData += xmlData[i];
    }
  }
  return xmlObj;
};
|
|
|
|
/**
 * Starting at `i` (the first character after '<'), collect the text of an
 * opening tag up to the first '>' that is not inside a quoted attribute value.
 *
 * Outside of quotes, TAB, LF and CR are each turned into a single space so
 * that the caller can find the tag-name / attribute separator with a plain
 * space search even when the tag spans several lines. Characters inside
 * quotes are copied verbatim.
 *
 * @param {string} data XML text
 * @param {number} i index at which the tag expression starts
 * @returns {{data: string, index: number}} the tag expression (without '<'
 *   and '>') and the index of the terminating '>'
 * @throws {Error} when the end of `data` is reached before an unquoted '>'
 */
function closingIndexForOpeningTag(data, i) {
  let attrBoundary;
  let tagExp = "";
  for (let index = i; index < data.length; index++) {
    let ch = data[index];
    if (attrBoundary) {
      // inside a quoted value: only the matching quote ends it
      if (ch === attrBoundary) attrBoundary = ""; //reset
    } else if (ch === '"' || ch === "'") {
      attrBoundary = ch;
    } else if (ch === '>') {
      return {
        data: tagExp,
        index: index
      };
    } else if (ch === '\t' || ch === '\n' || ch === '\r') {
      // every XML white space character acts as a separator
      ch = " ";
    }
    tagExp += ch;
  }
  // Falling out of the loop used to return undefined, which made the caller
  // fail with an unrelated TypeError; report the real problem instead.
  throw new Error("Opening tag is not closed.");
}
|
|
|
|
/**
 * Locate the next occurrence of `str` in `xmlData` at or after position `i`.
 * @param {string} xmlData text to search
 * @param {string} str terminator to look for (e.g. ">", "?>", "-->")
 * @param {number} i index at which the search starts
 * @param {string} errMsg message of the Error thrown when `str` is absent
 * @returns {number} index of the LAST character of the found terminator
 * @throws {Error} when `str` does not occur at or after `i`
 */
function findClosingIndex(xmlData, str, i, errMsg) {
  const foundAt = xmlData.indexOf(str, i);
  if (foundAt !== -1) {
    return foundAt + str.length - 1;
  }
  throw new Error(errMsg);
}
|
|
|
|
exports.getTraversalObj = getTraversalObj;
|