var Tokenizer = require("./Tokenizer.js");

/*
	Options:

	xmlMode: Disables the special behavior for script/style tags (false by default)
	lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
	lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
	recognizeSelfClosing: treat `<foo />` as an immediately closed tag even outside xmlMode
	recognizeCDATA: report CDATA sections via oncdatastart/ontext/oncdataend even outside xmlMode
*/

/*
	Callbacks:

	onattribute,
	oncdataend,
	oncdatastart,
	onclosetag,
	oncomment,
	oncommentend,
	onend,
	onerror,
	onopentag,
	onopentagname,
	onparserinit,
	onprocessinginstruction,
	onreset,
	ontext
*/

//own-property test; the lookup tables below are plain object literals, so the
//`in` operator would also match inherited keys such as "constructor"
var hasOwn = Object.prototype.hasOwnProperty;

var formTags = {
	input: true,
	option: true,
	optgroup: true,
	select: true,
	button: true,
	datalist: true,
	textarea: true
};

//maps a tag name to the set of open elements that are implicitly closed
//when that tag is opened (only consulted outside xmlMode)
var openImpliesClose = {
	tr      : { tr:true, th:true, td:true },
	th      : { th:true },
	td      : { thead:true, th:true, td:true },
	body    : { head:true, link:true, script:true },
	li      : { li:true },
	p       : { p:true },
	h1      : { p:true },
	h2      : { p:true },
	h3      : { p:true },
	h4      : { p:true },
	h5      : { p:true },
	h6      : { p:true },
	select  : formTags,
	input   : formTags,
	output  : formTags,
	button  : formTags,
	datalist: formTags,
	textarea: formTags,
	option  : { option:true },
	optgroup: { optgroup:true }
};

//elements that never get pushed onto the stack outside xmlMode
var voidElements = {
	__proto__: null,
	area: true,
	base: true,
	basefont: true,
	br: true,
	col: true,
	command: true,
	embed: true,
	frame: true,
	hr: true,
	img: true,
	input: true,
	isindex: true,
	keygen: true,
	link: true,
	meta: true,
	param: true,
	source: true,
	track: true,
	wbr: true,

	//common self closing svg elements
	path: true,
	circle: true,
	ellipse: true,
	line: true,
	rect: true,
	use: true,
	stop: true,
	polyline: true,
	polygon: true
};

var re_nameEnd = /\s|\//;

/**
 * Turns tokenizer events into handler callbacks while tracking open elements.
 *
 * @param {Object} [cbs] Handler object; see "Callbacks" above. Missing callbacks are skipped.
 * @param {Object} [options] See "Options" above.
 */
function Parser(cbs, options){
	this._options = options || {};
	this._cbs = cbs || {};

	this._tagname = "";
	this._attribname = "";
	this._attribvalue = "";
	this._attribs = null;
	this._stack = [];

	this.startIndex = 0;
	this.endIndex = null;

	this._lowerCaseTagNames = "lowerCaseTags" in this._options ?
									!!this._options.lowerCaseTags :
									!this._options.xmlMode;
	this._lowerCaseAttributeNames = "lowerCaseAttributeNames" in this._options ?
									!!this._options.lowerCaseAttributeNames :
									!this._options.xmlMode;

	this._tokenizer = new Tokenizer(this._options, this);

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
}

require("util").inherits(Parser, require("events").EventEmitter);

/**
 * Recomputes startIndex/endIndex for the token that was just reported.
 *
 * For the very first token startIndex is derived from the tokenizer's section
 * start minus `initialOffset` (clamped to 0); afterwards it is the previous
 * endIndex + 1. endIndex becomes the tokenizer's absolute index.
 *
 * @param {number} initialOffset Characters to subtract from the section start.
 */
Parser.prototype._updatePosition = function(initialOffset){
	if(this.endIndex === null){
		if(this._tokenizer._sectionStart <= initialOffset){
			this.startIndex = 0;
		} else {
			this.startIndex = this._tokenizer._sectionStart - initialOffset;
		}
	}
	else this.startIndex = this.endIndex + 1;
	this.endIndex = this._tokenizer.getAbsoluteIndex();
};

//Tokenizer event handlers

/**
 * Forwards a text node to the handler.
 * @param {string} data
 */
Parser.prototype.ontext = function(data){
	this._updatePosition(1);
	this.endIndex--;

	if(this._cbs.ontext) this._cbs.ontext(data);
};

/**
 * Handles the name of an opening tag: normalizes case, closes elements that
 * the new tag implicitly terminates, and pushes the tag onto the stack unless
 * it is a void element (void elements are only special outside xmlMode).
 * @param {string} name
 */
Parser.prototype.onopentagname = function(name){
	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	this._tagname = name;

	//own-property checks keep tag names like "constructor" from matching
	//keys inherited from Object.prototype
	if(!this._options.xmlMode && hasOwn.call(openImpliesClose, name)){
		var impliedClose = openImpliesClose[name];
		var el;

		while(
			this._stack.length &&
			hasOwn.call(impliedClose, (el = this._stack[this._stack.length - 1]))
		){
			//onclosetag pops `el` off the stack, so the loop terminates
			this.onclosetag(el);
		}
	}

	if(this._options.xmlMode || !(name in voidElements)){
		this._stack.push(name);
	}

	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
	//attributes are only collected when somebody listens for onopentag
	if(this._cbs.onopentag) this._attribs = {};
};

/**
 * Handles the end of an opening tag (`>`): emits onopentag with the collected
 * attributes and, outside xmlMode, an immediate onclosetag for void elements.
 */
Parser.prototype.onopentagend = function(){
	this._updatePosition(1);

	if(this._attribs){
		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
		this._attribs = null;
	}

	if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
		this._cbs.onclosetag(this._tagname);
	}

	this._tagname = "";
};

/**
 * Handles a closing tag. Closes every element up to and including the
 * innermost open element called `name`. Outside xmlMode an unmatched `</p>`
 * (and a `</br>`) is turned into an empty element of that name.
 * @param {string} name
 */
Parser.prototype.onclosetag = function(name){
	this._updatePosition(1);

	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
		var pos = this._stack.lastIndexOf(name);
		if(pos !== -1){
			if(this._cbs.onclosetag){
				//number of elements to close, innermost first
				pos = this._stack.length - pos;
				while(pos--) this._cbs.onclosetag(this._stack.pop());
			}
			else this._stack.length = pos;
		} else if(name === "p" && !this._options.xmlMode){
			this.onopentagname(name);
			this._closeCurrentTag();
		}
	} else if(!this._options.xmlMode && (name === "br" || name === "p")){
		this.onopentagname(name);
		this._closeCurrentTag();
	}
};

/**
 * Handles `/>`. Only closes the element when xmlMode or recognizeSelfClosing
 * is set; otherwise the slash is ignored and the tag is treated as opened.
 */
Parser.prototype.onselfclosingtag = function(){
	if(this._options.xmlMode || this._options.recognizeSelfClosing){
		this._closeCurrentTag();
	} else {
		this.onopentagend();
	}
};

/**
 * Finishes the current opening tag and closes it right away if it is the
 * element on top of the stack.
 */
Parser.prototype._closeCurrentTag = function(){
	//onopentagend clears _tagname, so remember it first
	var name = this._tagname;

	this.onopentagend();

	//self-closing tags will be on the top of the stack
	//(cheaper check than in onclosetag)
	if(this._stack[this._stack.length - 1] === name){
		if(this._cbs.onclosetag){
			this._cbs.onclosetag(name);
		}
		this._stack.pop();
	}
};

/**
 * Records the name of the attribute currently being parsed.
 * @param {string} name
 */
Parser.prototype.onattribname = function(name){
	if(this._lowerCaseAttributeNames){
		name = name.toLowerCase();
	}
	this._attribname = name;
};

/**
 * Appends a chunk of data to the current attribute's value.
 * @param {string} value
 */
Parser.prototype.onattribdata = function(value){
	this._attribvalue += value;
};

/**
 * Finishes the current attribute: emits onattribute and stores the value in
 * the attribute map. The first occurrence of a name wins.
 */
Parser.prototype.onattribend = function(){
	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);
	if(
		this._attribs &&
		!hasOwn.call(this._attribs, this._attribname)
	){
		this._attribs[this._attribname] = this._attribvalue;
	}
	this._attribname = "";
	this._attribvalue = "";
};

/**
 * Extracts the leading name of a declaration / processing instruction, i.e.
 * everything before the first whitespace or slash.
 * @param {string} value
 * @returns {string} The name, lower-cased when tag names are lower-cased.
 */
Parser.prototype._getInstructionName = function(value){
	var idx = value.search(re_nameEnd),
	    name = idx < 0 ? value : value.slice(0, idx);

	if(this._lowerCaseTagNames){
		name = name.toLowerCase();
	}

	return name;
};

/**
 * Reports a declaration as a processing instruction whose name and data are
 * prefixed with "!".
 * @param {string} value
 */
Parser.prototype.ondeclaration = function(value){
	if(this._cbs.onprocessinginstruction){
		var name = this._getInstructionName(value);
		this._cbs.onprocessinginstruction("!" + name, "!" + value);
	}
};

/**
 * Reports a processing instruction; name and data are prefixed with "?".
 * @param {string} value
 */
Parser.prototype.onprocessinginstruction = function(value){
	if(this._cbs.onprocessinginstruction){
		var name = this._getInstructionName(value);
		this._cbs.onprocessinginstruction("?" + name, "?" + value);
	}
};

/**
 * Reports a comment (oncomment followed by oncommentend).
 * @param {string} value
 */
Parser.prototype.oncomment = function(value){
	this._updatePosition(4);

	if(this._cbs.oncomment) this._cbs.oncomment(value);
	if(this._cbs.oncommentend) this._cbs.oncommentend();
};

/**
 * Reports a CDATA section. With xmlMode or recognizeCDATA it is emitted as
 * oncdatastart/ontext/oncdataend; otherwise it is reported as a comment.
 * @param {string} value
 */
Parser.prototype.oncdata = function(value){
	if(this._options.xmlMode || this._options.recognizeCDATA){
		this._updatePosition(1);

		if(this._cbs.oncdatastart) this._cbs.oncdatastart();
		if(this._cbs.ontext) this._cbs.ontext(value);
		if(this._cbs.oncdataend) this._cbs.oncdataend();
	} else {
		//oncomment updates the position itself; updating it here as well
		//would move startIndex past the end of this token
		this.oncomment("[CDATA[" + value + "]]");
	}
};

/**
 * Forwards an error to the handler.
 * @param {*} err
 */
Parser.prototype.onerror = function(err){
	if(this._cbs.onerror) this._cbs.onerror(err);
};

/**
 * Called at the end of input: reports onclosetag for every element that is
 * still open (innermost first), then onend.
 */
Parser.prototype.onend = function(){
	if(this._cbs.onclosetag){
		for(
			var i = this._stack.length;
			i > 0;
			this._cbs.onclosetag(this._stack[--i])
		);
	}
	if(this._cbs.onend) this._cbs.onend();
};


//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function(){
	if(this._cbs.onreset) this._cbs.onreset();
	this._tokenizer.reset();

	this._tagname = "";
	this._attribname = "";
	this._attribvalue = "";
	this._attribs = null;
	this._stack = [];

	//positions must start over as well, otherwise the first token of the
	//next document would be offset by the previous document's endIndex
	this.startIndex = 0;
	this.endIndex = null;

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
};

//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function(data){
	this.reset();
	this.end(data);
};

/**
 * Feeds a chunk of input to the tokenizer.
 * @param {string} chunk
 */
Parser.prototype.write = function(chunk){
	this._tokenizer.write(chunk);
};

/**
 * Signals the end of input, optionally passing a final chunk to the tokenizer.
 * @param {string} [chunk]
 */
Parser.prototype.end = function(chunk){
	this._tokenizer.end(chunk);
};

/** Pauses the underlying tokenizer. */
Parser.prototype.pause = function(){
	this._tokenizer.pause();
};

/** Resumes the underlying tokenizer. */
Parser.prototype.resume = function(){
	this._tokenizer.resume();
};

//alias for backwards compat
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;

module.exports = Parser;