1 /**
2 	This is an html DOM implementation, started with cloning
3 	what the browser offers in Javascript, but going well beyond
4 	it in convenience.
5 
6 	If you can do it in Javascript, you can probably do it with
7 	this module.
8 
9 	And much more.
10 
11 
12 	Note: some of the documentation here writes html with added
13 	spaces. That's because ddoc doesn't bother encoding html output,
14 	and adding spaces is easier than using LT macros everywhere.
15 
16 
	BTW: this file depends on arsd.characterencodings, which helps it
	correctly read files from the internet. You should be able to
	get characterencodings.d from the same place you got this file.
20 */
21 module arsd.dom;
22 
23 // FIXME: might be worth doing Element.attrs and taking opDispatch off that
24 // so more UFCS works.
25 
26 
27 // FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
28 // FIXME: failing to close a paragraph sometimes messes things up too
29 
30 // FIXME: it would be kinda cool to have some support for internal DTDs
31 // and maybe XPath as well, to some extent
32 /*
33 	we could do
34 	meh this sux
35 
36 	auto xpath = XPath(element);
37 
38 	     // get the first p
39 	xpath.p[0].a["href"]
40 */
41 
42 // public import arsd.domconvenience; // merged for now
43 
44 /* domconvenience follows { */
45 
46 
47 import std.string;
48 import std.container;
49 
50 // the reason this is separated is so I can plug it into D->JS as well, which uses a different base Element class
51 
52 import arsd.dom;
53 
54 mixin template DomConvenienceFunctions() {
55 
56 	/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
57 	final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
58 	if(
59 		is(SomeElementType : Element)
60 	)
61 	out(ret) {
62 		assert(ret !is null);
63 	}
64 	body {
65 		auto e = cast(SomeElementType) getElementById(id);
66 		if(e is null)
67 			throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, file, line);
68 		return e;
69 	}
70 
71 	/// ditto but with selectors instead of ids
72 	final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
73 	if(
74 		is(SomeElementType : Element)
75 	)
76 	out(ret) {
77 		assert(ret !is null);
78 	}
79 	body {
80 		auto e = cast(SomeElementType) querySelector(selector);
81 		if(e is null)
82 			throw new ElementNotFoundException(SomeElementType.stringof, selector, file, line);
83 		return e;
84 	}
85 
86 
87 
88 
89 	/// get all the classes on this element
90 	@property string[] classes() {
91 		return split(className, " ");
92 	}
93 
94 	/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
95 	Element addClass(string c) {
96 		if(hasClass(c))
97 			return this; // don't add it twice
98 
99 		string cn = getAttribute("class");
100 		if(cn.length == 0) {
101 			setAttribute("class", c);
102 			return this;
103 		} else {
104 			setAttribute("class", cn ~ " " ~ c);
105 		}
106 
107 		return this;
108 	}
109 
110 	/// Removes a particular class name.
111 	Element removeClass(string c) {
112 		if(!hasClass(c))
113 			return this;
114 		string n;
115 		foreach(name; classes) {
116 			if(c == name)
117 				continue; // cut it out
118 			if(n.length)
119 				n ~= " ";
120 			n ~= name;
121 		}
122 
123 		className = n.strip();
124 
125 		return this;
126 	}
127 
128 	/// Returns whether the given class appears in this element.
129 	bool hasClass(string c) {
130 		auto cn = className;
131 
132 		auto idx = cn.indexOf(c);
133 		if(idx == -1)
134 			return false;
135 
136 		foreach(cla; cn.split(" "))
137 			if(cla == c)
138 				return true;
139 		return false;
140 
141 		/*
142 		int rightSide = idx + c.length;
143 
144 		bool checkRight() {
145 			if(rightSide == cn.length)
146 				return true; // it's the only class
147 			else if(iswhite(cn[rightSide]))
148 				return true;
149 			return false; // this is a substring of something else..
150 		}
151 
152 		if(idx == 0) {
153 			return checkRight();
154 		} else {
155 			if(!iswhite(cn[idx - 1]))
156 				return false; // substring
157 			return checkRight();
158 		}
159 
160 		assert(0);
161 		*/
162 	}
163 
164 
165 	/* *******************************
166 		  DOM Mutation
167 	*********************************/
168 
169 	/// Removes all inner content from the tag; all child text and elements are gone.
170 	void removeAllChildren()
171 		out {
172 			assert(this.children.length == 0);
173 		}
174 	body {
175 		children = null;
176 	}
177 	/// convenience function to quickly add a tag with some text or
178 	/// other relevant info (for example, it's a src for an <img> element
179 	/// instead of inner text)
180 	Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
181 		in {
182 			assert(tagName !is null);
183 		}
184 		out(e) {
185 			assert(e.parentNode is this);
186 			assert(e.parentDocument is this.parentDocument);
187 		}
188 	body {
189 		auto e = Element.make(tagName, childInfo, childInfo2);
190 		// FIXME (maybe): if the thing is self closed, we might want to go ahead and
191 		// return the parent. That will break existing code though.
192 		return appendChild(e);
193 	}
194 
195 	/// Another convenience function. Adds a child directly after the current one, returning
196 	/// the new child.
197 	///
198 	/// Between this, addChild, and parentNode, you can build a tree as a single expression.
199 	Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
200 		in {
201 			assert(tagName !is null);
202 			assert(parentNode !is null);
203 		}
204 		out(e) {
205 			assert(e.parentNode is this.parentNode);
206 			assert(e.parentDocument is this.parentDocument);
207 		}
208 	body {
209 		auto e = Element.make(tagName, childInfo, childInfo2);
210 		return parentNode.insertAfter(this, e);
211 	}
212 
213 	Element addSibling(Element e) {
214 		return parentNode.insertAfter(this, e);
215 	}
216 
217 	Element addChild(Element e) {
218 		return this.appendChild(e);
219 	}
220 
221 	/// Convenience function to append text intermixed with other children.
222 	/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
223 	/// or div.addChildren("Hello, ", user.name, "!");
224 
225 	/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
226 	void addChildren(T...)(T t) {
227 		foreach(item; t) {
228 			static if(is(item : Element))
229 				appendChild(item);
230 			else static if (is(isSomeString!(item)))
231 				appendText(to!string(item));
232 			else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
233 		}
234 	}
235 
236 	///.
237 	Element addChild(string tagName, Element firstChild, string info2 = null)
238 	in {
239 		assert(firstChild !is null);
240 	}
241 	out(ret) {
242 		assert(ret !is null);
243 		assert(ret.parentNode is this);
244 		assert(firstChild.parentNode is ret);
245 
246 		assert(ret.parentDocument is this.parentDocument);
247 		//assert(firstChild.parentDocument is this.parentDocument);
248 	}
249 	body {
250 		auto e = Element.make(tagName, "", info2);
251 		e.appendChild(firstChild);
252 		this.appendChild(e);
253 		return e;
254 	}
255 
256 	Element addChild(string tagName, in Html innerHtml, string info2 = null)
257 	in {
258 	}
259 	out(ret) {
260 		assert(ret !is null);
261 		assert(ret.parentNode is this);
262 		assert(ret.parentDocument is this.parentDocument);
263 	}
264 	body {
265 		auto e = Element.make(tagName, "", info2);
266 		this.appendChild(e);
267 		e.innerHTML = innerHtml.source;
268 		return e;
269 	}
270 
271 
272 	/// .
273 	void appendChildren(Element[] children) {
274 		foreach(ele; children)
275 			appendChild(ele);
276 	}
277 
278 	///.
279 	void reparent(Element newParent)
280 		in {
281 			assert(newParent !is null);
282 			assert(parentNode !is null);
283 		}
284 		out {
285 			assert(this.parentNode is newParent);
286 			//assert(isInArray(this, newParent.children));
287 		}
288 	body {
289 		parentNode.removeChild(this);
290 		newParent.appendChild(this);
291 	}
292 
	/**
		Strips this tag out of the document, putting its inner html
		as children of the parent.

		For example, given: <p>hello <b>there</b></p>, if you
		call stripOut() on the b element, you'll be left with
		<p>hello there</p>.

		The idea here is to make it easy to get rid of garbage
		markup you aren't interested in.

		The element must have a parent when this is called. Afterwards
		it is expected to have neither a parent nor children (see the
		out contract).
	*/
	void stripOut()
		in {
			assert(parentNode !is null);
		}
		out {
			// NOTE(review): relies on replaceChild/removeChild clearing this.parentNode - confirm against their definitions
			assert(parentNode is null);
			assert(children.length == 0);
		}
	body {
		// detach the children from this element before handing them to the parent
		foreach(c; children)
			c.parentNode = null; // remove the parent
		// with children: swap this element for them in the parent; without: just remove it
		if(children.length)
			parentNode.replaceChild(this, this.children);
		else
			parentNode.removeChild(this);
		this.children.length = 0; // we reparented them all above
	}
321 
322 	/// shorthand for this.parentNode.removeChild(this) with parentNode null check
323 	/// if the element already isn't in a tree, it does nothing.
324 	Element removeFromTree()
325 		in {
326 
327 		}
328 		out(var) {
329 			assert(this.parentNode is null);
330 			assert(var is this);
331 		}
332 	body {
333 		if(this.parentNode is null)
334 			return this;
335 
336 		this.parentNode.removeChild(this);
337 
338 		return this;
339 	}
340 
341 	/// Wraps this element inside the given element.
342 	/// It's like this.replaceWith(what); what.appendchild(this);
343 	///
344 	/// Given: < b >cool</ b >, if you call b.wrapIn(new Link("site.com", "my site is "));
345 	/// you'll end up with: < a href="site.com">my site is < b >cool< /b ></ a >.
346 	Element wrapIn(Element what)
347 		in {
348 			assert(what !is null);
349 		}
350 		out(ret) {
351 			assert(this.parentNode is what);
352 			assert(ret is what);
353 		}
354 	body {
355 		this.replaceWith(what);
356 		what.appendChild(this);
357 
358 		return what;
359 	}
360 
361 	/// Replaces this element with something else in the tree.
362 	Element replaceWith(Element e)
363 	in {
364 		assert(this.parentNode !is null);
365 	}
366 	body {
367 		e.removeFromTree();
368 		this.parentNode.replaceChild(this, e);
369 		return e;
370 	}
371 
372 	/**
373 		Splits the className into an array of each class given
374 	*/
375 	string[] classNames() const {
376 		return className().split(" ");
377 	}
378 
379 	/**
380 		Fetches the first consecutive nodes, if text nodes, concatenated together
381 
382 		If the first node is not text, returns null.
383 
384 		See also: directText, innerText
385 	*/
386 	string firstInnerText() const {
387 		string s;
388 		foreach(child; children) {
389 			if(child.nodeType != NodeType.Text)
390 				break;
391 
392 			s ~= child.nodeValue();
393 		}
394 		return s;
395 	}
396 
397 
398 	/**
399 		Returns the text directly under this element,
400 		not recursively like innerText.
401 
402 		See also: firstInnerText
403 	*/
404 	@property string directText() {
405 		string ret;
406 		foreach(e; children) {
407 			if(e.nodeType == NodeType.Text)
408 				ret ~= e.nodeValue();
409 		}
410 
411 		return ret;
412 	}
413 
414 	/**
415 		Sets the direct text, keeping the same place.
416 
417 		Unlike innerText, this does *not* remove existing
418 		elements in the element.
419 
420 		It only replaces the first text node it sees.
421 
422 		If there are no text nodes, it calls appendText
423 
424 		So, given (ignore the spaces in the tags):
425 			< div > < img > text here < /div >
426 
427 		it will keep the img, and replace the "text here".
428 	*/
429 	@property void directText(string text) {
430 		foreach(e; children) {
431 			if(e.nodeType == NodeType.Text) {
432 				auto it = cast(TextNode) e;
433 				it.contents = text;
434 				return;
435 			}
436 		}
437 
438 		appendText(text);
439 	}
440 }
441 
/// Finds the comments under the document's root whose text equals txt.
/// The comparison ignores case and surrounding whitespace.
Element[] findComments(Document document, string txt) {
	// deliberately a plain call: Element forwards unknown members, so UFCS is avoided here
	auto root = document.root;
	return findComments(root, txt);
}
446 
/// Finds the "#comment" nodes under element whose text equals txt.
/// Both sides are stripped and lower-cased before comparing.
Element[] findComments(Element element, string txt) {
	immutable wanted = txt.strip().toLower();

	Element[] matches;
	foreach(comment; element.getElementsByTagName("#comment")) {
		if(comment.nodeValue().strip().toLower() != wanted)
			continue;
		matches ~= comment;
	}

	return matches;
}
460 
461 // I'm just dicking around with this
struct ElementCollection {
	/// The elements held by this collection, in order.
	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// A collection holding just the one element.
	this(Element e) {
		this.elements = [e];
	}

	/// A collection of everything under e that matches selector (querySelectorAll).
	this(Element e, string selector) {
		this.elements = e.querySelectorAll(selector);
	}

	/// A collection wrapping the given array as-is.
	this(Element[] e) {
		this.elements = e;
	}

	/// Runs getElementsBySelector on every held element and gathers all the
	/// results into a new collection.
	ElementCollection opIndex(string selector) {
		Element[] gathered;
		foreach(holder; elements)
			gathered ~= holder.getElementsBySelector(selector);
		return ElementCollection(gathered);
	}

	/// Forward method calls to each individual element of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(member; elements) {
			mixin("member." ~ name)(t);
		}
		return this;
	}

	/// Concatenation: the elements of this collection followed by those of rhs.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		auto combined = this.elements ~ rhs.elements;
		return ElementCollection(combined);
	}
}
498 
499 
// Mixes in operators and opDispatch so string indexes and property-style access
// are forwarded to the host type's get(name) and set(name, value) functions.
mixin template JavascriptStyleDispatch() {
	/// obj.name reads through get; obj.name = v (or obj.name(v)) writes through set.
	/// A null v counts as "no value given" and therefore reads.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		return (v is null) ? get(name) : set(name, v);
	}

	/// obj["key"] reads through get.
	string opIndex(string key) const {
		return get(key);
	}

	/// obj["field"] = value writes through set.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): `key in obj` is dispatched to opBinaryRight, not opBinary, and
	// `fields` is not defined by the types that mix this in within view - confirm before relying on it.
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}
521 
/// Proxy behind the Element class' dataset property; it maps property names
/// onto the element's data-* attributes. See Element.dataset for more info.
///
/// Do not create this object directly.
struct DataSet {
	private Element _element;

	this(Element e) {
		_element = e;
	}

	/// Stores value in the attribute "data-" ~ unCamelCase(name) and returns value.
	string set(string name, string value) {
		auto attributeName = "data-" ~ unCamelCase(name);
		_element.setAttribute(attributeName, value);
		return value;
	}

	/// Reads the attribute "data-" ~ unCamelCase(name) via getAttribute.
	string get(string name) const {
		auto attributeName = "data-" ~ unCamelCase(name);
		return _element.getAttribute(attributeName);
	}

	mixin JavascriptStyleDispatch!();
}
542 
/// Wrapper around an element's style attribute. It can be used like the plain
/// attribute string (through alias this) and also gives Javascript style access
/// to the individual css properties.
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	/// Reference to the raw "style" entry in the element's attributes. The
	/// entry is created (empty) when it does not exist yet, even on const access.
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/// Sets one css property and rewrites the whole style attribute from the
	/// parsed rules. name is camelCase ("cssFloat" means "float"); an empty
	/// value removes the rule. An empty name changes nothing. Returns value.
	string set(string name, string value) {
		if(name.length == 0)
			return value;

		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto r = rules();
		r[name] = value;

		// start from a non-null empty string so the attribute ends up "" when no rule survives
		string css = "";
		foreach(k, v; r) {
			/* css can't do empty rules anyway so we'll use that to remove */
			if(v.length == 0)
				continue;
			if(css.length)
				css ~= " ";
			css ~= k ~ ": " ~ v ~ ";";
		}
		_attribute = css;

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	/// Reads one css property by camelCase name ("cssFloat" means "float").
	/// Returns null when the style attribute has no such rule.
	string get(string name) const {
		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto r = rules();
		if(auto found = name in r)
			return *found;
		return null;
	}

	/// Parses the style attribute into a property name to value table.
	/// The text is cut at every ';' and each piece at its first ':'; a piece
	/// without ':' becomes a name with an empty value.
	string[string] rules() const {
		string[string] parsed;
		foreach(piece; _attribute.split(";")) {
			auto rule = piece.strip();
			if(rule.length == 0)
				continue;

			auto colon = rule.indexOf(":");
			if(colon == -1) {
				parsed[rule] = "";
				continue;
			}

			auto name = rule[0 .. colon].strip();
			auto value = rule[colon + 1 .. $].strip();
			parsed[name] = value;
		}

		return parsed;
	}

	mixin JavascriptStyleDispatch!();
}
623 
/// Converts a camel cased propertyName to a css style dashed property-name.
/// Every ASCII capital letter becomes a dash followed by its lower case form;
/// everything else is copied through unchanged.
string unCamelCase(string a) {
	string dashed;
	foreach(c; a) {
		if(c < 'A' || c > 'Z') {
			dashed ~= c;
			continue;
		}

		dashed ~= '-';
		dashed ~= cast(char)(c - 'A' + 'a');
	}
	return dashed;
}
634 
/// Translates a css style property-name to a camel cased propertyName.
///
/// Dashes are dropped and the character following a dash (or a run of dashes)
/// is upper-cased. The input is walked by code point, so a non-ASCII character
/// after a dash is handled as one character rather than as separate UTF-8 bytes.
string camelCase(string a) {
	import std.uni : toUpper;

	string ret;
	bool justSawDash = false;
	// decode to dchar: the old code upper-cased a single UTF-8 code unit, which is
	// not a valid string for anything outside ASCII
	foreach(dchar c; a) {
		if(c == '-') {
			justSawDash = true;
			continue;
		}

		if(justSawDash) {
			justSawDash = false;
			ret ~= toUpper(c);
		} else
			ret ~= c;
	}
	return ret;
}
651 
652 
653 
654 
655 
656 
657 
658 
659 
660 // domconvenience ends }
661 
662 
663 
664 
665 
666 
667 
668 
669 
670 
671 
672 // @safe:
673 
674 // NOTE: do *NOT* override toString on Element subclasses. It won't work.
675 // Instead, override writeToAppender();
676 
677 // FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.
678 
679 // Stripping them is useful for reading php as html.... but adding them
680 // is good for building php.
681 
682 // I need to maintain compatibility with the way it is now too.
683 
684 import arsd.characterencodings;
685 
686 import std.string;
687 import std.exception;
688 import std.uri;
689 import std.array;
690 import std.range;
691 
692 //import std.stdio;
693 
694 // tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
695 // that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
696 // most likely a typo so I say kill kill kill.
697 
698 
/// A file-like resource: some data together with its mime type.
///
/// This might belong in another module. According to the original notes, Document
/// implements this interface with type = text/html (see Document.contentType for more info)
/// and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
interface FileResource {
	@property string contentType() const; /// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	immutable(ubyte)[] getData() const; /// the raw bytes of the file
}
706 
707 
708 
709 
/// Node kinds that code in this module compares Element.nodeType against.
/// Only Text is defined here; its value 3 matches the DOM's TEXT_NODE constant.
enum NodeType { Text = 3 }
712 
713 
/// Null check, optionally combined with a dynamic cast, for any element.
///
/// Returns e cast to T (Element by default). Throws ElementNotFoundException,
/// tagged with the instantiation's file and line, when e is null or is not a T.
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
body {
	if(auto converted = cast(T) e)
		return converted;

	throw new ElementNotFoundException(T.stringof, "passed value", file, line);
}
724 
725 /// This represents almost everything in the DOM.
726 class Element {
727 	mixin DomConvenienceFunctions!();
728 
729 	// this is a thing so i can remove observer support if it gets slow
730 	// I have not implemented all these yet
731 	private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
732 		if(parentDocument is null) return;
733 		DomMutationEvent me;
734 		me.operation = operation;
735 		me.target = this;
736 		me.relatedString = s1;
737 		me.relatedString2 = s2;
738 		me.related = r;
739 		me.related2 = r2;
740 		parentDocument.dispatchMutationEvent(me);
741 	}
742 
	// putting all the members up front

	// The child nodes of this element, in document order.
	// this ought to be private. don't use it directly.
	Element[] children;

	/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
	string tagName;

	/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
	string[string] attributes;

	/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
	/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
	private bool selfClosed;

	/// Get the parent Document object that contains this element.
	/// It may be null, so remember to check for that.
	Document parentDocument;

	/// The element this one is a child of, or null when it is not attached to a parent.
	Element parentNode;

	// the next few methods are for implementing interactive kind of things
	private CssStyle _computedStyle;

	// Registered event handlers, keyed by event name (see addEventListener).
	// these are here for event handlers. Don't forget that this library never fires events.
	// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)
	EventHandler[][string] bubblingEventHandlers;
	EventHandler[][string] capturingEventHandlers;
	EventHandler[string] defaultEventHandlers;
773 
774 	void addEventListener(string event, EventHandler handler, bool useCapture = false) {
775 		if(event.length > 2 && event[0..2] == "on")
776 			event = event[2 .. $];
777 
778 		if(useCapture)
779 			capturingEventHandlers[event] ~= handler;
780 		else
781 			bubblingEventHandlers[event] ~= handler;
782 	}
783 
784 
785 	// and now methods
786 
	/// Convenience function to try to do the right thing for HTML. This is the main
	/// way I create elements.
	///
	/// Creates the object type that fits tagName (TextNode for "#text", Table, Link,
	/// Form, TableRow, TableCell, or a plain Element) without a parent document.
	///
	/// When childInfo is not null, the two info strings are applied according to the tag:
	/// $(UL
	///   $(LI audio: childInfo is passed to a child source element, childInfo2 is appended as text)
	///   $(LI source: src, type)
	///   $(LI img: src, alt)
	///   $(LI link: href, rel)
	///   $(LI option: inner text, value)
	///   $(LI input: a hidden input with name, value)
	///   $(LI button: inner text, type)
	///   $(LI a: inner text, href)
	///   $(LI script and style: raw inner source; childInfo2 is not used)
	///   $(LI meta: name, content)
	///   $(LI anything else: inner text, class name)
	/// )
	/// When childInfo is null, childInfo2 is not looked at.
	static Element make(string tagName, string childInfo = null, string childInfo2 = null) {
		bool selfClosed = tagName.isInArray(selfClosedElements);

		Element e;
		// want to create the right kind of object for the given tag...
		switch(tagName) {
			case "#text":
				// text nodes are returned right away; none of the setup below is applied to them
				e = new TextNode(null, childInfo);
				return e;
			// break;
			case "table":
				e = new Table(null);
			break;
			case "a":
				e = new Link(null);
			break;
			case "form":
				e = new Form(null);
			break;
			case "tr":
				e = new TableRow(null);
			break;
			case "td", "th":
				e = new TableCell(null, tagName);
			break;
			default:
				e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
		}

		// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
		e.tagName = tagName;
		e.selfClosed = selfClosed;

		// a null childInfo means "no info at all"; an empty string still enters the switch
		if(childInfo !is null)
			switch(tagName) {
				/* html5 convenience tags */
				case "audio":
					if(childInfo.length)
						e.addChild("source", childInfo);
					if(childInfo2 !is null)
						e.appendText(childInfo2);
				break;
				case "source":
					e.src = childInfo;
					if(childInfo2 !is null)
						e.type = childInfo2;
				break;
				/* regular html 4 stuff */
				case "img":
					e.src = childInfo;
					if(childInfo2 !is null)
						e.alt = childInfo2;
				break;
				case "link":
					e.href = childInfo;
					if(childInfo2 !is null)
						e.rel = childInfo2;
				break;
				case "option":
					e.innerText = childInfo;
					if(childInfo2 !is null)
						e.value = childInfo2;
				break;
				case "input":
					e.type = "hidden";
					e.name = childInfo;
					if(childInfo2 !is null)
						e.value = childInfo2;
				break;
				case "button":
					e.innerText = childInfo;
					if(childInfo2 !is null)
						e.type = childInfo2;
				break;
				case "a":
					e.innerText = childInfo;
					if(childInfo2 !is null)
						e.href = childInfo2;
				break;
				// "script" has no statements of its own and shares the "style" handling below
				case "script":
				case "style":
					e.innerRawSource = childInfo;
				break;
				case "meta":
					e.name = childInfo;
					if(childInfo2 !is null)
						e.content = childInfo2;
				break;
				/* generically, assume we were passed text and perhaps class */
				default:
					e.innerText = childInfo;
					if(childInfo2.length)
						e.className = childInfo2;
			}

		return e;
	}
886 
887 	static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
888 		// FIXME: childInfo2 is ignored when info1 is null
889 		auto m = Element.make(tagName, cast(string) null, childInfo2);
890 		m.innerHTML = innerHtml.source;
891 		return m;
892 	}
893 
894 	static Element make(string tagName, Element child, string childInfo2 = null) {
895 		auto m = Element.make(tagName, cast(string) null, childInfo2);
896 		m.appendChild(child);
897 		return m;
898 	}
899 
900 
901 	/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
902 	this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
903 		parentDocument = _parentDocument;
904 		tagName = _tagName;
905 		if(_attributes !is null)
906 			attributes = _attributes;
907 		selfClosed = _selfClosed;
908 
909 		version(dom_node_indexes)
910 			this.dataset.nodeIndex = to!string(&(this.attributes));
911 
912 		assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
913 	}
914 
915 	/// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
916 	/// Note also that without a parent document, elements are always in strict, case-sensitive mode.
917 	this(string _tagName, string[string] _attributes = null) {
918 		tagName = _tagName;
919 		if(_attributes !is null)
920 			attributes = _attributes;
921 		selfClosed = tagName.isInArray(selfClosedElements);
922 
923 		// this is meant to reserve some memory. It makes a small, but consistent improvement.
924 		//children.length = 8;
925 		//children.length = 0;
926 
927 		version(dom_node_indexes)
928 			this.dataset.nodeIndex = to!string(&(this.attributes));
929 	}
930 
931 	private this(Document _parentDocument) {
932 		parentDocument = _parentDocument;
933 
934 		version(dom_node_indexes)
935 			this.dataset.nodeIndex = to!string(&(this.attributes));
936 	}
937 
938 
939 	/* *******************************
940 	       Navigating the DOM
941 	*********************************/
942 
943 	/// Returns the first child of this element. If it has no children, returns null.
944 	/// Remember, text nodes are children too.
945 	@property Element firstChild() {
946 		return children.length ? children[0] : null;
947 	}
948 
949 	///
950 	@property Element lastChild() {
951 		return children.length ? children[$ - 1] : null;
952 	}
953 
954 
955 	///.
956 	@property Element previousSibling(string tagName = null) {
957 		if(this.parentNode is null)
958 			return null;
959 		Element ps = null;
960 		foreach(e; this.parentNode.childNodes) {
961 			if(e is this)
962 				break;
963 			if(tagName == "*" && e.nodeType != NodeType.Text) {
964 				ps = e;
965 				break;
966 			}
967 			if(tagName is null || e.tagName == tagName)
968 				ps = e;
969 		}
970 
971 		return ps;
972 	}
973 
974 	///.
975 	@property Element nextSibling(string tagName = null) {
976 		if(this.parentNode is null)
977 			return null;
978 		Element ns = null;
979 		bool mightBe = false;
980 		foreach(e; this.parentNode.childNodes) {
981 			if(e is this) {
982 				mightBe = true;
983 				continue;
984 			}
985 			if(mightBe) {
986 				if(tagName == "*" && e.nodeType != NodeType.Text) {
987 					ns = e;
988 					break;
989 				}
990 				if(tagName is null || e.tagName == tagName) {
991 					ns = e;
992 					break;
993 				}
994 			}
995 		}
996 
997 		return ns;
998 	}
999 
1000 
1001 	/// Gets the nearest node, going up the chain, with the given tagName
1002 	/// May return null or throw.
1003 	T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
1004 		if(tagName is null) {
1005 			static if(is(T == Form))
1006 				tagName = "form";
1007 			else static if(is(T == Table))
1008 				tagName = "table";
1009 			else static if(is(T == Link))
1010 				tagName == "a";
1011 		}
1012 
1013 		auto par = this.parentNode;
1014 		while(par !is null) {
1015 			if(tagName is null || par.tagName == tagName)
1016 				break;
1017 			par = par.parentNode;
1018 		}
1019 
1020 		static if(!is(T == Element)) {
1021 			auto t = cast(T) par;
1022 			if(t is null)
1023 				throw new ElementNotFoundException("", tagName ~ " parent not found");
1024 		} else
1025 			auto t = par;
1026 
1027 		return t;
1028 	}
1029 
1030 	///.
1031 	Element getElementById(string id) {
1032 		// FIXME: I use this function a lot, and it's kinda slow
1033 		// not terribly slow, but not great.
1034 		foreach(e; tree)
1035 			if(e.id == id)
1036 				return e;
1037 		return null;
1038 	}
1039 
1040 	/// Note: you can give multiple selectors, separated by commas.
1041 	/// It will return the first match it finds.
1042 	Element querySelector(string selector) {
1043 		// FIXME: inefficient; it gets all results just to discard most of them
1044 		auto list = getElementsBySelector(selector);
1045 		if(list.length == 0)
1046 			return null;
1047 		return list[0];
1048 	}
1049 
1050 	/// a more standards-compliant alias for getElementsBySelector
1051 	Element[] querySelectorAll(string selector) {
1052 		return getElementsBySelector(selector);
1053 	}
1054 
1055 	/**
1056 		Does a CSS selector
1057 
1058 		* -- all, default if nothing else is there
1059 
1060 		tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector
1061 
1062 		It is all additive
1063 
1064 		OP
1065 
1066 		space = descendant
1067 		>     = direct descendant
1068 		+     = sibling (E+F Matches any F element immediately preceded by a sibling element E)
1069 
1070 		[foo]        Foo is present as an attribute
1071 		[foo="warning"]   Matches any E element whose "foo" attribute value is exactly equal to "warning".
1072 		E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
1073 		E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".
1074 
1075 		[item$=sdas] ends with
1076 		[item^-sdsad] begins with
1077 
1078 		Quotes are optional here.
1079 
1080 		Pseudos:
1081 			:first-child
1082 			:last-child
1083 			:link (same as a[href] for our purposes here)
1084 
1085 
1086 		There can be commas separating the selector. A comma separated list result is OR'd onto the main.
1087 
1088 
1089 
1090 		This ONLY cares about elements. text, etc, are ignored
1091 
1092 
1093 		There should be two functions: given element, does it match the selector? and given a selector, give me all the elements
1094 	*/
1095 	Element[] getElementsBySelector(string selector) {
1096 		// FIXME: this function could probably use some performance attention
1097 		// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.
1098 
1099 
1100 		bool caseSensitiveTags = true;
1101 		if(parentDocument && parentDocument.loose)
1102 			caseSensitiveTags = false;
1103 
1104 		Element[] ret;
1105 		foreach(sel; parseSelectorString(selector, caseSensitiveTags))
1106 			ret ~= sel.getElements(this);
1107 		return ret;
1108 	}
1109 
1110 	/// .
1111 	Element[] getElementsByClassName(string cn) {
1112 		// is this correct?
1113 		return getElementsBySelector("." ~ cn);
1114 	}
1115 
1116 	///.
1117 	Element[] getElementsByTagName(string tag) {
1118 		if(parentDocument && parentDocument.loose)
1119 			tag = tag.toLower();
1120 		Element[] ret;
1121 		foreach(e; tree)
1122 			if(e.tagName == tag)
1123 				ret ~= e;
1124 		return ret;
1125 	}
1126 
1127 
1128 	/* *******************************
1129 	          Attributes
1130 	*********************************/
1131 
1132 	/**
1133 		Gets the given attribute value, or null if the
1134 		attribute is not set.
1135 
1136 		Note that the returned string is decoded, so it no longer contains any xml entities.
1137 	*/
1138 	string getAttribute(string name) const {
1139 		if(parentDocument && parentDocument.loose)
1140 			name = name.toLower();
1141 		auto e = name in attributes;
1142 		if(e)
1143 			return *e;
1144 		else
1145 			return null;
1146 	}
1147 
1148 	/**
1149 		Sets an attribute. Returns this for easy chaining
1150 	*/
1151 	Element setAttribute(string name, string value) {
1152 		if(parentDocument && parentDocument.loose)
1153 			name = name.toLower();
1154 
1155 		// I never use this shit legitimately and neither should you
1156 		auto it = name.toLower();
1157 		if(it == "href" || it == "src") {
1158 			auto v = value.strip().toLower();
1159 			if(v.startsWith("vbscript:"))
1160 				value = value[9..$];
1161 			if(v.startsWith("javascript:"))
1162 				value = value[11..$];
1163 		}
1164 
1165 		attributes[name] = value;
1166 
1167 		sendObserverEvent(DomMutationOperations.setAttribute, name, value);
1168 
1169 		return this;
1170 	}
1171 
1172 	/**
1173 		Returns if the attribute exists.
1174 	*/
1175 	bool hasAttribute(string name) {
1176 		if(parentDocument && parentDocument.loose)
1177 			name = name.toLower();
1178 
1179 		if(name in attributes)
1180 			return true;
1181 		else
1182 			return false;
1183 	}
1184 
1185 	/**
1186 		Removes the given attribute from the element.
1187 	*/
1188 	Element removeAttribute(string name)
1189 	out(ret) {
1190 		assert(ret is this);
1191 	}
1192 	body {
1193 		if(parentDocument && parentDocument.loose)
1194 			name = name.toLower();
1195 		if(name in attributes)
1196 			attributes.remove(name);
1197 
1198 		sendObserverEvent(DomMutationOperations.removeAttribute, name);
1199 		return this;
1200 	}
1201 
1202 	/**
1203 		Gets the class attribute's contents. Returns
1204 		an empty string if it has no class.
1205 	*/
1206 	@property string className() const {
1207 		auto c = getAttribute("class");
1208 		if(c is null)
1209 			return "";
1210 		return c;
1211 	}
1212 
1213 	///.
1214 	@property Element className(string c) {
1215 		setAttribute("class", c);
1216 		return this;
1217 	}
1218 
1219 	/**
1220 		Provides easy access to attributes, object style.
1221 
1222 		auto element = Element.make("a");
1223 		a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
1224 		string where = a.href; // same as a.getAttribute("href");
1225 	*/
1226 		// name != "popFront" is so duck typing doesn't think it's a range
1227 	@property string opDispatch(string name)(string v = null) if(name != "popFront") {
1228 		if(v !is null)
1229 			setAttribute(name, v);
1230 		return getAttribute(name);
1231 	}
1232 
1233 	/*
1234 	// this would be nice for convenience, but it broke the getter above.
1235 	@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
1236 		if(boolean)
1237 			setAttribute(name, name);
1238 		else
1239 			removeAttribute(name);
1240 	}
1241 	*/
1242 
1243 	/**
1244 		Returns the element's children.
1245 	*/
1246 	@property const(Element[]) childNodes() const {
1247 		return children;
1248 	}
1249 
1250 	/// Mutable version of the same
1251 	@property Element[] childNodes() { // FIXME: the above should be inout
1252 		return children;
1253 	}
1254 
1255 	/// HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
1256 	///
1257 	/// Given: <a data-my-property="cool" />
1258 	///
1259 	/// We get: assert(a.dataset.myProperty == "cool");
1260 	DataSet dataset() {
1261 		return DataSet(this);
1262 	}
1263 
1264 	/// Provides both string and object style (like in Javascript) access to the style attribute.
1265 	@property ElementStyle style() {
1266 		return ElementStyle(this);
1267 	}
1268 
1269 	/// This sets the style attribute with a string.
1270 	@property ElementStyle style(string s) {
1271 		this.setAttribute("style", s);
1272 		return this.style();
1273 	}
1274 
	// Currently a no-op: the entire body is disabled with a nested /+ +/ comment.
	// The disabled sketch would default whichOnes to all attribute keys and switch
	// over "id", "class" and "style", with every case left empty.
	private void parseAttributes(string[] whichOnes = null) {
/+
		if(whichOnes is null)
			whichOnes = attributes.keys;
		foreach(attr; whichOnes) {
			switch(attr) {
				case "id":

				break;
				case "class":

				break;
				case "style":

				break;
				default:
					// we don't care about it
			}
		}
+/
	}
1296 
1297 
1298 	// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
1299 	///.
1300 	@property CssStyle computedStyle() {
1301 		if(_computedStyle is null) {
1302 			auto style = this.getAttribute("style");
1303 		/* we'll treat shitty old html attributes as css here */
1304 			if(this.hasAttribute("width"))
1305 				style ~= "; width: " ~ this.width;
1306 			if(this.hasAttribute("height"))
1307 				style ~= "; height: " ~ this.height;
1308 			if(this.hasAttribute("bgcolor"))
1309 				style ~= "; background-color: " ~ this.bgcolor;
1310 			if(this.tagName == "body" && this.hasAttribute("text"))
1311 				style ~= "; color: " ~ this.text;
1312 			if(this.hasAttribute("color"))
1313 				style ~= "; color: " ~ this.color;
1314 		/* done */
1315 
1316 
1317 			_computedStyle = new CssStyle(null, style); // gives at least something to work with
1318 		}
1319 		return _computedStyle;
1320 	}
1321 
1322 	/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
1323 	version(browser) {
1324 		void* expansionHook; ///ditto
1325 		int offsetWidth; ///ditto
1326 		int offsetHeight; ///ditto
1327 		int offsetLeft; ///ditto
1328 		int offsetTop; ///ditto
1329 		Element offsetParent; ///ditto
1330 		bool hasLayout; ///ditto
1331 		int zIndex; ///ditto
1332 
1333 		///ditto
1334 		int absoluteLeft() {
1335 			int a = offsetLeft;
1336 			auto p = offsetParent;
1337 			while(p) {
1338 				a += p.offsetLeft;
1339 				p = p.offsetParent;
1340 			}
1341 
1342 			return a;
1343 		}
1344 
1345 		///ditto
1346 		int absoluteTop() {
1347 			int a = offsetTop;
1348 			auto p = offsetParent;
1349 			while(p) {
1350 				a += p.offsetTop;
1351 				p = p.offsetParent;
1352 			}
1353 
1354 			return a;
1355 		}
1356 	}
1357 
1358 	// Back to the regular dom functions
1359 
1360     public:
1361 
1362 
1363 	/* *******************************
1364 	          DOM Mutation
1365 	*********************************/
1366 
1367 	/// Removes all inner content from the tag; all child text and elements are gone.
1368 	void removeAllChildren()
1369 		out {
1370 			assert(this.children.length == 0);
1371 		}
1372 	body {
1373 		children = null;
1374 	}
1375 
1376 
1377     	/// Appends the given element to this one. The given element must not have a parent already.
1378 	Element appendChild(Element e)
1379 		in {
1380 			assert(e !is null);
1381 			assert(e.parentNode is null);
1382 		}
1383 		out (ret) {
1384 			assert(e.parentNode is this);
1385 			assert(e.parentDocument is this.parentDocument);
1386 			assert(e is ret);
1387 		}
1388 	body {
1389 		selfClosed = false;
1390 		e.parentNode = this;
1391 		e.parentDocument = this.parentDocument;
1392 		children ~= e;
1393 
1394 		sendObserverEvent(DomMutationOperations.appendChild, null, null, e);
1395 
1396 		return e;
1397 	}
1398 
1399 	/// Inserts the second element to this node, right before the first param
1400 	Element insertBefore(in Element where, Element what)
1401 		in {
1402 			assert(where !is null);
1403 			assert(where.parentNode is this);
1404 			assert(what !is null);
1405 			assert(what.parentNode is null);
1406 		}
1407 		out (ret) {
1408 			assert(where.parentNode is this);
1409 			assert(what.parentNode is this);
1410 
1411 			assert(what.parentDocument is this.parentDocument);
1412 			assert(ret is what);
1413 		}
1414 	body {
1415 		foreach(i, e; children) {
1416 			if(e is where) {
1417 				children = children[0..i] ~ what ~ children[i..$];
1418 				what.parentDocument = this.parentDocument;
1419 				what.parentNode = this;
1420 				return what;
1421 			}
1422 		}
1423 
1424 		return what;
1425 
1426 		assert(0);
1427 	}
1428 
1429 	///.
1430 	Element insertAfter(in Element where, Element what)
1431 		in {
1432 			assert(where !is null);
1433 			assert(where.parentNode is this);
1434 			assert(what !is null);
1435 			assert(what.parentNode is null);
1436 		}
1437 		out (ret) {
1438 			assert(where.parentNode is this);
1439 			assert(what.parentNode is this);
1440 			assert(what.parentDocument is this.parentDocument);
1441 			assert(ret is what);
1442 		}
1443 	body {
1444 		foreach(i, e; children) {
1445 			if(e is where) {
1446 				children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $];
1447 				what.parentNode = this;
1448 				what.parentDocument = this.parentDocument;
1449 				return what;
1450 			}
1451 		}
1452 
1453 		return what;
1454 
1455 		assert(0);
1456 	}
1457 
1458 	/// swaps one child for a new thing. Returns the old child which is now parentless.
1459 	Element swapNode(Element child, Element replacement)
1460 		in {
1461 			assert(child !is null);
1462 			assert(replacement !is null);
1463 			assert(child.parentNode is this);
1464 		}
1465 		out(ret) {
1466 			assert(ret is child);
1467 			assert(ret.parentNode is null);
1468 			assert(replacement.parentNode is this);
1469 			assert(replacement.parentDocument is this.parentDocument);
1470 		}
1471 	body {
1472 		foreach(ref c; this.children)
1473 			if(c is child) {
1474 				c.parentNode = null;
1475 				c = replacement;
1476 				c.parentNode = this;
1477 				c.parentDocument = this.parentDocument;
1478 				return child;
1479 			}
1480 		assert(0);
1481 	}
1482 
1483 
1484 	///.
1485 	Element appendText(string text) {
1486 		Element e = new TextNode(parentDocument, text);
1487 		appendChild(e);
1488 		return this;
1489 	}
1490 
1491 	///.
1492 	@property Element[] childElements() {
1493 		Element[] ret;
1494 		foreach(c; children)
1495 			if(c.nodeType == 1)
1496 				ret ~= c;
1497 		return ret;
1498 	}
1499 
1500 	/// Appends the given html to the element, returning the elements appended
1501 	Element[] appendHtml(string html) {
1502 		Document d = new Document("<root>" ~ html ~ "</root>");
1503 		return stealChildren(d.root);
1504 	}
1505 
1506 
1507 	///.
1508 	void insertChildAfter(Element child, Element where)
1509 		in {
1510 			assert(child !is null);
1511 			assert(where !is null);
1512 			assert(where.parentNode is this);
1513 			assert(!selfClosed);
1514 			//assert(isInArray(where, children));
1515 		}
1516 		out {
1517 			assert(child.parentNode is this);
1518 			assert(where.parentNode is this);
1519 			//assert(isInArray(where, children));
1520 			//assert(isInArray(child, children));
1521 		}
1522 	body {
1523 		foreach(ref i, c; children) {
1524 			if(c is where) {
1525 				i++;
1526 				children = children[0..i] ~ child ~ children[i..$];
1527 				child.parentNode = this;
1528 				child.parentDocument = this.parentDocument;
1529 				break;
1530 			}
1531 		}
1532 	}
1533 
1534 	///.
1535 	Element[] stealChildren(Element e, Element position = null)
1536 		in {
1537 			assert(!selfClosed);
1538 			assert(e !is null);
1539 			//if(position !is null)
1540 				//assert(isInArray(position, children));
1541 		}
1542 		out (ret) {
1543 			assert(e.children.length == 0);
1544 			debug foreach(child; ret) {
1545 				assert(child.parentNode is this);
1546 				assert(child.parentDocument is this.parentDocument);
1547 			}
1548 		}
1549 	body {
1550 		foreach(c; e.children) {
1551 			c.parentNode = this;
1552 			c.parentDocument = this.parentDocument;
1553 		}
1554 		if(position is null)
1555 			children ~= e.children;
1556 		else {
1557 			foreach(i, child; children) {
1558 				if(child is position) {
1559 					children = children[0..i] ~
1560 						e.children ~
1561 						children[i..$];
1562 					break;
1563 				}
1564 			}
1565 		}
1566 
1567 		auto ret = std.container.dup(e.children);
1568 		e.children.length = 0;
1569 
1570 		return ret;
1571 	}
1572 
1573     	/// Puts the current element first in our children list. The given element must not have a parent already.
1574 	Element prependChild(Element e)
1575 		in {
1576 			assert(e.parentNode is null);
1577 			assert(!selfClosed);
1578 		}
1579 		out {
1580 			assert(e.parentNode is this);
1581 			assert(e.parentDocument is this.parentDocument);
1582 			assert(children[0] is e);
1583 		}
1584 	body {
1585 		e.parentNode = this;
1586 		e.parentDocument = this.parentDocument;
1587 		children = e ~ children;
1588 		return e;
1589 	}
1590 
1591 
1592 	/**
1593 		Returns a string containing all child elements, formatted such that it could be pasted into
1594 		an XML file.
1595 	*/
1596 	@property string innerHTML(Appender!string where = appender!string()) const {
1597 		if(children is null)
1598 			return "";
1599 
1600 		auto start = where.data.length;
1601 
1602 		foreach(child; children) {
1603 			assert(child !is null);
1604 
1605 			child.writeToAppender(where);
1606 		}
1607 
1608 		return where.data[start .. $];
1609 	}
1610 
1611 	/**
1612 		Takes some html and replaces the element's children with the tree made from the string.
1613 	*/
1614 	@property Element innerHTML(string html, bool strict = false) {
1615 		if(html.length)
1616 			selfClosed = false;
1617 
1618 		if(html.length == 0) {
1619 			// I often say innerHTML = ""; as a shortcut to clear it out,
1620 			// so let's optimize that slightly.
1621 			removeAllChildren();
1622 			return this;
1623 		}
1624 
1625 		auto doc = new Document();
1626 		doc.parse("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document
1627 
1628 		children = doc.root.children;
1629 		foreach(c; children) {
1630 			c.parentNode = this;
1631 			c.parentDocument = this.parentDocument;
1632 		}
1633 
1634 		reparentTreeDocuments();
1635 
1636 		doc.root.children = null;
1637 
1638 		return this;
1639 	}
1640 
1641 	/// ditto
1642 	@property Element innerHTML(Html html) {
1643 		return this.innerHTML(html.source);
1644 	}
1645 
1646 	private void reparentTreeDocuments() {
1647 		foreach(c; this.tree)
1648 			c.parentDocument = this.parentDocument;
1649 	}
1650 
1651 	/**
1652 		Replaces this node with the given html string, which is parsed
1653 
1654 		Note: this invalidates the this reference, since it is removed
1655 		from the tree.
1656 
1657 		Returns the new children that replace this.
1658 	*/
1659 	@property Element[] outerHTML(string html) {
1660 		auto doc = new Document();
1661 		doc.parse("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness
1662 
1663 		children = doc.root.children;
1664 		foreach(c; children) {
1665 			c.parentNode = this;
1666 			c.parentDocument = this.parentDocument;
1667 		}
1668 
1669 
1670 		reparentTreeDocuments();
1671 
1672 
1673 		stripOut();
1674 
1675 		return doc.root.children;
1676 	}
1677 
1678 	/// Returns all the html for this element, including the tag itself.
1679 	/// This is equivalent to calling toString().
1680 	@property string outerHTML() {
1681 		return this.toString();
1682 	}
1683 
1684 	/// This sets the inner content of the element *without* trying to parse it.
1685 	/// You can inject any code in there; this serves as an escape hatch from the dom.
1686 	///
1687 	/// The only times you might actually need it are for < style > and < script > tags in html.
1688 	/// Other than that, innerHTML and/or innerText should do the job.
1689 	@property void innerRawSource(string rawSource) {
1690 		children.length = 0;
1691 		auto rs = new RawSource(parentDocument, rawSource);
1692 		rs.parentNode = this;
1693 
1694 		children ~= rs;
1695 	}
1696 
1697 	///.
1698 	Element replaceChild(Element find, Element replace) 
1699 		in {
1700 			assert(find !is null);
1701 			assert(replace !is null);
1702 			assert(replace.parentNode is null);
1703 		}
1704 		out(ret) {
1705 			assert(ret is replace);
1706 			assert(replace.parentNode is this);
1707 			assert(replace.parentDocument is this.parentDocument);
1708 			assert(find.parentNode is null);
1709 		}
1710 	body {
1711 		for(int i = 0; i < children.length; i++) {
1712 			if(children[i] is find) {
1713 				replace.parentNode = this;
1714 				children[i].parentNode = null;
1715 				children[i] = replace;
1716 				replace.parentDocument = this.parentDocument;
1717 				return replace;
1718 			}
1719 		}
1720 
1721 		throw new Exception("no such child");
1722 	}
1723 
1724 	/**
1725 		Replaces the given element with a whole group.
1726 	*/
1727 	void replaceChild(Element find, Element[] replace)
1728 		in {
1729 			assert(find !is null);
1730 			assert(replace !is null);
1731 			assert(find.parentNode is this);
1732 			debug foreach(r; replace)
1733 				assert(r.parentNode is null);
1734 		}
1735 		out {
1736 			assert(find.parentNode is null);
1737 			assert(children.length >= replace.length);
1738 			debug foreach(child; children)
1739 				assert(child !is find);
1740 			debug foreach(r; replace)
1741 				assert(r.parentNode is this);
1742 		}
1743 	body {
1744 		if(replace.length == 0) {
1745 			removeChild(find);
1746 			return;
1747 		}
1748 		assert(replace.length);
1749 		for(int i = 0; i < children.length; i++) {
1750 			if(children[i] is find) {
1751 				children[i].parentNode = null; // this element should now be dead
1752 				children[i] = replace[0];
1753 				foreach(e; replace) {
1754 					e.parentNode = this;
1755 					e.parentDocument = this.parentDocument;
1756 				}
1757 
1758 				children = .insertAfter(children, i, replace[1..$]);
1759 
1760 				return;
1761 			}
1762 		}
1763 
1764 		throw new Exception("no such child");
1765 	}
1766 
1767 
1768 	/**
1769 		Removes the given child from this list.
1770 
1771 		Returns the removed element.
1772 	*/
1773 	Element removeChild(Element c)
1774 		in {
1775 			assert(c !is null);
1776 			assert(c.parentNode is this);
1777 		}
1778 		out {
1779 			debug foreach(child; children)
1780 				assert(child !is c);
1781 			assert(c.parentNode is null);
1782 		}
1783 	body {
1784 		foreach(i, e; children) {
1785 			if(e is c) {
1786 				children = children[0..i] ~ children [i+1..$];
1787 				c.parentNode = null;
1788 				return c;
1789 			}
1790 		}
1791 
1792 		throw new Exception("no such child");
1793 	}
1794 
1795 	/// This removes all the children from this element, returning the old list.
1796 	Element[] removeChildren()
1797 		out (ret) {
1798 			assert(children.length == 0);
1799 			debug foreach(r; ret)
1800 				assert(r.parentNode is null);
1801 		}
1802 	body {
1803 		Element[] oldChildren = std.container.dup(children);
1804 		foreach(c; oldChildren)
1805 			c.parentNode = null;
1806 
1807 		children.length = 0;
1808 
1809 		return oldChildren;
1810 	}
1811 
1812 	/**
1813 		Fetch the inside text, with all tags stripped out.
1814 
1815 		<p>cool <b>api</b> &amp; code dude<p>
1816 		innerText of that is "cool api & code dude".
1817 	*/
1818 	@property string innerText() const {
1819 		string s;
1820 		foreach(child; children) {
1821 			if(child.nodeType != NodeType.Text)
1822 				s ~= child.innerText;
1823 			else
1824 				s ~= child.nodeValue();
1825 		}
1826 		return s;
1827 	}
1828 
1829 	/**
1830 		Sets the inside text, replacing all children. You don't
1831 		have to worry about entity encoding.
1832 	*/
1833 	@property void innerText(string text) {
1834 		selfClosed = false;
1835 		Element e = new TextNode(parentDocument, text);
1836 		e.parentNode = this;
1837 		children = [e];
1838 	}
1839 
1840 	/**
1841 		Strips this node out of the document, replacing it with the given text
1842 	*/
1843 	@property void outerText(string text) {
1844 		parentNode.replaceChild(this, new TextNode(parentDocument, text));
1845 	}
1846 
1847 	/**
1848 		Same result as innerText; the tag with all inner tags stripped out
1849 	*/
1850 	string outerText() const {
1851 		return innerText();
1852 	}
1853 
1854 
1855 	/* *******************************
1856 	          Miscellaneous
1857 	*********************************/
1858 
1859 	/// This is a full clone of the element
1860 	@property Element cloned()
1861 	/+
1862 		out(ret) {
1863 			// FIXME: not sure why these fail...
1864 			assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
1865 			assert(ret.tagName == this.tagName);
1866 		}
1867 	body {
1868 	+/
1869 	{
1870 		auto e = new Element(parentDocument, tagName, attributes.dup, selfClosed);
1871 		foreach(child; children) {
1872 			e.appendChild(child.cloned);
1873 		}
1874 
1875 		return e;
1876 	}
1877 
1878 	/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
1879 	Element cloneNode(bool deepClone) {
1880 		if(deepClone)
1881 			return this.cloned;
1882 
1883 		// shallow clone
1884 		auto e = new Element(parentDocument, tagName, attributes.dup, selfClosed);
1885 		return e;
1886 	}
1887 
1888 	///.
1889 	string nodeValue() const {
1890 		return "";
1891 	}
1892 
1893 	// should return int
1894 	///.
1895 	@property int nodeType() const {
1896 		return 1;
1897 	}
1898 
1899 
	// Class invariant: the tag name never contains a space and, in debug builds,
	// every child is non-null, names this element as its parentNode, and is neither
	// this element itself nor this element's own parent.
	invariant () {
		assert(tagName.indexOf(" ") == -1);

		// the child checks below are compiled only with -debug
		if(children !is null)
		debug foreach(child; children) {
		//	assert(parentNode !is null);
			assert(child !is null);
			assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
			assert(child !is this);
			assert(child !is parentNode);
		}

		/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
		if(parentNode !is null) {
			// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
			auto lol = cast(TextNode) this;
			assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
		}
		+/
		//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
		// reason is so you can create these without needing a reference to the document
	}
1922 
1923 	/**
1924 		Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
1925 		an XML file.
1926 	*/
1927 	override string toString() const {
1928 		return writeToAppender();
1929 	}
1930 
1931 	/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
1932 	/// Returns the string it creates.
1933 	string writeToAppender(Appender!string where = appender!string()) const {
1934 		assert(tagName !is null);
1935 
1936 		where.reserve((this.children.length + 1) * 512);
1937 
1938 		auto start = where.data.length;
1939 
1940 		where.put("<");
1941 		where.put(tagName);
1942 
1943 		foreach(n, v ; attributes) {
1944 			assert(n !is null);
1945 			//assert(v !is null);
1946 			where.put(" ");
1947 			where.put(n);
1948 			where.put("=\"");
1949 			htmlEntitiesEncode(v, where);
1950 			where.put("\"");
1951 		}
1952 
1953 		if(selfClosed){
1954 			where.put(" />");
1955 			return where.data[start .. $];
1956 		}
1957 
1958 		where.put('>');
1959 
1960 		innerHTML(where);
1961 
1962 		where.put("</");
1963 		where.put(tagName);
1964 		where.put('>');
1965 
1966 		return where.data[start .. $];
1967 	}
1968 
1969 	/**
1970 		Returns a lazy range of all its children, recursively.
1971 	*/
1972 	@property ElementStream tree() {
1973 		return new ElementStream(this);
1974 	}
1975 
1976 	// I moved these from Form because they are generally useful.
1977 	// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
1978 	/// Tags: HTML, HTML5
1979 	// FIXME: add overloads for other label types... 
1980 	Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
1981 		auto fs = this;
1982 		auto i = fs.addChild("label");
1983 		i.addChild("span", label);
1984 		Element input;
1985 		if(type == "textarea")
1986 			input = i.addChild("textarea").
1987 			setAttribute("name", name).
1988 			setAttribute("rows", "6");
1989 		else
1990 			input = i.addChild("input").
1991 			setAttribute("name", name).
1992 			setAttribute("type", type);
1993 
1994 		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
1995 		fieldOptions.applyToElement(input);
1996 		return i;
1997 	}
1998 
1999 	Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
2000 		auto fs = this;
2001 		auto i = fs.addChild("label");
2002 		i.addChild(label);
2003 		Element input;
2004 		if(type == "textarea")
2005 			input = i.addChild("textarea").
2006 			setAttribute("name", name).
2007 			setAttribute("rows", "6");
2008 		else
2009 			input = i.addChild("input").
2010 			setAttribute("name", name).
2011 			setAttribute("type", type);
2012 
2013 		// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
2014 		fieldOptions.applyToElement(input);
2015 		return i;
2016 	}
2017 
2018 	Element addField(string label, string name, FormFieldOptions fieldOptions) {
2019 		return addField(label, name, "text", fieldOptions);
2020 	}
2021 
2022 	Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
2023 		auto fs = this;
2024 		auto i = fs.addChild("label");
2025 		i.addChild("span", label);
2026 		auto sel = i.addChild("select").setAttribute("name", name);
2027 
2028 		foreach(k, opt; options)
2029 			sel.addChild("option", opt, k);
2030 
2031 		// FIXME: implement requirements somehow
2032 
2033 		return i;
2034 	}
2035 
2036 	Element addSubmitButton(string label = null) {
2037 		auto t = this;
2038 		auto holder = t.addChild("div");
2039 		holder.addClass("submit-holder");
2040 		auto i = holder.addChild("input");
2041 		i.type = "submit";
2042 		if(label.length)
2043 			i.value = label;
2044 		return holder;
2045 	}
2046 
2047 }
2048 
/// An Element subclass with the tag name "#fragment" whose serialized form is just its children's markup.
class DocumentFragment : Element {
	/// Sets the tag name to "#fragment" and forwards _parentDocument to the Element constructor.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/// Writes only the children (no wrapping tag) to where and returns what was written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		string inner = this.innerHTML(where);
		return inner;
	}
}
2062 
/// Encodes the html entities in the given text: &, <, > and " become named entities, and every
/// character outside 1..127 becomes a numeric &#N; entity, so the result works even if your
/// charset isn't set right.
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value is just the encoded form of data (not the whole buffer).
string htmlEntitiesEncode(string data, Appender!string output = appender!string()) {
	// If nothing needs encoding we can skip the decoding loop entirely. This check cuts the
	// net toString time by better than half in my test. Let me know if it made your tests worse
	// though, since if you use an entity in just about every location, the check will add time...
	// but I suspect the average experience is like mine since the scan gives up as soon as it can.

	bool needsEncoding = false;
	foreach(char unit; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(unit == '<' || unit == '>' || unit == '"' || unit == '&' || cast(uint) unit > 127) {
			needsEncoding = true; // there's actual work to be done
			break;
		}
	}

	if(!needsEncoding) {
		output.put(data);
		return data;
	}

	// the buffer may already hold earlier output; only our part is returned
	immutable begin = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar point; data) {
		switch(point) {
			case '&':
				output.put("&amp;");
				break;
			case '<':
				output.put("&lt;");
				break;
			case '>':
				output.put("&gt;");
				break;
			case '\"':
				output.put("&quot;");
				break;
//			case '\'':
//				output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
				// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
				// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
				// idk about apostrophes though. Might be worth it, might not.
			default:
				if(point > 0 && point < 128)
					output.put(point);
				else
					output.put("&#" ~ std.conv.to!string(cast(int) point) ~ ";");
				break;
		}
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[begin .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}
2119 
/// Same encoding as htmlEntitiesEncode (with a fresh buffer); it works for xml too
string xmlEntitiesEncode(string data) {
	string encoded = htmlEntitiesEncode(data);
	return encoded;
}
2124 
/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
///
/// The argument is the complete entity text: the leading '&', the name (or
/// '#' followed by a number), and exactly one trailing terminator character,
/// normally ';'. The first and last characters are stripped without being
/// inspected, so "&amp;" and "&amp&" are treated the same.
///
/// Returns: the decoded character. An unknown named entity, or an argument
/// too short to hold a name, gives '?'. A numeric entity with no digits in
/// it gives ' '.
///
/// Throws: whatever std.conv.to!int throws for a malformed or overflowing
/// decimal number, and whatever intFromHex does with a malformed hex number.
dchar parseEntity(in dchar[] entity) {
	// need at least '&', one character and the terminator; anything shorter
	// would index out of bounds below
	if(entity.length < 3)
		return '?';

	switch(entity[1..$-1]) {
		// the xml entities
		case "quot": return '"';
		case "apos": return '\'';
		case "lt": return '<';
		case "gt": return '>';
		case "amp": return '&';

		// the next are html rather than xml

		// latin-1 letters
		case "Agrave": return '\u00C0';
		case "Aacute": return '\u00C1';
		case "Acirc": return '\u00C2';
		case "Atilde": return '\u00C3';
		case "Auml": return '\u00C4';
		case "Aring": return '\u00C5';
		case "AElig": return '\u00C6';
		case "Ccedil": return '\u00C7';
		case "Egrave": return '\u00C8';
		case "Eacute": return '\u00C9';
		case "Ecirc": return '\u00CA';
		case "Euml": return '\u00CB';
		case "Igrave": return '\u00CC';
		case "Iacute": return '\u00CD';
		case "Icirc": return '\u00CE';
		case "Iuml": return '\u00CF';
		case "ETH": return '\u00D0';
		case "Ntilde": return '\u00D1';
		case "Ograve": return '\u00D2';
		case "Oacute": return '\u00D3';
		case "Ocirc": return '\u00D4';
		case "Otilde": return '\u00D5';
		case "Ouml": return '\u00D6';
		case "Oslash": return '\u00D8';
		case "Ugrave": return '\u00D9';
		case "Uacute": return '\u00DA';
		case "Ucirc": return '\u00DB';
		case "Uuml": return '\u00DC';
		case "Yacute": return '\u00DD';
		case "THORN": return '\u00DE';
		case "szlig": return '\u00DF';
		case "agrave": return '\u00E0';
		case "aacute": return '\u00E1';
		case "acirc": return '\u00E2';
		case "atilde": return '\u00E3';
		case "auml": return '\u00E4';
		case "aring": return '\u00E5';
		case "aelig": return '\u00E6';
		case "ccedil": return '\u00E7';
		case "egrave": return '\u00E8';
		case "eacute": return '\u00E9';
		case "ecirc": return '\u00EA';
		case "euml": return '\u00EB';
		case "igrave": return '\u00EC';
		case "iacute": return '\u00ED';
		case "icirc": return '\u00EE';
		case "iuml": return '\u00EF';
		case "eth": return '\u00F0';
		case "ntilde": return '\u00F1';
		case "ograve": return '\u00F2';
		case "oacute": return '\u00F3';
		case "ocirc": return '\u00F4';
		case "otilde": return '\u00F5';
		case "ouml": return '\u00F6';
		case "oslash": return '\u00F8';
		case "ugrave": return '\u00F9';
		case "uacute": return '\u00FA';
		case "ucirc": return '\u00FB';
		case "uuml": return '\u00FC';
		case "yacute": return '\u00FD';
		case "thorn": return '\u00FE';
		case "yuml": return '\u00FF';

		// latin-1 symbols
		case "nbsp": return '\u00A0';
		case "iexcl": return '\u00A1';
		case "cent": return '\u00A2';
		case "pound": return '\u00A3';
		case "curren": return '\u00A4';
		case "yen": return '\u00A5';
		case "brvbar": return '\u00A6';
		case "sect": return '\u00A7';
		case "uml": return '\u00A8';
		case "copy": return '\u00A9';
		case "ordf": return '\u00AA';
		case "laquo": return '\u00AB';
		case "not": return '\u00AC';
		case "shy": return '\u00AD';
		case "reg": return '\u00AE';
		case "macr": return '\u00AF';
		case "deg": return '\u00B0';
		case "plusmn": return '\u00B1';
		case "sup2": return '\u00B2';
		case "sup3": return '\u00B3';
		case "acute": return '\u00B4';
		case "micro": return '\u00B5';
		case "para": return '\u00B6';
		case "middot": return '\u00B7';
		case "cedil": return '\u00B8';
		case "sup1": return '\u00B9';
		case "ordm": return '\u00BA';
		case "raquo": return '\u00BB';
		case "frac14": return '\u00BC';
		case "frac12": return '\u00BD';
		case "frac34": return '\u00BE';
		case "iquest": return '\u00BF';
		case "times": return '\u00D7';
		case "divide": return '\u00F7';

		// beyond latin-1
		case "OElig": return '\u0152';
		case "oelig": return '\u0153';
		case "Scaron": return '\u0160';
		case "scaron": return '\u0161';
		case "Yuml": return '\u0178';
		case "fnof": return '\u0192';
		case "circ": return '\u02C6';
		case "tilde": return '\u02DC';
		case "Omicron": return '\u039F';
		case "omicron": return '\u03BF';
		case "trade": return '\u2122';
		case "euro": return '\u20AC';

		// general punctuation; these show up all the time in real pages
		case "ndash": return '\u2013';
		case "mdash": return '\u2014';
		case "lsquo": return '\u2018';
		case "rsquo": return '\u2019';
		case "sbquo": return '\u201A';
		case "ldquo": return '\u201C';
		case "rdquo": return '\u201D';
		case "bdquo": return '\u201E';
		case "dagger": return '\u2020';
		case "Dagger": return '\u2021';
		case "bull": return '\u2022';
		case "hellip": return '\u2026';
		case "permil": return '\u2030';
		case "lsaquo": return '\u2039';
		case "rsaquo": return '\u203A';

		// and handling numeric entities
		default:
			if(entity[1] != '#')
				return '?'; // a name we don't know

			if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
				if(entity.length < 4)
					return ' '; // "&#x" with nothing after it; really broken html

				auto hex = entity[3..$-1];

				auto p = intFromHex(to!string(hex).toLower());
				return cast(dchar) p;
			} else {
				auto decimal = entity[2..$-1];

				// dealing with broken html entities: skip any junk before the first digit
				while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
					decimal = decimal[1 .. $];

				if(decimal.length == 0)
					return ' '; // this is really broken html
				// done with dealing with broken stuff

				auto p = std.conv.to!int(decimal);
				return cast(dchar) p;
			}
	}

	assert(0);
}
2326 
2327 import std.utf;
2328 import std.stdio;
2329 
/// Decodes the entities in a string of raw HTML, giving back plain utf-8 text.
///
/// The default is loose mode, which tries to get something useful out of
/// garbage input: a stray or unterminated entity is copied through as-is
/// rather than being dropped. Pass true for strict to get an Exception on
/// such input instead.
string htmlEntitiesDecode(string data, bool strict = false) {
	// Every entity starts with &, so without one there is nothing to do and the
	// original slice can be handed back. This check is a *big* win; about 50%
	// of parse time on the author's test.
	if(data.indexOf("&") == -1)
		return data;

	char[] decoded; // plain appending reportedly beats appender! here
	char[4] utf8;

	dchar[] pending; // the entity collected so far, leading & included
	bool insideEntity = false;
	int charsSinceAmp = 0;

	// encodes one character onto the end of the output
	void emit(dchar c) {
		decoded ~= utf8[0 .. std.utf.encode(utf8, c)];
	}

	// what we thought was an entity isn't one: complain or copy it through literally
	void abandonPending() {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(pending));

		insideEntity = false;
		decoded ~= to!(char[])(pending);
	}

	foreach(dchar ch; data) {
		if(!insideEntity) {
			if(ch == '&') {
				insideEntity = true;
				pending = null;
				pending ~= ch;
				charsSinceAmp = 0;
			} else
				emit(ch);

			continue;
		}

		charsSinceAmp++;
		pending ~= ch;

		switch(ch) {
			case '&':
				// seen in the wild: html like &0&#1111; - try to cope with it
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(pending));

				// not strict, so have a go at parsing both of them
				if(pending == "&&")
					decoded ~= "&"; // double amp: keep the first, the second may still begin an entity
				else
					emit(parseEntity(pending));

				// still inside an entity; start over from the & just read
				pending = pending[0 .. 1];
				charsSinceAmp = 0;
			break;
			case ';':
				insideEntity = false;
				emit(parseEntity(pending));
			break;
			case ' ':
				// e.g. you &amp i
				abandonPending();
			break;
			default:
				// it has run on too long to be an entity
				if(charsSinceAmp >= 9)
					abandonPending();
			break;
		}
	}

	if(insideEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(pending));

		// loose mode: recover by keeping the text, so at least no data is dropped
		decoded ~= to!string(pending);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) decoded; // assumeUnique is actually kinda slow, lol
}
2411 
/// Abstract base for Element subclasses that stand for something other than
/// an ordinary tag. It refuses children and reports its own node type.
abstract class SpecialElement : Element {
	/// Passes the owning document on to the Element constructor.
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Special nodes cannot hold children: calling this always fails with assert(0).
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Always 100 for a special node.
	@property override int nodeType() const {
		return 100;
	}
}
2427 
2428 ///.
2429 class RawSource : SpecialElement {
2430 	///.
2431 	this(Document _parentDocument, string s) {
2432 		super(_parentDocument);
2433 		source = s;
2434 		tagName = "#raw";
2435 	}
2436 
2437 	///.
2438 	override string nodeValue() const {
2439 		return this.toString();
2440 	}
2441 
2442 	///.
2443 	override string writeToAppender(Appender!string where = appender!string()) const {
2444 		where.put(source);
2445 		return source;
2446 	}
2447 
2448 	///.
2449 	string source;
2450 }
2451 
/// Abstract base for nodes holding server-side code. The source member is
/// not set by this constructor; the subclasses in this file assign it.
abstract class ServerSideCode : SpecialElement {
	/// Sets the tag name to "#" followed by type (e.g. "#php").
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// Returns the source, without the surrounding angle brackets.
	override string nodeValue() const {
		return this.source;
	}

	/// Writes "<", the source, and ">" to the appender, with no encoding.
	/// Returns the part of the appender's data that this call added.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length; // remember where our own output begins
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	/// The code, as it appears between the "<" and ">".
	string source;
}
2475 
2476 ///.
2477 class PhpCode : ServerSideCode {
2478 	///.
2479 	this(Document _parentDocument, string s) {
2480 		super(_parentDocument, "php");
2481 		source = s;
2482 	}
2483 }
2484 
2485 ///.
2486 class AspCode : ServerSideCode {
2487 	///.
2488 	this(Document _parentDocument, string s) {
2489 		super(_parentDocument, "asp");
2490 		source = s;
2491 	}
2492 }
2493 
2494 ///.
2495 class BangInstruction : SpecialElement {
2496 	///.
2497 	this(Document _parentDocument, string s) {
2498 		super(_parentDocument);
2499 		source = s;
2500 		tagName = "#bpi";
2501 	}
2502 
2503 	///.
2504 	override string nodeValue() const {
2505 		return this.source;
2506 	}
2507 
2508 	///.
2509 	override string writeToAppender(Appender!string where = appender!string()) const {
2510 		auto start = where.data.length;
2511 		where.put("<!");
2512 		where.put(source);
2513 		where.put(">");
2514 		return where.data[start .. $];
2515 	}
2516 
2517 	///.
2518 	string source;
2519 }
2520 
2521 ///.
2522 class QuestionInstruction : SpecialElement {
2523 	///.
2524 	this(Document _parentDocument, string s) {
2525 		super(_parentDocument);
2526 		source = s;
2527 		tagName = "#qpi";
2528 	}
2529 
2530 	///.
2531 	override string nodeValue() const {
2532 		return this.source;
2533 	}
2534 
2535 	///.
2536 	override string writeToAppender(Appender!string where = appender!string()) const {
2537 		auto start = where.data.length;
2538 		where.put("<");
2539 		where.put(source);
2540 		where.put(">");
2541 		return where.data[start .. $];
2542 	}
2543 
2544 	///.
2545 	string source;
2546 }
2547 
2548 ///.
2549 class HtmlComment : SpecialElement {
2550 	///.
2551 	this(Document _parentDocument, string s) {
2552 		super(_parentDocument);
2553 		source = s;
2554 		tagName = "#comment";
2555 	}
2556 
2557 	///.
2558 	override string nodeValue() const {
2559 		return this.source;
2560 	}
2561 
2562 	///.
2563 	override string writeToAppender(Appender!string where = appender!string()) const {
2564 		auto start = where.data.length;
2565 		where.put("<!--");
2566 		where.put(source);
2567 		where.put("-->");
2568 		return where.data[start .. $];
2569 	}
2570 
2571 	///.
2572 	string source;
2573 }
2574 
2575 
2576 
2577 
2578 ///.
2579 class TextNode : Element {
2580   public:
2581 	///.
2582 	this(Document _parentDocument, string e) {
2583 		super(_parentDocument);
2584 		contents = e;
2585 		tagName = "#text";
2586 	}
2587 
2588 	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes
2589 
2590 	///.
2591 	static TextNode fromUndecodedString(Document _parentDocument, string html) {
2592 		auto e = new TextNode(_parentDocument, "");
2593 		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
2594 		return e;
2595 	}
2596 
2597 	///.
2598 	override @property Element cloned() {
2599 		auto n = new TextNode(parentDocument, contents);
2600 		return n;
2601 	}
2602 
2603 	///.
2604 	override string nodeValue() const {
2605 		return this.contents; //toString();
2606 	}
2607 
2608 	///.
2609 	@property override int nodeType() const {
2610 		return NodeType.Text;
2611 	}
2612 
2613 	///.
2614 	override string writeToAppender(Appender!string where = appender!string()) const {
2615 		string s;
2616 		if(contents.length)
2617 			s = htmlEntitiesEncode(contents, where);
2618 		else
2619 			s = "";
2620 
2621 		assert(s !is null);
2622 		return s;
2623 	}
2624 
2625 	///.
2626 	override Element appendChild(Element e) {
2627 		assert(0, "Cannot append to a text node");
2628 	}
2629 
2630 	///.
2631 	string contents;
2632 	// alias contents content; // I just mistype this a lot, 
2633 }
2634 
2635 /**
2636 	There are subclasses of Element offering improved helper
2637 	functions for the element in HTML.
2638 */
2639 
2640 ///.
2641 class Link : Element {
2642 
2643 	///.
2644 	this(Document _parentDocument) {
2645 		super(_parentDocument);
2646 		this.tagName = "a";
2647 	}
2648 
2649 
2650 	///.
2651 	this(string href, string text) {
2652 		super("a");
2653 		setAttribute("href", href);
2654 		innerText = text;
2655 	}
2656 /+
2657 	/// Returns everything in the href EXCEPT the query string
2658 	@property string targetSansQuery() {
2659 
2660 	}
2661 
2662 	///.
2663 	@property string domainName() {
2664 
2665 	}
2666 
2667 	///.
2668 	@property string path
2669 +/
2670 	/// This gets a variable from the URL's query string.
2671 	string getValue(string name) {
2672 		auto vars = variablesHash();
2673 		if(name in vars)
2674 			return vars[name];
2675 		return null;
2676 	}
2677 
2678 	private string[string] variablesHash() {
2679 		string href = getAttribute("href");
2680 		if(href is null)
2681 			return null;
2682 
2683 		auto ques = href.indexOf("?");
2684 		string str = "";
2685 		if(ques != -1) {
2686 			str = href[ques+1..$];
2687 
2688 			auto fragment = str.indexOf("#");
2689 			if(fragment != -1)
2690 				str = str[0..fragment];
2691 		}
2692 
2693 		string[] variables = str.split("&");
2694 
2695 		string[string] hash;
2696 
2697 		foreach(var; variables) {
2698 			auto index = var.indexOf("=");
2699 			if(index == -1)
2700 				hash[var] = "";
2701 			else {
2702 				hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]);
2703 			}
2704 		}
2705 
2706 		return hash;
2707 	}
2708 
2709 	///.
2710 	/*private*/ void updateQueryString(string[string] vars) {
2711 		string href = getAttribute("href");
2712 
2713 		auto question = href.indexOf("?");
2714 		if(question != -1)
2715 			href = href[0..question];
2716 
2717 		string frag = "";
2718 		auto fragment = href.indexOf("#");
2719 		if(fragment != -1) {
2720 			frag = href[fragment..$];
2721 			href = href[0..fragment];
2722 		}
2723 
2724 		string query = "?";
2725 		bool first = true;
2726 		foreach(name, value; vars) {
2727 			if(!first)
2728 				query ~= "&";
2729 			else
2730 				first = false;
2731 
2732 			query ~= encodeComponent(name);
2733 			if(value.length)
2734 				query ~= "=" ~ encodeComponent(value);
2735 		}
2736 
2737 		if(query != "?")
2738 			href ~= query;
2739 
2740 		href ~= frag;
2741 
2742 		setAttribute("href", href);
2743 	}
2744 
2745 	/// Sets or adds the variable with the given name to the given value
2746 	/// It automatically URI encodes the values and takes care of the ? and &.
2747 	void setValue(string name, string variable) {
2748 		auto vars = variablesHash();
2749 		vars[name] = variable;
2750 
2751 		updateQueryString(vars);
2752 	}
2753 
2754 	/// Removes the given variable from the query string
2755 	void removeValue(string name) {
2756 		auto vars = variablesHash();
2757 		vars.remove(name);
2758 
2759 		updateQueryString(vars);
2760 	}
2761 
2762 	/*
2763 	///.
2764 	override string toString() {
2765 
2766 	}
2767 
2768 	///.
2769 	override string getAttribute(string name) {
2770 		if(name == "href") {
2771 
2772 		} else
2773 			return super.getAttribute(name);
2774 	}
2775 	*/
2776 }
2777 
2778 ///.
2779 class Form : Element {
2780 
2781 	///.
2782 	this(Document _parentDocument) {
2783 		super(_parentDocument);
2784 		tagName = "form";
2785 	}
2786 
2787 	override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
2788 		auto t = this.querySelector("fieldset div");
2789 		if(t is null)
2790 			return super.addField(label, name, type, fieldOptions);
2791 		else
2792 			return t.addField(label, name, type, fieldOptions);
2793 	}
2794 
2795 	override Element addField(string label, string name, FormFieldOptions fieldOptions) {
2796 		auto type = "text";
2797 		auto t = this.querySelector("fieldset div");
2798 		if(t is null)
2799 			return super.addField(label, name, type, fieldOptions);
2800 		else
2801 			return t.addField(label, name, type, fieldOptions);
2802 	}
2803 
2804 	override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
2805 		auto t = this.querySelector("fieldset div");
2806 		if(t is null)
2807 			return super.addField(label, name, options, fieldOptions);
2808 		else
2809 			return t.addField(label, name, options, fieldOptions);
2810 	}
2811 
2812 	// FIXME: doesn't handle arrays; multiple fields can have the same name
2813 
2814 	/// Set's the form field's value. For input boxes, this sets the value attribute. For
2815 	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
2816 	/// the checked/selected attribute from all, and adds it to the one matching the value.
2817 	/// For checkboxes, if the value is non-null and not empty, it checks the box.
2818 
2819 	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
2820 	/// Otherwise, it makes a new input with type=hidden to keep the value.
2821 	void setValue(string field, string value, bool makeNew = true) {
2822 		auto eles = getField(field);
2823 		if(eles.length == 0) {
2824 			if(makeNew) {
2825 				addInput(field, value);
2826 				return;
2827 			} else
2828 				throw new Exception("form field does not exist");
2829 		}
2830 
2831 		if(eles.length == 1) {
2832 			auto e = eles[0];
2833 			switch(e.tagName) {
2834 				default: assert(0);
2835 				case "textarea":
2836 					e.innerText = value;
2837 				break;
2838 				case "input":
2839 					string type = e.getAttribute("type");
2840 					if(type is null) {
2841 						e.value = value;
2842 						return;
2843 					}
2844 					switch(type) {
2845 						case "checkbox":
2846 						case "radio":
2847 							if(value.length)
2848 								e.setAttribute("checked", "checked");
2849 							else
2850 								e.removeAttribute("checked");
2851 						break;
2852 						default:
2853 							e.value = value;
2854 							return;
2855 					}
2856 				break;
2857 				case "select":
2858 					bool found = false;
2859 					foreach(child; e.tree) {
2860 						if(child.tagName != "option")
2861 							continue;
2862 						string val = child.getAttribute("value");
2863 						if(val is null)
2864 							val = child.innerText;
2865 						if(val == value) {
2866 							child.setAttribute("selected", "selected");
2867 							found = true;
2868 						} else
2869 							child.removeAttribute("selected");
2870 					}
2871 
2872 					if(!found) {
2873 						e.addChild("option", value)
2874 						.setAttribute("selected", "selected");
2875 					}
2876 				break;
2877 			}
2878 		} else {
2879 			// assume radio boxes
2880 			foreach(e; eles) {
2881 				string val = e.getAttribute("value");
2882 				//if(val is null)
2883 				//	throw new Exception("don't know what to do with radio boxes with null value");
2884 				if(val == value)
2885 					e.setAttribute("checked", "checked");
2886 				else
2887 					e.removeAttribute("checked");
2888 			}
2889 		}
2890 	}
2891 
2892 	/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
2893 	/// it makes no attempt to find and modify existing elements in the form to the new values.
2894 	void addValueArray(string key, string[] arrayOfValues) {
2895 		foreach(arr; arrayOfValues)
2896 			addChild("input", key, arr);
2897 	}
2898 
2899 	/// Gets the value of the field; what would be given if it submitted right now. (so
2900 	/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
2901 	/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
2902 	string getValue(string field) {
2903 		auto eles = getField(field);
2904 		if(eles.length == 0)
2905 			return "";
2906 		if(eles.length == 1) {
2907 			auto e = eles[0];
2908 			switch(e.tagName) {
2909 				default: assert(0);
2910 				case "input":
2911 					if(e.type == "checkbox") {
2912 						if(e.checked)
2913 							return e.value.length ? e.value : "checked";
2914 						return "";
2915 					} else
2916 						return e.value;
2917 				case "textarea":
2918 					return e.innerText;
2919 				case "select":
2920 					foreach(child; e.tree) {
2921 						if(child.tagName != "option")
2922 							continue;
2923 						if(child.selected)
2924 							return child.value;
2925 					}
2926 				break;
2927 			}
2928 		} else {
2929 			// assuming radio
2930 			foreach(e; eles) {
2931 				if(e.checked)
2932 					return e.value;
2933 			}
2934 		}
2935 
2936 		return "";
2937 	}
2938 
2939 	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
2940 	///.
2941 	string getPostableData() {
2942 		bool[string] namesDone;
2943 
2944 		string ret;
2945 		bool outputted = false;
2946 
2947 		foreach(e; getElementsBySelector("[name]")) {
2948 			if(e.name in namesDone)
2949 				continue;
2950 
2951 			if(outputted)
2952 				ret ~= "&";
2953 			else
2954 				outputted = true;
2955 
2956 			ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name));
2957 
2958 			namesDone[e.name] = true;
2959 		}
2960 
2961 		return ret;
2962 	}
2963 
2964 	/// Gets the actual elements with the given name
2965 	Element[] getField(string name) {
2966 		Element[] ret;
2967 		foreach(e; tree) {
2968 			if(e.name == name)
2969 				ret ~= e;
2970 		}
2971 		return ret;
2972 	}
2973 
2974 	/// Grabs the <label> with the given for tag, if there is one.
2975 	Element getLabel(string forId) {
2976 		foreach(e; tree)
2977 			if(e.tagName == "label" && e.getAttribute("for") == forId)
2978 				return e;
2979 		return null;
2980 	}
2981 
2982 	/// Adds a new INPUT field to the end of the form with the given attributes.
2983 	Element addInput(string name, string value, string type = "hidden") {
2984 		auto e = new Element(parentDocument, "input", null, true);
2985 		e.name = name;
2986 		e.value = value;
2987 		e.type = type;
2988 
2989 		appendChild(e);
2990 
2991 		return e;
2992 	}
2993 
2994 	/// Removes the given field from the form. It finds the element and knocks it right out.
2995 	void removeField(string name) {
2996 		foreach(e; getField(name))
2997 			e.parentNode.removeChild(e);
2998 	}
2999 
3000 	/+
3001 	/// Returns all form members.
3002 	@property Element[] elements() {
3003 
3004 	}
3005 
3006 	///.
3007 	string opDispatch(string name)(string v = null)
3008 		// filter things that should actually be attributes on the form
3009 		if( name != "method" && name != "action" && name != "enctype"
3010 		 && name != "style"  && name != "name" && name != "id" && name != "class")
3011 	{
3012 
3013 	}
3014 	+/
3015 /+
3016 	void submit() {
3017 		// take its elements and submit them through http
3018 	}
3019 +/
3020 }
3021 
3022 import std.conv;
3023 
3024 ///.
3025 class Table : Element {
3026 
3027 	///.
3028 	this(Document _parentDocument) {
3029 		super(_parentDocument);
3030 		tagName = "table";
3031 	}
3032 
3033 	///.
3034 	Element th(T)(T t) {
3035 		Element e;
3036 		if(parentDocument !is null)
3037 			e = parentDocument.createElement("th");
3038 		else
3039 			e = Element.make("th");
3040 		static if(is(T == Html))
3041 			e.innerHTML = t;
3042 		else
3043 			e.innerText = to!string(t);
3044 		return e;
3045 	}
3046 
3047 	///.
3048 	Element td(T)(T t) {
3049 		Element e;
3050 		if(parentDocument !is null)
3051 			e = parentDocument.createElement("td");
3052 		else
3053 			e = Element.make("td");
3054 		static if(is(T == Html))
3055 			e.innerHTML = t;
3056 		else
3057 			e.innerText = to!string(t);
3058 		return e;
3059 	}
3060 
3061 	/// .
3062 	Element appendHeaderRow(T...)(T t) {
3063 		return appendRowInternal("th", "thead", t);
3064 	}
3065 
3066 	/// .
3067 	Element appendFooterRow(T...)(T t) {
3068 		return appendRowInternal("td", "tfoot", t);
3069 	}
3070 
3071 	/// .
3072 	Element appendRow(T...)(T t) {
3073 		return appendRowInternal("td", "tbody", t);
3074 	}
3075 
3076 	void addColumnClasses(string[] classes...) {
3077 		auto grid = getGrid();
3078 		foreach(row; grid)
3079 		foreach(i, cl; classes) {
3080 			if(cl.length)
3081 			if(i < row.length)
3082 				row[i].addClass(cl);
3083 		}
3084 	}
3085 
3086 	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
3087 		Element row = Element.make("tr");
3088 
3089 		foreach(e; t) {
3090 			static if(is(typeof(e) : Element)) {
3091 				if(e.tagName == "td" || e.tagName == "th")
3092 					row.appendChild(e);
3093 				else {
3094 					Element a = Element.make(innerType);
3095 
3096 					a.appendChild(e);
3097 
3098 					row.appendChild(a);
3099 				}
3100 			} else static if(is(typeof(e) == Html)) {
3101 				Element a = Element.make(innerType);
3102 				a.innerHTML = e.source;
3103 				row.appendChild(a);
3104 			} else static if(is(typeof(e) == Element[])) {
3105 				Element a = Element.make(innerType);
3106 				foreach(ele; e)
3107 					a.appendChild(ele);
3108 				row.appendChild(a);
3109 			} else {
3110 				Element a = Element.make(innerType);
3111 				a.innerText = to!string(e);
3112 				row.appendChild(a);
3113 			}
3114 		}
3115 
3116 		foreach(e; children) {
3117 			if(e.tagName == findType) {
3118 				e.appendChild(row);
3119 				return row;
3120 			}
3121 		}
3122 
3123 		// the type was not found if we are here... let's add it so it is well-formed
3124 		auto lol = this.addChild(findType);
3125 		lol.appendChild(row);
3126 
3127 		return row;
3128 	}
3129 
3130 	///.
3131 	Element captionElement() {
3132 		Element cap;
3133 		foreach(c; children) {
3134 			if(c.tagName == "caption") {
3135 				cap = c;
3136 				break;
3137 			}
3138 		}
3139 
3140 		if(cap is null) {
3141 			cap = Element.make("caption");
3142 			appendChild(cap);
3143 		}
3144 
3145 		return cap;
3146 	}
3147 
3148 	///.
3149 	@property string caption() {
3150 		return captionElement().innerText;
3151 	}
3152 
3153 	///.
3154 	@property void caption(string text) {
3155 		captionElement().innerText = text;
3156 	}
3157 
3158 	/// Gets the logical layout of the table as a rectangular grid of
3159 	/// cells. It considers rowspan and colspan. A cell with a large
3160 	/// span is represented in the grid by being referenced several times.
3161 	/// The tablePortition parameter can get just a <thead>, <tbody>, or
3162 	/// <tfoot> portion if you pass one.
3163 	///
3164 	/// Note: the rectangular grid might include null cells.
3165 	///
3166 	/// This is kinda expensive so you should call once when you want the grid,
3167 	/// then do lookups on the returned array.
3168 	TableCell[][] getGrid(Element tablePortition = null)
3169 		in {
3170 			if(tablePortition is null)
3171 				assert(tablePortition is null);
3172 			else {
3173 				assert(tablePortition !is null);
3174 				assert(tablePortition.parentNode is this);
3175 				assert(
3176 					tablePortition.tagName == "tbody"
3177 					||
3178 					tablePortition.tagName == "tfoot"
3179 					||
3180 					tablePortition.tagName == "thead"
3181 				);
3182 			}
3183 		}
3184 	body {
3185 		if(tablePortition is null)
3186 			tablePortition = this;
3187 
3188 		TableCell[][] ret;
3189 
3190 		// FIXME: will also return rows of sub tables!
3191 		auto rows = tablePortition.getElementsByTagName("tr");
3192 		ret.length = rows.length;
3193 
3194 		int maxLength = 0;
3195 
3196 		int insertCell(int row, int position, TableCell cell) {
3197 			if(row >= ret.length)
3198 				return position; // not supposed to happen - a rowspan is prolly too big.
3199 
3200 			if(position == -1) {
3201 				position++;
3202 				foreach(item; ret[row]) {
3203 					if(item is null)
3204 						break;
3205 					position++;
3206 				}
3207 			}
3208 
3209 			if(position < ret[row].length)
3210 				ret[row][position] = cell;
3211 			else
3212 				foreach(i; ret[row].length .. position + 1) {
3213 					if(i == position)
3214 						ret[row] ~= cell;
3215 					else
3216 						ret[row] ~= null;
3217 				}
3218 			return position;
3219 		}
3220 
3221 		foreach(int i, rowElement; rows) {
3222 			auto row = cast(TableRow) rowElement;
3223 			assert(row !is null);
3224 			assert(i < ret.length);
3225 
3226 			int position = 0;
3227 			foreach(cellElement; rowElement.childNodes) {
3228 				auto cell = cast(TableCell) cellElement;
3229 				if(cell is null)
3230 					continue;
3231 
3232 				// FIXME: colspan == 0 or rowspan == 0
3233 				// is supposed to mean fill in the rest of
3234 				// the table, not skip it
3235 				foreach(int j; 0 .. cell.colspan) {
3236 					foreach(int k; 0 .. cell.rowspan)
3237 						// if the first row, always append.
3238 						insertCell(k + i, k == 0 ? -1 : position, cell);
3239 					position++;
3240 				}
3241 			}
3242 
3243 			if(ret[i].length > maxLength)
3244 				maxLength = cast(int) ret[i].length;
3245 		}
3246 
3247 		// want to ensure it's rectangular
3248 		foreach(ref r; ret) {
3249 			foreach(i; r.length .. maxLength)
3250 				r ~= null;
3251 		}
3252 
3253 		return ret;
3254 	}
3255 }
3256 
/// Represents a table row element - a <tr>
class TableRow : Element {
	/// Makes an empty row belonging to the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}
3268 
/// Represents anything that can be a table cell - <td> or <th> html.
class TableCell : Element {
	/// Makes a cell with the given tag name belonging to the given document.
	this(Document _parentDocument, string _tagName) {
		super(_parentDocument, _tagName);
	}

	/// How many rows the cell spans: the rowspan attribute as a number,
	/// or 1 if the attribute is missing or empty.
	@property int rowspan() const {
		auto declared = getAttribute("rowspan");
		return declared.length ? to!int(declared) : 1;
	}

	/// How many columns the cell spans: the colspan attribute as a number,
	/// or 1 if the attribute is missing or empty.
	@property int colspan() const {
		auto declared = getAttribute("colspan");
		return declared.length ? to!int(declared) : 1;
	}

	/// Sets the rowspan attribute, and returns the number given.
	@property int rowspan(int i) {
		setAttribute("rowspan", to!string(i));
		return i;
	}

	/// Sets the colspan attribute, and returns the number given.
	@property int colspan(int i) {
		setAttribute("colspan", to!string(i));
		return i;
	}

}
3303 
3304 
3305 ///.
3306 class MarkupException : Exception {
3307 
3308 	///.
3309 	this(string message, string file = __FILE__, size_t line = __LINE__) {
3310 		super(message, file, line);
3311 	}
3312 }
3313 
/// Thrown by the require variants of the navigation functions when nothing in the tree matches.
class ElementNotFoundException : Exception {

	/// type is the kind of element that was wanted, and search is a selector
	/// describing what was looked for. Both end up in the message. The file
	/// and line default to where the exception is constructed.
	this(string type, string search, string file = __FILE__, size_t line = __LINE__) {
		string message = "Element of type '" ~ type ~ "' matching {" ~ search ~ "} not found.";
		super(message, file, line);
	}
}
3322 
3323 /// The html struct is used to differentiate between regular text nodes and html in certain functions
3324 ///
3325 /// Easiest way to construct it is like this: auto html = Html("<p>hello</p>");
struct Html {
	/// This string holds the actual html. Use it to retrieve the contents.
	/// It is the only member; the struct itself does no validation or escaping.
	string source;
}
3330 
3331 /// The main document interface, including a html parser.
3332 class Document : FileResource {
3333 	///.
3334 	this(string data, bool caseSensitive = false, bool strict = false) {
3335 		parse(data, caseSensitive, strict);
3336 	}
3337 
3338 	/**
3339 		Creates an empty document. It has *nothing* in it at all.
3340 	*/
	this() {
		// intentionally empty: nothing is parsed or created here
	}
3344 
3345 	/// This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
3346 	/// It returns a struct that forwards calls to all elements it holds, and returns itself so you
3347 	/// can chain it.
3348 	///
3349 	/// Example: document["p"].innerText("hello").addClass("modified");
3350 	///
	/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
3352 	///
3353 	/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
3354 	///
3355 	/// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
3356 	/// you could put in some kind of custom filter function tho.
3357 	ElementCollection opIndex(string selector) {
3358 		auto e = ElementCollection(this.root);
3359 		return e[selector];
3360 	}
3361 
3362 	string _contentType = "text/html; charset=utf-8";
3363 
3364 	/// If you're using this for some other kind of XML, you can
3365 	/// set the content type here.
3366 	///
3367 	/// Note: this has no impact on the function of this class.
3368 	/// It is only used if the document is sent via a protocol like HTTP.
3369 	///
3370 	/// This may be called by parse() if it recognizes the data. Otherwise,
3371 	/// if you don't set it, it assumes text/html; charset=utf-8.
3372 	@property string contentType(string mimeType) {
3373 		_contentType = mimeType;
3374 		return _contentType;
3375 	}
3376 
3377 	/// implementing the FileResource interface, useful for sending via
3378 	/// http automatically.
	override @property string contentType() const {
		// whatever the setter stored, or the field's initial value if it was never set
		return _contentType;
	}
3382 
3383 	/// implementing the FileResource interface; it calls toString.
3384 	override immutable(ubyte)[] getData() const {
3385 		return cast(immutable(ubyte)[]) this.toString();
3386 	}
3387 
3388 
	// Concatenates any consecutive text nodes (not implemented; the stub below is commented out)
3390 	/*
3391 	void normalize() {
3392 		
3393 	}
3394 	*/
3395 
3396 	/// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them.
3397 	/// Call this before calling parse().
3398 
3399 	/// Note this will also preserve the prolog and doctype from the original file, if there was one.
3400 	void enableAddingSpecialTagsToDom() {
3401 		parseSawComment = (string) => true;
3402 		parseSawAspCode = (string) => true;
3403 		parseSawPhpCode = (string) => true;
3404 		parseSawQuestionInstruction = (string) => true;
3405 		parseSawBangInstruction = (string) => true;
3406 	}
3407 
3408 	/// If the parser sees a html comment, it will call this callback
3409 	/// <!-- comment --> will call parseSawComment(" comment ")
3410 	/// Return true if you want the node appended to the document.
3411 	bool delegate(string) parseSawComment;
3412 
3413 	/// If the parser sees <% asp code... %>, it will call this callback.
3414 	/// It will be passed "% asp code... %" or "%= asp code .. %"
3415 	/// Return true if you want the node appended to the document.
3416 	bool delegate(string) parseSawAspCode;
3417 
3418 	/// If the parser sees <?php php code... ?>, it will call this callback.
	/// It will be passed "?php php code... ?" or "?= php code .. ?"
3420 	/// Note: dom.d cannot identify  the other php <? code ?> short format.
3421 	/// Return true if you want the node appended to the document.
3422 	bool delegate(string) parseSawPhpCode;
3423 
3424 	/// if it sees a <?xxx> that is not php or asp   
3425 	/// it calls this function with the contents.
3426 	/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
3427 	/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
3428 	/// Return true if you want the node appended to the document.
3429 	bool delegate(string) parseSawQuestionInstruction;
3430 
3431 	/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
3432 	/// it calls this function with the contents.
3433 	/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
3434 	/// Return true if you want the node appended to the document.
3435 	bool delegate(string) parseSawBangInstruction;
3436 
3437 	/// Given the kind of garbage you find on the Internet, try to make sense of it.
3438 	/// Equivalent to document.parse(data, false, false, null);
3439 	/// (Case-insensitive, non-strict, determine character encoding from the data.)
3440 
3441 	/// NOTE: this makes no attempt at added security.
	void parseGarbage(string data) {
		// arguments: caseSensitive = false, strict = false, dataEncoding = null
		// (a null encoding asks handleDataEncoding to detect it from the data)
		parse(data, false, false, null);
	}
3445 
3446 	Utf8Stream handleDataEncoding(in string rawdata, string dataEncoding, bool strict) {
3447 		// gotta determine the data encoding. If you know it, pass it in above to skip all this.
3448 		if(dataEncoding is null) {
3449 			dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
3450 			// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
3451 			// Now, XML and HTML can both list encoding in the document, but we can't really parse
3452 			// it here without changing a lot of code until we know the encoding. So I'm going to
3453 			// do some hackish string checking.
3454 			if(dataEncoding is null) {
3455 				auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
3456 				// first, look for an XML prolog
3457 				auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
3458 				if(idx != -1) {
3459 					idx += "encoding=\"".length;
3460 					// we're probably past the prolog if it's this far in; we might be looking at
3461 					// content. Forget about it.
3462 					if(idx > 100)
3463 						idx = -1;
3464 				}
3465 				// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
3466 				if(idx == -1) {
3467 					idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
3468 					if(idx != -1) {
3469 						idx += "charset=".length;
3470 						if(dataAsBytes[idx] == '"')
3471 							idx++;
3472 					}
3473 				}
3474 
3475 				// found something in either branch...
3476 				if(idx != -1) {
3477 					// read till a quote or about 12 chars, whichever comes first...
3478 					auto end = idx;
3479 					while(end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12)
3480 						end++;
3481 
3482 					dataEncoding = cast(string) dataAsBytes[idx .. end];
3483 				}
3484 				// otherwise, we just don't know.
3485 			}
3486 		}
3487 
3488 		if(dataEncoding is null) {
3489 			if(strict)
3490 				throw new MarkupException("I couldn't figure out the encoding of this document.");
3491 			else
3492 			// if we really don't know by here, it means we already tried UTF-8,
3493 			// looked for utf 16 and 32 byte order marks, and looked for xml or meta
3494 			// tags... let's assume it's Windows-1252, since that's probably the most
3495 			// common aside from utf that wouldn't be labeled.
3496 
3497 			dataEncoding = "Windows 1252";
3498 		}
3499 
3500 		// and now, go ahead and convert it.
3501 
3502 		string data;
3503 
3504 		if(!strict) {
3505 			// if we're in non-strict mode, we need to check
3506 			// the document for mislabeling too; sometimes
3507 			// web documents will say they are utf-8, but aren't
3508 			// actually properly encoded. If it fails to validate,
3509 			// we'll assume it's actually Windows encoding - the most
3510 			// likely candidate for mislabeled garbage.
3511 			dataEncoding = dataEncoding.toLower();
3512 			dataEncoding = dataEncoding.replace(" ", "");
3513 			dataEncoding = dataEncoding.replace("-", "");
3514 			dataEncoding = dataEncoding.replace("_", "");
3515 			if(dataEncoding == "utf8") {
3516 				try {
3517 					validate(rawdata);
3518 				} catch(UTFException e) {
3519 					dataEncoding = "Windows 1252";
3520 				}
3521 			}
3522 		}
3523 
3524 		if(dataEncoding != "UTF-8") {
3525 			if(strict)
3526 				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
3527 			else {
3528 				try {
3529 					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
3530 				} catch(Exception e) {
3531 					data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
3532 				}
3533 			}
3534 		} else
3535 			data = rawdata;
3536 
3537 		static if(is(Utf8Stream == string))
3538 			return data;
3539 		else
3540 			return new Utf8Stream(data);
3541 	}
3542 
3543 	/**
3544 		Take XMLish data and try to make the DOM tree out of it.
3545 
3546 		The goal isn't to be perfect, but to just be good enough to
3547 		approximate Javascript's behavior.
3548 
3549 		If strict, it throws on something that doesn't make sense.
3550 		(Examples: mismatched tags. It doesn't validate!)
3551 		If not strict, it tries to recover anyway, and only throws
3552 		when something is REALLY unworkable.
3553 
3554 		If strict is false, it uses a magic list of tags that needn't
3555 		be closed. If you are writing a document specifically for this,
3556 		try to avoid such - use self closed tags at least. Easier to parse.
3557 
		The dataEncoding argument can be used to pass a specific
		charset encoding for automatic conversion. If null (which is NOT
		the default!), it tries to determine from the data itself,
		using the xml prolog or meta tags. If it still cannot tell, strict
		mode throws a MarkupException and non-strict mode assumes Windows-1252.
3562 
3563 		If this assumption is wrong, it can throw on non-ascii
3564 		characters!
3565 
3566 
3567 		Note that it previously assumed the data was encoded as UTF-8, which
3568 		is why the dataEncoding argument defaults to that.
3569 
3570 		So it shouldn't break backward compatibility.
3571 
3572 		But, if you want the best behavior on wild data - figuring it out from the document
3573 		instead of assuming - you'll probably want to change that argument to null.
3574 
3575 	*/
3576 	void parse(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
3577 		auto data = handleDataEncoding(rawdata, dataEncoding, strict);
3578 		parseStream(data, caseSensitive, strict);
3579 	}
3580 
	// note: this works best in strict mode, unless data is just a simple string wrapper
3582 	void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) {
3583 		// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
3584 		// of my big app.
3585 
3586 		assert(data !is null);
3587 
3588 		// go through character by character.
3589 		// if you see a <, consider it a tag.
3590 		// name goes until the first non tagname character
3591 		// then see if it self closes or has an attribute
3592 
3593 		// if not in a tag, anything not a tag is a big text
3594 		// node child. It ends as soon as it sees a <
3595 
3596 		// Whitespace in text or attributes is preserved, but not between attributes
3597 
3598 		// &amp; and friends are converted when I know them, left the same otherwise
3599 
3600 
3601 		// this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really)
3602 		//validate(data); // it *must* be UTF-8 for this to work correctly
3603 
3604 		sizediff_t pos = 0;
3605 
3606 		clear();
3607 
3608 		loose = !caseSensitive;
3609 
3610 		bool sawImproperNesting = false;
3611 		bool paragraphHackfixRequired = false;
3612 
3613 		int getLineNumber(sizediff_t p) {
3614 			int line = 1;
3615 			foreach(c; data[0..p])
3616 				if(c == '\n')
3617 					line++;
3618 			return line;
3619 		}
3620 
3621 		void parseError(string message) {
3622 			throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message));
3623 		}
3624 
3625 		void eatWhitespace() {
3626 			while(pos < data.length && (data[pos] == ' ' || data[pos] == '\n' || data[pos] == '\t'))
3627 				pos++;
3628 		}
3629 
3630 		string readTagName() {
3631 			// remember to include : for namespaces
3632 			// basically just keep going until >, /, or whitespace
3633 			auto start = pos;
3634 			while(  data[pos] != '>' && data[pos] != '/' &&
3635 				data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t')
3636 			{
3637 				pos++;
3638 				if(pos == data.length) {
3639 					if(strict)
3640 						throw new Exception("tag name incomplete when file ended");
3641 					else
3642 						break;
3643 				}
3644 			}
3645 
3646 			if(!caseSensitive)
3647 				return toLower(data[start..pos]);
3648 			else
3649 				return data[start..pos];
3650 		}
3651 
3652 		string readAttributeName() {
3653 			// remember to include : for namespaces
3654 			// basically just keep going until >, /, or whitespace
3655 			auto start = pos;
3656 			while(  data[pos] != '>' && data[pos] != '/'  && data[pos] != '=' &&
3657 				data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t')
3658 			{
3659 				if(data[pos] == '<') {
3660 					if(strict)
3661 						throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos)));
3662 					else
3663 						break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there
3664 				}
3665 				pos++;
3666 				if(pos == data.length) {
3667 					if(strict)
3668 						throw new Exception("unterminated attribute name");
3669 					else
3670 						break;
3671 				}
3672 			}
3673 
3674 			if(!caseSensitive)
3675 				return toLower(data[start..pos]);
3676 			else
3677 				return data[start..pos];
3678 		}
3679 
3680 		string readAttributeValue() {
3681 			if(pos >= data.length) {
3682 				if(strict)
3683 					throw new Exception("no attribute value before end of file");
3684 				else
3685 					return null;
3686 			}
3687 			switch(data[pos]) {
3688 				case '\'':
3689 				case '"':
3690 					auto started = pos;
3691 					char end = data[pos];
3692 					pos++;
3693 					auto start = pos;
3694 					while(pos < data.length && data[pos] != end)
3695 						pos++;
3696 					if(strict && pos == data.length)
3697 						throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started));
3698 					string v = htmlEntitiesDecode(data[start..pos], strict);
3699 					pos++; // skip over the end
3700 				return v;
3701 				default:
3702 					if(strict)
3703 						parseError("Attributes must be quoted");
3704 					// read until whitespace or terminator (/ or >)
3705 					auto start = pos;
3706 					while(
3707 						pos < data.length &&
3708 						data[pos] != '>' &&
3709 						// unquoted attributes might be urls, so gotta be careful with them and self-closed elements
3710 						!(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') &&
3711 						data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t')
3712 					      	pos++;
3713 
3714 					string v = htmlEntitiesDecode(data[start..pos], strict);
3715 					// don't skip the end - we'll need it later
3716 					return v;
3717 			}
3718 		}
3719 
3720 		TextNode readTextNode() {
3721 			auto start = pos;
3722 			while(pos < data.length && data[pos] != '<') {
3723 				pos++;
3724 			}
3725 
3726 			return TextNode.fromUndecodedString(this, data[start..pos]);
3727 		}
3728 
3729 		// this is obsolete!
3730 		RawSource readCDataNode() {
3731 			auto start = pos;
3732 			while(pos < data.length && data[pos] != '<') {
3733 				pos++;
3734 			}
3735 
3736 			return new RawSource(this, data[start..pos]);
3737 		}
3738 
3739 
3740 		struct Ele {
3741 			int type; // element or closing tag or nothing
3742 				/*
3743 					type == 0 means regular node, self-closed (element is valid)
3744 					type == 1 means closing tag (payload is the tag name, element may be valid)
3745 					type == 2 means you should ignore it completely
3746 					type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not
3747 					type == 4 means the document was totally empty
3748 				*/
3749 			Element element; // for type == 0 or type == 3
3750 			string payload; // for type == 1
3751 		}
3752 		// recursively read a tag
3753 		Ele readElement(string[] parentChain = null) {
3754 			// FIXME: this is the slowest function in this module, by far, even in strict mode.
3755 			// Loose mode should perform decently, but strict mode is the important one.
3756 			if(!strict && parentChain is null)
3757 				parentChain = [];
3758 
3759 			static string[] recentAutoClosedTags;
3760 
3761 			if(pos >= data.length)
3762 			{
3763 				if(strict) {
3764 					throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain));
3765 				} else {
3766 					if(parentChain.length)
3767 						return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended
3768 					else
3769 						return Ele(4); // signal emptiness upstream
3770 				}
3771 			}
3772 
3773 			if(data[pos] != '<') {
3774 				return Ele(0, readTextNode(), null);
3775 			}
3776 
3777 			enforce(data[pos] == '<');
3778 			pos++;
3779 			if(pos == data.length) {
3780 				if(strict)
3781 					throw new MarkupException("Found trailing < at end of file");
3782 				// if not strict, we'll just skip the switch
3783 			} else
3784 			switch(data[pos]) {
3785 				// I don't care about these, so I just want to skip them
3786 				case '!': // might be a comment, a doctype, or a special instruction
3787 					pos++;
3788 
3789 						// FIXME: we should store these in the tree too
3790 						// though I like having it stripped out tbh.
3791 
3792 					if(pos == data.length) {
3793 						if(strict)
3794 							throw new MarkupException("<! opened at end of file");
3795 					} else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') {
3796 						// comment
3797 						pos += 2;
3798 
3799 						// FIXME: technically, a comment is anything
3800 						// between -- and -- inside a <!> block.
3801 						// so in <!-- test -- lol> , the " lol" is NOT a comment
3802 						// and should probably be handled differently in here, but for now
3803 						// I'll just keep running until --> since that's the common way
3804 
3805 						auto commentStart = pos;
3806 						while(pos+3 < data.length && data[pos..pos+3] != "-->")
3807 							pos++;
3808 
3809 						auto end = commentStart;
3810 
3811 						if(pos + 3 >= data.length) {
3812 							if(strict)
3813 								throw new MarkupException("unclosed comment");
3814 							end = data.length;
3815 							pos = data.length;
3816 						} else {
3817 							end = pos;
3818 							assert(data[pos] == '-');
3819 							pos++;
3820 							assert(data[pos] == '-');
3821 							pos++;
3822 							assert(data[pos] == '>');
3823 							pos++;
3824 						}
3825 
3826 						if(parseSawComment !is null)
3827 							if(parseSawComment(data[commentStart .. end])) {
3828 								return Ele(3, new HtmlComment(this, data[commentStart .. end]), null);
3829 							}
3830 					} else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") {
3831 						pos += 7;
3832 
3833 						auto cdataStart = pos;
3834 
3835 						ptrdiff_t end = -1;
3836 						typeof(end) cdataEnd;
3837 
3838 						if(pos < data.length) {
3839 							// cdata isn't allowed to nest, so this should be generally ok, as long as it is found
3840 							end = data[pos .. $].indexOf("]]>");
3841 						}
3842 
3843 						if(end == -1) {
3844 							if(strict)
3845 								throw new MarkupException("Unclosed CDATA section");
3846 							end = pos;
3847 							cdataEnd = pos;
3848 						} else {
3849 							cdataEnd = pos + end;
3850 							pos = cdataEnd + 3;
3851 						}
3852 
3853 						return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null);
3854 					} else {
3855 						auto start = pos;
3856 						while(pos < data.length && data[pos] != '>')
3857 							pos++;
3858 
3859 						auto bangEnds = pos;
3860 						if(pos == data.length) {
3861 							if(strict)
3862 								throw new MarkupException("unclosed processing instruction (<!xxx>)");
3863 						} else pos++; // skipping the >
3864 
3865 						if(parseSawBangInstruction !is null)
3866 							if(parseSawBangInstruction(data[start .. bangEnds])) {
3867 								// FIXME: these should be able to modify the parser state,
3868 								// doing things like adding entities, somehow.
3869 
3870 								return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null);
3871 							}
3872 					}
3873 
3874 					/*
3875 					if(pos < data.length && data[pos] == '>')
3876 						pos++; // skip the >
3877 					else
3878 						assert(!strict);
3879 					*/
3880 				break;
3881 				case '%':
3882 				case '?':
3883 					/*
3884 						Here's what we want to support:
3885 
3886 						<% asp code %>
3887 						<%= asp code %>
3888 						<?php php code ?>
3889 						<?= php code ?>
3890 
3891 						The contents don't really matter, just if it opens with
3892 						one of the above for, it ends on the two char terminator.
3893 
3894 						<?something>
3895 							this is NOT php code
3896 							because I've seen this in the wild: <?EM-dummyText>
3897 
3898 							This could be php with shorttags which would be cut off
3899 							prematurely because if(a >) - that > counts as the close
3900 							of the tag, but since dom.d can't tell the difference
3901 							between that and the <?EM> real world example, it will
3902 							not try to look for the ?> ending.
3903 
3904 						The difference between this and the asp/php stuff is that it
3905 						ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end
3906 						on >.
3907 					*/
3908 
3909 					char end = data[pos];
3910 					auto started = pos;
3911 					bool isAsp = end == '%';
3912 					int currentIndex = 0;
3913 					bool isPhp = false;
3914 					bool isEqualTag = false;
3915 					int phpCount = 0;
3916 
3917 				    more:
3918 					pos++; // skip the start
3919 					if(pos == data.length) {
3920 						if(strict)
3921 							throw new MarkupException("Unclosed <"~end~" by end of file");
3922 					} else {
3923 						currentIndex++;
3924 						if(currentIndex == 1 && data[pos] == '=') {
3925 							if(!isAsp)
3926 								isPhp = true;
3927 							isEqualTag = true;
3928 							goto more;
3929 						}
3930 						if(currentIndex == 1 && data[pos] == 'p')
3931 							phpCount++;
3932 						if(currentIndex == 2 && data[pos] == 'h')
3933 							phpCount++;
3934 						if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2)
3935 							isPhp = true;
3936 
3937 						if(data[pos] == '>') {
3938 							if((isAsp || isPhp) && data[pos - 1] != end)
3939 								goto more;
3940 							// otherwise we're done
3941 						} else
3942 							goto more;
3943 					}
3944 
3945 					//writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]);
3946 					auto code = data[started .. pos];
3947 
3948 
3949 					assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length));
3950 					if(pos < data.length)
3951 						pos++; // get past the >
3952 
3953 					if(isAsp && parseSawAspCode !is null) {
3954 						if(parseSawAspCode(code)) {
3955 							return Ele(3, new AspCode(this, code), null);
3956 						}
3957 					} else if(isPhp && parseSawPhpCode !is null) {
3958 						if(parseSawPhpCode(code)) {
3959 							return Ele(3, new PhpCode(this, code), null);
3960 						}
3961 					} else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) {
3962 						if(parseSawQuestionInstruction(code)) {
3963 							return Ele(3, new QuestionInstruction(this, code), null);
3964 						}
3965 					}
3966 				break;
3967 				case '/': // closing an element
3968 					pos++; // skip the start
3969 					auto p = pos;
3970 					while(pos < data.length && data[pos] != '>')
3971 						pos++;
3972 					//writefln("</%s>", data[p..pos]);
3973 					if(pos == data.length && data[pos-1] != '>') {
3974 						if(strict)
3975 							throw new MarkupException("File ended before closing tag had a required >");
3976 						else
3977 							data ~= ">"; // just hack it in
3978 					}
3979 					pos++; // skip the '>'
3980 
3981 					string tname = data[p..pos-1];
3982 					if(!caseSensitive)
3983 						tname = tname.toLower();
3984 
3985 				return Ele(1, null, tname); // closing tag reports itself here
3986 				case ' ': // assume it isn't a real element...
3987 					if(strict)
3988 						parseError("bad markup - improperly placed <");
3989 					else
3990 						return Ele(0, TextNode.fromUndecodedString(this, "<"), null);
3991 				break;
3992 				default:
3993 
3994 					if(!strict) {
3995 						// what about something that kinda looks like a tag, but isn't?
3996 						auto nextTag = data[pos .. $].indexOf("<");
3997 						auto closeTag = data[pos .. $].indexOf(">");
3998 						if(closeTag != -1 && nextTag != -1)
3999 							if(nextTag < closeTag) {
4000 								// since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically
4001 
4002 								auto equal = data[pos .. $].indexOf("=\"");
4003 								if(equal != -1 && equal < closeTag) {
4004 									// this MIGHT be ok, soldier on
4005 								} else {
4006 									// definitely no good, this must be a (horribly distorted) text node
4007 									pos++; // skip the < we're on - don't want text node to end prematurely
4008 									auto node = readTextNode();
4009 									node.contents = "<" ~ node.contents; // put this back
4010 									return Ele(0, node, null);
4011 								}
4012 							}
4013 					}
4014 
4015 					string tagName = readTagName();
4016 					string[string] attributes;
4017 
4018 					Ele addTag(bool selfClosed) {
4019 						if(selfClosed)
4020 							pos++;
4021 						else {
4022 							if(!strict)
4023 								if(tagName.isInArray(selfClosedElements))
4024 									// these are de-facto self closed
4025 									selfClosed = true;
4026 						}
4027 
4028 						if(strict)
4029 						enforce(data[pos] == '>');//, format("got %s when expecting >\nContext:\n%s", data[pos], data[pos - 100 .. pos + 100]));
4030 						else {
4031 							// if we got here, it's probably because a slash was in an
4032 							// unquoted attribute - don't trust the selfClosed value
4033 							if(!selfClosed)
4034 								selfClosed = tagName.isInArray(selfClosedElements);
4035 
4036 							while(pos < data.length && data[pos] != '>')
4037 								pos++;
4038 						}
4039 
4040 						auto whereThisTagStarted = pos; // for better error messages
4041 
4042 						pos++;
4043 
4044 						auto e = createElement(tagName);
4045 						e.attributes = attributes;
4046 						version(dom_node_indexes) {
4047 							if(e.dataset.nodeIndex.length == 0)
4048 								e.dataset.nodeIndex = to!string(&(e.attributes));
4049 						}
4050 						e.selfClosed = selfClosed;
4051 						e.parseAttributes();
4052 
4053 
4054 						// HACK to handle script and style as a raw data section as it is in HTML browsers
4055 						if(tagName == "script" || tagName == "style") {
4056 							if(!selfClosed) {
4057 								string closer = "</" ~ tagName ~ ">";
4058 								ptrdiff_t ending;
4059 								if(pos >= data.length)
4060 									ending = -1;
4061 								else
4062 									ending = indexOf(data[pos..$], closer);
4063 
4064 								if(loose && ending == -1 && pos < data.length)
4065 									ending = indexOf(data[pos..$], closer.toUpper());
4066 								if(ending == -1) {
4067 									if(strict)
4068 										throw new Exception("tag " ~ tagName ~ " never closed");
4069 									else {
4070 										// let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit.
4071 										if(pos < data.length) {
4072 											e = new TextNode(this, data[pos .. $]);
4073 											pos = data.length;
4074 										}
4075 									}
4076 								} else {
4077 									ending += pos;
4078 									e.innerRawSource = data[pos..ending];
4079 									pos = ending + closer.length;
4080 								}
4081 							}
4082 							return Ele(0, e, null);
4083 						}
4084 
4085 						bool closed = selfClosed;
4086 
4087 						void considerHtmlParagraphHack(Element n) {
4088 							assert(!strict);
4089 							if(e.tagName == "p" && e.tagName == n.tagName) {
4090 								// html lets you write <p> para 1 <p> para 1
4091 								// but in the dom tree, they should be siblings, not children.
4092 								paragraphHackfixRequired = true;
4093 							}
4094 						}
4095 
4096 						//writef("<%s>", tagName);
4097 						while(!closed) {
4098 							Ele n;
4099 							if(strict)
4100 								n = readElement();
4101 							else
4102 								n = readElement(parentChain ~ tagName);
4103 
4104 							if(n.type == 4) return n; // the document is empty
4105 
4106 							if(n.type == 3 && n.element !is null) {
4107 								// special node, append if possible
4108 								if(e !is null)
4109 									e.appendChild(n.element);
4110 								else
4111 									piecesBeforeRoot ~= n.element;
4112 							} else if(n.type == 0) {
4113 								if(!strict)
4114 									considerHtmlParagraphHack(n.element);
4115 								e.appendChild(n.element);
4116 							} else if(n.type == 1) {
4117 								bool found = false;
4118 								if(n.payload != tagName) {
4119 									if(strict)
4120 										parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted)));
4121 									else {
4122 										sawImproperNesting = true;
4123 										// this is so we don't drop several levels of awful markup
4124 										if(n.element) {
4125 											if(!strict)
4126 												considerHtmlParagraphHack(n.element);
4127 											e.appendChild(n.element);
4128 											n.element = null;
4129 										}
4130 
4131 										// is the element open somewhere up the chain?
4132 										foreach(i, parent; parentChain)
4133 											if(parent == n.payload) {
4134 												recentAutoClosedTags ~= tagName;
4135 												// just rotating it so we don't inadvertently break stuff with vile crap
4136 												if(recentAutoClosedTags.length > 4)
4137 													recentAutoClosedTags = recentAutoClosedTags[1 .. $];
4138 
4139 												n.element = e;
4140 												return n;
4141 											}
4142 
4143 										// if not, this is a text node; we can't fix it up...
4144 
4145 										// If it's already in the tree somewhere, assume it is closed by algorithm
4146 										// and we shouldn't output it - odds are the user just flipped a couple tags
4147 										foreach(ele; e.tree) {
4148 											if(ele.tagName == n.payload) {
4149 												found = true;
4150 												break;
4151 											}
4152 										}
4153 
4154 										foreach(ele; recentAutoClosedTags) {
4155 											if(ele == n.payload) {
4156 												found = true;
4157 												break;
4158 											}
4159 										}
4160 
4161 										if(!found) // if not found in the tree though, it's probably just text
4162 										e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">"));
4163 									}
4164 								} else {
4165 									if(n.element) {
4166 										if(!strict)
4167 											considerHtmlParagraphHack(n.element);
4168 										e.appendChild(n.element);
4169 									}
4170 								}
4171 
4172 								if(n.payload == tagName) // in strict mode, this is always true
4173 									closed = true;
4174 							} else { /*throw new Exception("wtf " ~ tagName);*/ }
4175 						}
4176 						//writef("</%s>\n", tagName);
4177 						return Ele(0, e, null);
4178 					}
4179 
4180 					// if a tag was opened but not closed by end of file, we can arrive here
4181 					if(!strict && pos >= data.length)
4182 						return addTag(false);
4183 					//else if(strict) assert(0); // should be caught before
4184 
4185 					switch(data[pos]) {
4186 						default: assert(0);
4187 						case '/': // self closing tag
4188 							return addTag(true);
4189 						case '>':
4190 							return addTag(false);
4191 						case ' ':
4192 						case '\t':
4193 						case '\n':
4194 							// there might be attributes...
4195 							moreAttributes:
4196 							eatWhitespace();
4197 
4198 							// same deal as above the switch....
4199 							if(!strict && pos >= data.length)
4200 								return addTag(false);
4201 
4202 							if(strict && pos >= data.length)
4203 								throw new MarkupException("tag open, didn't find > before end of file");
4204 
4205 							switch(data[pos]) {
4206 								case '/': // self closing tag
4207 									return addTag(true);
4208 								case '>': // closed tag; open -- we now read the contents
4209 									return addTag(false);
4210 								default: // it is an attribute
4211 									string attrName = readAttributeName();
4212 									string attrValue = attrName;
4213 									if(pos >= data.length) {
4214 										if(strict)
4215 											assert(0, "this should have thrown in readAttributeName");
4216 										else {
4217 											data ~= ">";
4218 											goto blankValue;
4219 										}
4220 									}
4221 									if(data[pos] == '=') {
4222 										pos++;
4223 										attrValue = readAttributeValue();
4224 									}
4225 
4226 									blankValue:
4227 
4228 									if(strict && attrName in attributes)
4229 										throw new MarkupException("Repeated attribute: " ~ attrName);
4230 
4231 									if(attrName.strip().length)
4232 										attributes[attrName] = attrValue;
4233 									else if(strict) throw new MarkupException("wtf, zero length attribute name");
4234 
4235 									if(!strict && pos < data.length && data[pos] == '<') {
4236 										// this is the broken tag that doesn't have a > at the end
4237 										data = data[0 .. pos] ~ ">" ~ data[pos.. $];
4238 										// let's insert one as a hack
4239 										goto case '>';
4240 									}
4241 
4242 									goto moreAttributes;
4243 							}
4244 					}
4245 			}
4246 
4247 			return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly.
4248 			//assert(0);
4249 		}
4250 
4251 		eatWhitespace();
4252 		Ele r;
4253 		do {
4254 			r = readElement(); // there SHOULD only be one element...
4255 
4256 			if(r.type == 3 && r.element !is null)
4257 				piecesBeforeRoot ~= r.element;
4258 
4259 			if(r.type == 4)
4260 				break; // the document is completely empty...
4261 		} while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node
4262 
4263 		root = r.element;
4264 
4265 		if(!strict) // in strict mode, we'll just ignore stuff after the xml
4266 		while(r.type != 4) {
4267 			r = readElement();
4268 			if(r.type != 4 && r.type != 2) { // if not empty and not ignored
4269 				if(r.element !is null)
4270 					piecesAfterRoot ~= r.element;
4271 			}
4272 		}
4273 
4274 		if(root is null)
4275 		{
4276 			if(strict)
4277 				assert(0, "empty document should be impossible in strict mode");
4278 			else
4279 				parse(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do
4280 		}
4281 
4282 		if(paragraphHackfixRequired) {
4283 			assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag...
4284 
4285 			// in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml).
4286 			// It's hard to handle above though because my code sucks. So, we'll fix it here.
4287 
4288 			auto iterator = root.tree;
4289 			foreach(ele; iterator) {
4290 				if(ele.parentNode is null)
4291 					continue;
4292 
4293 				if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) {
4294 					auto shouldBePreviousSibling = ele.parentNode;
4295 					auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder...
4296 					holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
4297 					iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
4298 				}
4299 			}
4300 		}
4301 	}
4302 
4303 	/* end massive parse function */
4304 
4305 	/// Gets the <title> element's innerText, if one exists
4306 	@property string title() {
4307 		bool doesItMatch(Element e) {
4308 			return (e.tagName == "title");
4309 		}
4310 
4311 		auto e = findFirst(&doesItMatch);
4312 		if(e)
4313 			return e.innerText();
4314 		return "";
4315 	}
4316 
4317 	/// Sets the title of the page, creating a <title> element if needed.
4318 	@property void title(string t) {
4319 		bool doesItMatch(Element e) {
4320 			return (e.tagName == "title");
4321 		}
4322 
4323 		auto e = findFirst(&doesItMatch);
4324 
4325 		if(!e) {
4326 			e = createElement("title");
4327 			auto heads = getElementsByTagName("head");
4328 			if(heads.length)
4329 				heads[0].appendChild(e);
4330 		}
4331 
4332 		if(e)
4333 			e.innerText = t;
4334 	}
4335 
	// FIXME: would it work to alias root this; ???? might be a good idea
	/// These functions all forward to the root element. See the documentation in the Element class.
	Element getElementById(string id) {
		// pure delegation: the lookup itself is done by the root element (root is not checked for null here)
		return root.getElementById(id);
	}
4341 
	/// ditto
	final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
		if( is(SomeElementType : Element))
		out(ret) { assert(ret !is null); }
	body {
		// file and line default to the call site and are handed through unchanged to the root element's implementation;
		// the out contract states that a null result is never returned
		return root.requireElementById!(SomeElementType)(id, file, line);
	}
4349 
	/// ditto
	final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
		if( is(SomeElementType : Element))
		out(ret) { assert(ret !is null); }
	body {
		// file and line default to the call site and are handed through unchanged to the root element's implementation;
		// the out contract states that a null result is never returned
		return root.requireSelector!(SomeElementType)(selector, file, line);
	}
4357 
4358 
	/// ditto
	Element querySelector(string selector) {
		// forwards to the root element, which evaluates the selector
		return root.querySelector(selector);
	}
4363 
	/// ditto
	Element[] querySelectorAll(string selector) {
		// forwards to the root element, which evaluates the selector
		return root.querySelectorAll(selector);
	}
4368 
	/// ditto
	Element[] getElementsBySelector(string selector) {
		// forwards to the root element, which evaluates the selector
		return root.getElementsBySelector(selector);
	}
4373 
	/// ditto
	Element[] getElementsByTagName(string tag) {
		// forwards to the root element; the tag string is passed through as given
		return root.getElementsByTagName(tag);
	}
4378 
4379 	/** FIXME: btw, this could just be a lazy range...... */
4380 	Element getFirstElementByTagName(string tag) {
4381 		if(loose)
4382 			tag = tag.toLower();
4383 		bool doesItMatch(Element e) {
4384 			return e.tagName == tag;
4385 		}
4386 		return findFirst(&doesItMatch);
4387 	}
4388 
	/// This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body is a keyword in D.)
	Element mainBody() {
		// simply the first element named "body"; null when none is found
		return getFirstElementByTagName("body");
	}
4393 
4394 	/// this uses a weird thing... it's [name=] if no colon and
4395 	/// [property=] if colon
4396 	string getMeta(string name) {
4397 		string thing = name.indexOf(":") == -1 ? "name" : "property";
4398 		auto e = querySelector("head meta["~thing~"="~name~"]");
4399 		if(e is null)
4400 			return null;
4401 		return e.content;
4402 	}
4403 	
4404 	/// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags/
4405 	void setMeta(string name, string value) {
4406 		string thing = name.indexOf(":") == -1 ? "name" : "property";
4407 		auto e = querySelector("head meta["~thing~"="~name~"]");
4408 		if(e is null) {
4409 			e = requireSelector("head").addChild("meta");
4410 			e.setAttribute(thing, name);
4411 		}
4412 
4413 		e.content = value;
4414 	}
4415 
	/// Returns the elements with tag name "form", typed as Form[].
	Form[] forms() {
		// NOTE(review): this casts the whole array rather than each element, so it presumably
		// relies on every "form" element having been constructed as a Form - confirm against Element.make
		return cast(Form[]) getElementsByTagName("form");
	}
4420 
	/// Creates a new "form" element through createElement and returns it as a Form.
	Form createForm() 
		out(ret) {
			// the dynamic cast below yields null if createElement did not produce a Form
			assert(ret !is null);
		}
	body {
		return cast(Form) createElement("form");
	}
4429 
4430 	///.
4431 	Element createElement(string name) {
4432 		if(loose)
4433 			name = name.toLower();
4434 	
4435 		auto e = Element.make(name);
4436 		e.parentDocument = this;
4437 
4438 		return e;
4439 
4440 //		return new Element(this, name, null, selfClosed);
4441 	}
4442 
	/// Creates a new DocumentFragment constructed with this document.
	Element createFragment() {
		return new DocumentFragment(this);
	}
4447 
	/// Creates a new TextNode constructed with this document and the given content. It is not inserted anywhere.
	Element createTextNode(string content) {
		return new TextNode(this, content);
	}
4452 
4453 
4454 	///.
4455 	Element findFirst(bool delegate(Element) doesItMatch) {
4456 		Element result;
4457 
4458 		bool goThroughElement(Element e) {
4459 			if(doesItMatch(e)) {
4460 				result = e;
4461 				return true;
4462 			}
4463 
4464 			foreach(child; e.children) {
4465 				if(goThroughElement(child))
4466 					return true;
4467 			}
4468 
4469 			return false;
4470 		}
4471 
4472 		goThroughElement(root);
4473 
4474 		return result;
4475 	}
4476 
	/// Drops the root element and switches loose mode off. No other field is reset here.
	void clear() {
		root = null;
		loose = false;
	}
4482 
	/// Replaces the prolog text and records that it was set explicitly, so the prolog
	/// property returns it even when pieces before the root were kept from the document.
	void setProlog(string d) {
		_prolog = d;
		prologWasSet = true;
	}
4488 
4489 	///.
4490 	private string _prolog = "<!DOCTYPE html>\n";
4491 	private bool prologWasSet = false; // set to true if the user changed it
4492 
4493 	@property string prolog() const {
4494 		// if the user explicitly changed it, do what they want
4495 		// or if we didn't keep/find stuff from the document itself,
4496 		// we'll use the builtin one as a default.
4497 		if(prologWasSet || piecesBeforeRoot.length == 0)
4498 			return _prolog;
4499 
4500 		string p;
4501 		foreach(e; piecesBeforeRoot)
4502 			p ~= e.toString() ~ "\n";
4503 		return p;
4504 	}
4505 
4506 	///.
4507 	override string toString() const {
4508 		return prolog ~ root.toString();
4509 	}
4510 
	/// The document's root element. parse assigns it and clear resets it to null.
	Element root;

	/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
	Element[] piecesBeforeRoot;

	/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
	Element[] piecesAfterRoot;

	/// When true, createElement and getFirstElementByTagName lower-case the tag names they are given. clear resets it to false.
	bool loose;
4522 
4523 
4524 
	// what follows are for mutation events that you can observe
	// (append a delegate to this array to be called by dispatchMutationEvent)
	void delegate(DomMutationEvent)[] eventObservers;

	// Calls every registered observer, in array order, with the given event.
	void dispatchMutationEvent(DomMutationEvent e) {
		foreach(o; eventObservers)
			o(e);
	}
4532 }
4533 
4534 
// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)
class XmlDocument : Document {
	/// Sets the xml content type and prolog, then parses the given data.
	this(string data) {
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		// NOTE(review): the two `true` arguments presumably select case-sensitive and strict parsing - confirm against parse's signature
		parse(data, true, true);
	}
}
4545 
4546 
4547 
// for the observers
// Identifies which kind of change a DomMutationEvent describes (stored in its `operation` field).
enum DomMutationOperations {
	setAttribute,
	removeAttribute,
	appendChild, // tagname, attributes[], innerHTML
	insertBefore,
	truncateChildren,
	removeChild,
	appendHtml,
	replaceHtml,
	appendText,
	replaceText,
	replaceTextOnly
}
4562 
// and for observers too
// The value handed to each delegate in Document.eventObservers by dispatchMutationEvent.
struct DomMutationEvent {
	DomMutationOperations operation; // which kind of change this is
	Element target;
	Element related; // what this means differs with the operation
	Element related2;
	string relatedString;
	string relatedString2;
}
4572 
4573 
// Tag names treated as self-closed.
// NOTE(review): the HTML void-element list also contains area, base, embed, param, track and wbr,
// which are absent here - confirm whether that is intentional before extending the list.
private enum static string[] selfClosedElements = [
	// html 4
	"img", "hr", "input", "br", "col", "link", "meta",
	// html 5
	"source" ];
4579 
4580 static import std.conv;
4581 
/**
	Converts a string of hexadecimal digits (no prefix) to an int.

	Digits 0-9, a-f and A-F are accepted. An empty string yields 0.

	Throws: Exception if any other character is present. The string is scanned
	from its last character to its first, so the rightmost illegal character is
	the one reported.

	Note: no overflow check is done; more than eight significant digits wrap around.
*/
int intFromHex(string hex) {
	int place = 1; // weight of the current digit: 16^0, 16^1, ...
	int value = 0;

	foreach_reverse(char q; hex) {
		int v;
		if(q >= '0' && q <= '9')
			v = q - '0';
		else if(q >= 'a' && q <= 'f')
			v = q - 'a' + 10;
		else if(q >= 'A' && q <= 'F') // upper-case digits used to be rejected
			v = q - 'A' + 10;
		else throw new Exception("Illegal hex character: " ~ q);

		value += v * place;

		place *= 16;
	}

	return value;
}
4602 
4603 
4604 // CSS selector handling
4605 
4606 // EXTENSIONS
4607 // dd - dt means get the dt directly before that dd (opposite of +)                  NOT IMPLEMENTED
4608 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to       NOT IMPLEMENTED
// dt < dl means get the parent of that dt iff it is a dl (usable for "get the dl elements that directly contain a dt")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first)
4612 
4613 
4614 
4615 // CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
4616 // That might be useful to implement, though I do have parent selectors too.
4617 
4618 		///.
4619 		static immutable string[] selectorTokens = [
4620 			// It is important that the 2 character possibilities go first here for accurate lexing
4621 		    "~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard
4622 		    "<<", // my any-parent extension (reciprocal of whitespace)
4623 		    " - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
4624 		    ".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<"
4625 		]; // other is white space or a name.
4626 
4627 		///.
4628 		sizediff_t idToken(string str, sizediff_t position) {
4629 			sizediff_t tid = -1;
4630 			char c = str[position];
4631 			foreach(a, token; selectorTokens)
4632 
4633 				if(c == token[0]) {
4634 					if(token.length > 1) {
4635 						if(position + 1 >= str.length   ||   str[position+1] != token[1])
4636 							continue; // not this token
4637 					}
4638 					tid = a;
4639 					break;
4640 				}
4641 			return tid;
4642 		}
4643 	
	/// Splits a single selector string into a flat list of tokens: operator tokens taken
	/// from selectorTokens, and the runs of other characters (names) found between them.
	string[] lexSelector(string selector) {

		// FIXME: it doesn't support quoted attributes
		// FIXME: it doesn't support backslash escaped characters
		// FIXME: it should ignore /* comments */
		string[] tokens;
		sizediff_t start = -1; // index where the name currently being read began, or -1 when not inside a name
		bool skip = false; // set after a two-character token so its second character is not lexed again
		// get rid of useless, non-syntax whitespace

		selector = selector.strip();
		selector = selector.replace("\n", " "); // FIXME hack

		// drop the optional space on either side of the >, +, ~ and < combinators
		selector = selector.replace(" >", ">");
		selector = selector.replace("> ", ">");
		selector = selector.replace(" +", "+");
		selector = selector.replace("+ ", "+");
		selector = selector.replace(" ~", "~");
		selector = selector.replace("~ ", "~");
		selector = selector.replace(" <", "<");
		selector = selector.replace("< ", "<");
			// FIXME: this is ugly ^^^^^. It should just ignore that whitespace somewhere else.

		// FIXME: another ugly hack. maybe i should just give in and do this the right way......
		// Removes single backslashes from a name and turns each doubled backslash into one,
		// using \u00ff as a temporary stand-in for the backslash.
		string fixupEscaping(string input) {
			auto lol = input.replace("\\", "\u00ff");
			lol = lol.replace("\u00ff\u00ff", "\\");
			return lol.replace("\u00ff", "");
		}

		bool escaping = false; // true while the previous character was an unescaped backslash
		foreach(i, c; selector) { // kill useless leading/trailing whitespace too
			if(skip) {
				skip = false;
				continue;
			}

			sizediff_t tid = -1;

			// a backslash, and the character right after it, are never looked up as tokens;
			// they stay part of the surrounding name
			if(escaping)
				escaping = false;
			else if(c == '\\')
				escaping = true;
			else
				tid = idToken(selector, i);

			if(tid == -1) {
				// not a token: this character belongs to a name; remember where the name starts
				if(start == -1)
					start = i;
			} else {
				// a token ends any name in progress; emit the name first, then the token
				if(start != -1) {
					tokens ~= fixupEscaping(selector[start..i]);
					start = -1;
				}
				tokens ~= selectorTokens[tid];
			}

			// only tokens of length exactly 2 cause the next character to be skipped
			if (tid != -1 && selectorTokens[tid].length == 2)
				skip = true;
		}
		// flush a name that runs to the end of the string
		if(start != -1)
			tokens ~= fixupEscaping(selector[start..$]);

		return tokens;
	}
4710 
4711 	///.
4712 	struct SelectorPart {
4713 		string tagNameFilter; ///.
4714 		string[] attributesPresent; /// [attr]
4715 		string[2][] attributesEqual; /// [attr=value]
4716 		string[2][] attributesStartsWith; /// [attr^=value]
4717 		string[2][] attributesEndsWith; /// [attr$=value]
4718 		// split it on space, then match to these
4719 		string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
4720 		// split it on dash, then match to these
4721 		string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
4722 		string[2][] attributesInclude; /// [attr*=value]
4723 		string[2][] attributesNotEqual; /// [attr!=value] -- extension by me
4724 
4725 		bool firstChild; ///.
4726 		bool lastChild; ///.
4727 
4728 		bool emptyElement; ///.
4729 		bool oddChild; ///.
4730 		bool evenChild; ///.
4731 
4732 		bool rootElement; ///.
4733 
4734 		int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf
4735 
4736 		///.
4737 		string toString() {
4738 			string ret;
4739 			switch(separation) {
4740 				default: assert(0);
4741 				case -1: break;
4742 				case 0: ret ~= " "; break;
4743 				case 1: ret ~= ">"; break;
4744 				case 2: ret ~= "+"; break;
4745 				case 3: ret ~= "~"; break;
4746 				case 4: ret ~= "<"; break;
4747 			}
4748 			ret ~= tagNameFilter;
4749 			foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
4750 			foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=" ~ a[1] ~ "]";
4751 			foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=" ~ a[1] ~ "]";
4752 			foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=" ~ a[1] ~ "]";
4753 			foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=" ~ a[1] ~ "]";
4754 			foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=" ~ a[1] ~ "]";
4755 			foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=" ~ a[1] ~ "]";
4756 			foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=" ~ a[1] ~ "]";
4757 
4758 			if(firstChild) ret ~= ":first-child";
4759 			if(lastChild) ret ~= ":last-child";
4760 			if(emptyElement) ret ~= ":empty";
4761 			if(oddChild) ret ~= ":odd-child";
4762 			if(evenChild) ret ~= ":even-child";
4763 			if(rootElement) ret ~= ":root";
4764 
4765 			return ret;
4766 		}
4767 
4768 		// USEFUL
4769 		///.
4770 		bool matchElement(Element e) {
4771 			// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
4772 			// Each individual call is reasonably fast already, but it adds up.
4773 			if(e is null) return false;
4774 			if(e.nodeType != 1) return false;
4775 
4776 			if(tagNameFilter != "" && tagNameFilter != "*")
4777 				if(e.tagName != tagNameFilter)
4778 					return false;
4779 			if(firstChild) {
4780 				if(e.parentNode is null)
4781 					return false;
4782 				if(e.parentNode.childElements[0] !is e)
4783 					return false;
4784 			}
4785 			if(lastChild) {
4786 				if(e.parentNode is null)
4787 					return false;
4788 				auto ce = e.parentNode.childElements;
4789 				if(ce[$-1] !is e)
4790 					return false;
4791 			}
4792 			if(emptyElement) {
4793 				if(e.children.length)
4794 					return false;
4795 			}
4796 			if(rootElement) {
4797 				if(e.parentNode !is null)
4798 					return false;
4799 			}
4800 			if(oddChild || evenChild) {
4801 				if(e.parentNode is null)
4802 					return false;
4803 				foreach(i, child; e.parentNode.childElements) {
4804 					if(child is e) {
4805 						if(oddChild && !(i&1))
4806 							return false;
4807 						if(evenChild && (i&1))
4808 							return false;
4809 						break;
4810 					}
4811 				}
4812 			}
4813 
4814 			bool matchWithSeparator(string attr, string value, string separator) {
4815 				foreach(s; attr.split(separator))
4816 					if(s == value)
4817 						return true;
4818 				return false;
4819 			}
4820 
4821 			foreach(a; attributesPresent)
4822 				if(a !in e.attributes)
4823 					return false;
4824 			foreach(a; attributesEqual)
4825 				if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
4826 					return false;
4827 			foreach(a; attributesNotEqual)
4828 				// FIXME: maybe it should say null counts... this just bit me.
4829 				// I did [attr][attr!=value] to work around.
4830 				//
4831 				// if it's null, it's not equal, right?
4832 				//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
4833 				if(e.getAttribute(a[0]) == a[1])
4834 					return false;
4835 			foreach(a; attributesInclude)
4836 				if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
4837 					return false;
4838 			foreach(a; attributesStartsWith)
4839 				if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
4840 					return false;
4841 			foreach(a; attributesEndsWith)
4842 				if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
4843 					return false;
4844 			foreach(a; attributesIncludesSeparatedBySpaces)
4845 				if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
4846 					return false;
4847 			foreach(a; attributesIncludesSeparatedByDashes)
4848 				if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
4849 					return false;
4850 
4851 			return true;
4852 		}
4853 	}
4854 
4855 	// USEFUL
4856 	///.
4857 	Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
4858 		Element[] ret;
4859 		if(!parts.length) {
4860 			return [start]; // the null selector only matches the start point; it
4861 				// is what terminates the recursion
4862 		}
4863 
4864 		auto part = parts[0];
4865 		switch(part.separation) {
4866 			default: assert(0);
4867 			case -1:
4868 			case 0: // tree
4869 				foreach(e; start.tree) {
4870 					if(part.separation == 0 && start is e)
4871 						continue; // space doesn't match itself!
4872 					if(part.matchElement(e)) {
4873 						ret ~= getElementsBySelectorParts(e, parts[1..$]);
4874 					}
4875 				}
4876 			break;
4877 			case 1: // children
4878 				foreach(e; start.childNodes) {
4879 					if(part.matchElement(e)) {
4880 						ret ~= getElementsBySelectorParts(e, parts[1..$]);
4881 					}
4882 				}
4883 			break;
4884 			case 2: // next-sibling
4885 				auto tmp = start.parentNode;
4886 				if(tmp !is null) {
4887 					sizediff_t pos = -1;
4888 					auto children = tmp.childElements;
4889 					foreach(i, child; children) {
4890 						if(child is start) {
4891 							pos = i;
4892 							break;
4893 						}
4894 					}
4895 					assert(pos != -1);
4896 					if(pos + 1 < children.length) {
4897 						auto e = children[pos+1];
4898 						if(part.matchElement(e))
4899 							ret ~= getElementsBySelectorParts(e, parts[1..$]);
4900 					}
4901 				}
4902 			break;
4903 			case 3: // younger sibling
4904 				auto tmp = start.parentNode;
4905 				if(tmp !is null) {
4906 					sizediff_t pos = -1;
4907 					auto children = tmp.childElements;
4908 					foreach(i, child; children) {
4909 						if(child is start) {
4910 							pos = i;
4911 							break;
4912 						}
4913 					}
4914 					assert(pos != -1);
4915 					foreach(e; children[pos+1..$]) {
4916 						if(part.matchElement(e))
4917 							ret ~= getElementsBySelectorParts(e, parts[1..$]);
4918 					}
4919 				}
4920 			break;
4921 			case 4: // immediate parent node, an extension of mine to walk back up the tree
4922 				auto e = start.parentNode;
4923 				if(part.matchElement(e)) {
4924 					ret ~= getElementsBySelectorParts(e, parts[1..$]);
4925 				}
4926 				/*
4927 					Example of usefulness:
4928 
4929 					Consider you have an HTML table. If you want to get all rows that have a th, you can do:
4930 
4931 					table th < tr
4932 
4933 					Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
4934 				*/
4935 			break;
4936 			case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator)
4937 				/*
4938 					Like with the < operator, this is best used to find some parent of a particular known element.
4939 
4940 					Say you have an anchor inside a 
4941 				*/
4942 		}
4943 
4944 		return ret;
4945 	}
4946 
4947 	///.
4948 	struct Selector {
4949 		///.
4950 		SelectorPart[] parts;
4951 
4952 		///.
4953 		string toString() {
4954 			string ret;
4955 			foreach(part; parts)
4956 				ret ~= part.toString();
4957 			return ret;
4958 		}
4959 
4960 		// USEFUL
4961 		///.
4962 		Element[] getElements(Element start) {
4963 			return removeDuplicates(getElementsBySelectorParts(start, parts));
4964 		}
4965 
4966 		// USEFUL (but not implemented)
4967 		/// If relativeTo == null, it assumes the root of the parent document.
4968 		bool matchElement(Element e, Element relativeTo = null) {
4969 			// FIXME
4970 			/+
4971 			Element where = e;
4972 			foreach(part; retro(parts)) {
4973 				if(where is relativeTo)
4974 					return false; // at end of line, if we aren't done by now, the match fails
4975 				if(!part.matchElement(where))
4976 					return false; // didn't match
4977 
4978 				if(part.selection == 1) // the > operator
4979 					where = where.parentNode;
4980 				else if(part.selection == 0) { // generic parent
4981 					// need to go up the whole chain
4982 				}
4983 			}
4984 			+/
4985 			return true; // if we got here, it is a success
4986 		}
4987 
4988 		// the string should NOT have commas. Use parseSelectorString for that instead
4989 		///.
4990 		static Selector fromString(string selector) {
4991 			return parseSelector(lexSelector(selector));
4992 		}
4993 	}
4994 
4995 	///.
4996 	Selector[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
4997 		Selector[] ret;
4998 		foreach(s; selector.split(",")) {
4999 			ret ~= parseSelector(lexSelector(s), caseSensitiveTags);
5000 		}
5001 
5002 		return ret;
5003 	}
5004 
5005 	///.
5006 	Selector parseSelector(string[] tokens, bool caseSensitiveTags = true) {
5007 		Selector s;
5008 
5009 		SelectorPart current;
5010 		void commit() {
5011 			// might as well skip null items
5012 			if(current != current.init) {
5013 				s.parts ~= current;
5014 
5015 				current = current.init; // start right over
5016 			}
5017 		}
5018 		enum State {
5019 			Starting,
5020 			ReadingClass,
5021 			ReadingId,
5022 			ReadingAttributeSelector,
5023 			ReadingAttributeComparison,
5024 			ExpectingAttributeCloser,
5025 			ReadingPseudoClass,
5026 			ReadingAttributeValue
5027 		}
5028 		State state = State.Starting;
5029 		string attributeName, attributeValue, attributeComparison;
5030 		foreach(token; tokens) {
5031 			sizediff_t tid = -1;
5032 			foreach(i, item; selectorTokens)
5033 				if(token == item) {
5034 					tid = i;
5035 					break;
5036 				}
5037 			final switch(state) {
5038 				case State.Starting: // fresh, might be reading an operator or a tagname
5039 					if(tid == -1) {
5040 						if(!caseSensitiveTags)
5041 							token = token.toLower();
5042 						current.tagNameFilter = token;
5043 					} else {
5044 						// Selector operators
5045 						switch(token) {
5046 							case "*":
5047 								current.tagNameFilter = "*";
5048 							break;
5049 							case " ":
5050 								commit();
5051 								current.separation = 0; // tree
5052 							break;
5053 							case ">":
5054 								commit();
5055 								current.separation = 1; // child
5056 							break;
5057 							case "+":
5058 								commit();
5059 								current.separation = 2; // sibling directly after
5060 							break;
5061 							case "~":
5062 								commit();
5063 								current.separation = 3; // any sibling after
5064 							break;
5065 							case "<":
5066 								commit();
5067 								current.separation = 4; // immediate parent of
5068 							break;
5069 							case "[":
5070 								state = State.ReadingAttributeSelector;
5071 							break;
5072 							case ".":
5073 								state = State.ReadingClass;
5074 							break;
5075 							case "#":
5076 								state = State.ReadingId;
5077 							break;
5078 							case ":":
5079 								state = State.ReadingPseudoClass;
5080 							break;
5081 
5082 							default:
5083 								assert(0, token);
5084 						}
5085 					}
5086 				break;
5087 				case State.ReadingClass:
5088 					current.attributesIncludesSeparatedBySpaces ~= ["class", token];
5089 					state = State.Starting;
5090 				break;
5091 				case State.ReadingId:
5092 					current.attributesEqual ~= ["id", token];
5093 					state = State.Starting;
5094 				break;
5095 				case State.ReadingPseudoClass:
5096 					switch(token) {
5097 						case "first-child":
5098 							current.firstChild = true;
5099 						break;
5100 						case "last-child":
5101 							current.lastChild = true;
5102 						break;
5103 						case "only-child":
5104 							current.firstChild = true;
5105 							current.lastChild = true;
5106 						break;
5107 						case "empty":
5108 							// one with no children
5109 							current.emptyElement = true;
5110 						break;
5111 						case "link":
5112 							current.attributesPresent ~= "href";
5113 						break;
5114 						case "root":
5115 							current.rootElement = true;
5116 						break;
5117 						// FIXME: add :not()
5118 						// My extensions
5119 						case "odd-child":
5120 							current.oddChild = true;
5121 						break;
5122 						case "even-child":
5123 							current.evenChild = true;
5124 						break;
5125 
5126 						case "visited", "active", "hover", "target", "focus", "checked", "selected":
5127 							current.attributesPresent ~= "nothing";
5128 							// FIXME
5129 						/*
5130 						// defined in the standard, but I don't implement it
5131 						case "not":
5132 						*/
5133 						/+
5134 						// extensions not implemented
5135 						//case "text": // takes the text in the element and wraps it in an element, returning it
5136 						+/
5137 							goto case;
5138 						case "before", "after":
5139 							current.attributesPresent ~= "FIXME";
5140 
5141 						break;
5142 						default:
5143 							//if(token.indexOf("lang") == -1)
5144 							//assert(0, token);
5145 						break;
5146 					}
5147 					state = State.Starting;
5148 				break;
5149 				case State.ReadingAttributeSelector:
5150 					attributeName = token;
5151 					attributeComparison = null;
5152 					attributeValue = null;
5153 					state = State.ReadingAttributeComparison;
5154 				break;
5155 				case State.ReadingAttributeComparison:
5156 					// FIXME: these things really should be quotable in the proper lexer...
5157 					if(token != "]") {
5158 						if(token.indexOf("=") == -1) {
5159 							// not a comparison; consider it
5160 							// part of the attribute
5161 							attributeValue ~= token;
5162 						} else {
5163 							attributeComparison = token;
5164 							state = State.ReadingAttributeValue;
5165 						}
5166 						break;
5167 					}
5168 					goto case;
5169 				case State.ExpectingAttributeCloser:
5170 					if(token != "]") {
5171 						// not the closer; consider it part of comparison
5172 						if(attributeComparison == "")
5173 							attributeName ~= token;
5174 						else
5175 							attributeValue ~= token;
5176 						break;
5177 					}
5178 
5179 					// FIXME: HACK this chops off quotes from the outside for the comparison
5180 					// for compatibility with real CSS. The lexer should be properly fixed, though.
5181 					// FIXME: when the lexer is fixed, remove this lest you break it moar.
5182 					if(attributeValue.length > 2 && attributeValue[0] == '"' && attributeValue[$-1] == '"')
5183 						attributeValue = attributeValue[1 .. $-1];
5184 
5185 					// Selector operators
5186 					switch(attributeComparison) {
5187 						default: assert(0);
5188 						case "":
5189 							current.attributesPresent ~= attributeName;
5190 						break;
5191 						case "=":
5192 							current.attributesEqual ~= [attributeName, attributeValue];
5193 						break;
5194 						case "|=":
5195 							current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
5196 						break;
5197 						case "~=":
5198 							current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
5199 						break;
5200 						case "$=":
5201 							current.attributesEndsWith ~= [attributeName, attributeValue];
5202 						break;
5203 						case "^=":
5204 							current.attributesStartsWith ~= [attributeName, attributeValue];
5205 						break;
5206 						case "*=":
5207 							current.attributesInclude ~= [attributeName, attributeValue];
5208 						break;
5209 						case "!=":
5210 							current.attributesNotEqual ~= [attributeName, attributeValue];
5211 						break;
5212 					}
5213 
5214 					state = State.Starting;
5215 				break;
5216 				case State.ReadingAttributeValue:
5217 					attributeValue = token;
5218 					state = State.ExpectingAttributeCloser;
5219 				break;
5220 			}
5221 		}
5222 
5223 		commit();
5224 
5225 		return s;
5226 	}
5227 
5228 ///.
5229 Element[] removeDuplicates(Element[] input) {
5230 	Element[] ret;
5231 
5232 	bool[Element] already;
5233 	foreach(e; input) {
5234 		if(e in already) continue;
5235 		already[e] = true;
5236 		ret ~= e;
5237 	}
5238 
5239 	return ret;
5240 }
5241 
5242 // done with CSS selector handling
5243 
5244 
5245 // FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
///
/// Internally it is a flat list of [Property] entries. Shorthand properties (margin, padding,
/// border, outline) are kept as given and additionally expanded into per-side entries that are
/// marked as not given explicitly, so they can be queried but are skipped by [toString].
class CssStyle {
	/// Parses one declaration list.
	///
	/// Params:
	///     rule = the selector text the declarations belong to; empty for an inline style
	///     content = the `name: value;` pairs (the text between the braces of a rule)
	///
	/// Declarations without a colon are silently skipped. If `content` is blank the object is
	/// left completely empty (including `originatingRule`).
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			Property p;

			p.name = part[0 .. idx].strip();
			p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important
			p.givenExplicitly = true;
			p.specificity = originatingSpecificity;

			properties ~= p;
		}

		// FIXME: expansion happens after all declarations are collected, so a longhand written
		// after its shorthand ("margin: 1px; margin-left: 5px") is overwritten by the shorthand.
		// The foreach iterates the slice as it was on entry; entries appended by the expansion
		// are not visited again.
		foreach(property; properties)
			expandShortForm(property, originatingSpecificity);
	}

	/// Computes the specificity of a selector. Currently a stub: every rule, inline or not,
	/// gets the zero specificity.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
		//	s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; /// the selector text passed to the constructor (stripped)
	Specificity originatingSpecificity; /// the specificity computed for [originatingRule]

	/// Specificity packed so that a single integer comparison on `score` orders two values.
	/// The byte layout assumes a little-endian target (`important` is the most significant byte).
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	/// All known properties, explicit and inferred, in insertion order.
	Property[] properties;

	/// Property access with camelCase names: `style.marginLeft` reads "margin-left" and
	/// `style.marginLeft("1px")` writes it with inline-style specificity.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);

		// setValue takes a Specificity, not an integer, so the inline score has to be wrapped
		Specificity inlineSpecificity;
		inlineSpecificity.score = 0x02000000; /* inline specificity */
		return setValue(name, value, inlineSpecificity);
	}

	/// takes dash style name; returns null if the property is not present
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/// takes dash style name
	///
	/// Sets `name` to `value` unless an existing entry has a strictly higher specificity.
	/// A trailing `!important` is removed from the value and recorded in the specificity.
	///
	/// Params:
	///     name = dash style property name
	///     value = the new value
	///     newSpecificity = specificity of the source of this value
	///     explicit = false when the value was inferred from a shorthand
	///
	/// Returns: the cleaned-up `value`, whether or not it was actually stored.
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		foreach(ref property; properties) {
			if(property.name != name)
				continue;

			if(newSpecificity.score < property.specificity.score)
				return value; // do nothing - the specificity is too low

			// Store everything BEFORE expanding: the expansion must see the new value, and it
			// may append to `properties`, after which `property` can refer to a stale copy.
			property.givenExplicitly = explicit;
			property.value = value;
			property.specificity = newSpecificity;

			auto updated = property;
			expandShortForm(updated, newSpecificity);
			return value;
		}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit; // inferred longhands must stay out of toString()
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a one to four value box shorthand into its -top/-right/-bottom/-left longhands,
	// following the CSS rules: 1 value = all sides; 2 = vertical, horizontal;
	// 3 = top, horizontal, bottom; 4 = top, right, bottom, left.
	// Anything else is malformed and left unexpanded.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		// split on any run of whitespace so repeated spaces do not create empty parts
		auto parts = value.split();

		string top, right, bottom, left;
		switch(parts.length) {
			case 1:
				top = right = bottom = left = parts[0];
			break;
			case 2:
				top = bottom = parts[0];
				right = left = parts[1];
			break;
			case 3:
				top = parts[0];
				right = left = parts[1];
				bottom = parts[2];
			break;
			case 4:
				top = parts[0];
				right = parts[1];
				bottom = parts[2];
				left = parts[3];
			break;
			default:
				return; // bad input from a document should not take the program down
		}

		setValue(name ~ "-top", top, specificity, false);
		setValue(name ~ "-right", right, specificity, false);
		setValue(name ~ "-bottom", bottom, specificity, false);
		setValue(name ~ "-left", left, specificity, false);
	}

	/// If `p` is a known shorthand, writes the per-side properties it implies (marked as
	/// inferred). Other properties are left alone.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			// the per-side forms are not expanded any further (yet)
			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Serializes the explicitly given properties. With a selector the result is a full
	/// `selector { ... }` rule; without one it is a bare declaration list.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred ones

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}
5461 
/// Wraps `url` in CSS `url("...")` notation.
///
/// Backslashes and double quotes inside the url are backslash-escaped so they cannot
/// end the quoted string early; any other character is passed through unchanged.
string cssUrl(string url) {
	// escape the backslash first so the escapes added for quotes are not doubled
	auto escaped = url.replace(`\`, `\\`).replace(`"`, `\"`);
	return "url(\"" ~ escaped ~ "\")";
}
5465 
/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	/// One entry per `selector { declarations }` pair found in the source, in source order.
	CssStyle[] rules;

	/// Runs a small character-level state machine over `source`, collecting selector text and
	/// declaration text and creating a CssStyle each time a declaration block is closed.
	/// Comments are dropped; anything starting with '@' is skipped up to its ';' or first '}'.
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		enum Lexing {
			normal,       // collecting selector or declaration text
			afterSlash,   // saw '/', waiting to see if a comment starts
			inComment,    // between the comment opener and closer
			commentStar,  // saw '*' inside a comment, waiting for '/'
			atRule,       // after '@', before its '{' or ';'
			atRuleBlock   // inside the braces of an @ rule
		}

		auto mode = Lexing.normal;
		string selectorText;
		string declarationText;
		bool insideBraces = false; // which of the two strings receives characters

		foreach(c; source) {
			final switch(mode) {
				case Lexing.normal:
					if(c == '@') {
						mode = Lexing.atRule;
					} else if(c == '/') {
						mode = Lexing.afterSlash;
					} else if(c == '{') {
						insideBraces = true;
					} else if(c == '}') {
						// a '}' without a matching '{' creates no rule; the text is just dropped
						// (check sveit.com to reproduce)
						if(insideBraces)
							rules ~= new CssStyle(selectorText, declarationText);

						selectorText = "";
						declarationText = "";
						insideBraces = false;
					} else if(insideBraces) {
						declarationText ~= c;
					} else {
						selectorText ~= c;
					}
				break;

				case Lexing.afterSlash:
					if(c == '*') {
						mode = Lexing.inComment;
					} else {
						// it was an ordinary slash; put it back along with this character
						mode = Lexing.normal;
						if(insideBraces)
							declarationText ~= "/" ~ c;
						else
							selectorText ~= "/" ~ c;
					}
				break;

				case Lexing.inComment:
					if(c == '*')
						mode = Lexing.commentStar;
				break;

				case Lexing.commentStar:
					// it's just a comment so nothing is ever appended
					mode = (c == '/') ? Lexing.normal : Lexing.inComment;
				break;

				case Lexing.atRule:
					if(c == '{')
						mode = Lexing.atRuleBlock;
					if(c == ';')
						mode = Lexing.normal; // just skipping import
				break;

				case Lexing.atRuleBlock:
					if(c == '}')
						mode = Lexing.normal; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			// a rule without a selector shouldn't happen here in a stylesheet
			if(rule.originatingRule.length != 0) {
				foreach(element; document.querySelectorAll(rule.originatingRule)) {
					// note: this should be a different object than the inline style
					// since givenExplicitly is likely destroyed here
					auto target = element.computedStyle;

					foreach(declaration; rule.properties)
						target.setValue(declaration.name, declaration.value, declaration.specificity);
				}
			}
		}
	}
}
5565 
5566 
/// This is kinda private; just a little utility container for use by the ElementStream class.
/// A growable LIFO that starts out on a buffer embedded in the object itself.
final class Stack(T) {
	this() {
		arr = initialBuffer[];
		internalLength = 0;
	}

	/// Adds `t` on top, moving to a bigger heap buffer when the current one is full.
	void push(T t) {
		if(internalLength >= arr.length)
			grow();

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// True when nothing is stored.
	@property bool empty() {
		return internalLength == 0;
	}

	// Doubles the capacity while it is below 4096 items, then grows linearly by 4096,
	// carrying the existing items over.
	private void grow() {
		immutable newCapacity = (arr.length < 4096) ? arr.length * 2 : arr.length + 4096;
		auto bigger = new T[newCapacity];
		bigger[0 .. arr.length] = arr[];
		arr = bigger;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}
5615 
/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently standing on.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		stack = new Stack!(Current);
		isEmpty = false;
		current.element = start;
		current.childPosition = -1; // no child visited yet
	}

	/// Advances to the next element: the next unvisited child of the current one if there is
	/// any, otherwise the next unvisited child of the closest ancestor that still has one.
	/// The range becomes empty once the saved ancestors run out.
	void popFront() {
		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)
		while(!isEmpty) {
			current.childPosition++;

			if(current.childPosition < current.element.children.length) {
				// descend: remember where we are, then step into the child
				stack.push(current);
				current.element = current.element.children[current.childPosition];
				current.childPosition = -1;
				return;
			}

			// this node is exhausted; resume in its parent, if there is one
			if(stack.empty()) {
				isEmpty = true;
				return;
			}
			current = stack.pop();
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(!stack.empty) {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		} else {
			isEmpty = true; // should never happen
		}
	}

	/// True once the walk is finished.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// one level of the walk: a node and the index of the child last stepped into
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}
5688 
5689 
5690 
5691 // unbelievable.
5692 // Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
/// Returns the offset at which `needle` first occurs in `haystack`, or -1 when the
/// search comes back with nothing left of the haystack.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	// find returns the tail of haystack starting at the match, so the offset is
	// whatever was cut off the front
	immutable tailLength = std.algorithm.find(haystack, needle).length;
	if(tailLength != 0)
		return haystack.length - tailLength;
	return -1;
}
5699 
// Returns a freshly allocated array: arr[0 .. position + 1], then `what`, then the rest of arr.
// `position` must be a valid index into arr; the inputs are not modified.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	immutable splitAt = position + 1;
	return arr[0 .. splitAt] ~ what ~ arr[splitAt .. $];
}
5716 
// Linear search: true if any element of arr compares equal to item.
package bool isInArray(T)(T item, T[] arr) {
	for(size_t idx = 0; idx < arr.length; idx++) {
		if(item == arr[idx])
			return true;
	}
	return false;
}
5723 
// Builds a new associative array with the same key/value pairs as arr,
// so the result can be modified without touching the original.
private string[string] dup(in string[string] arr) {
	string[string] copy;
	foreach(key, value; arr) {
		copy[key] = value;
	}
	return copy;
}
5730 
5731 // dom event support, if you want to use it
5732 
/// Signature of a DOM event handler: a delegate that receives the element the handler
/// is attached to and the event being delivered, and returns nothing.
alias void delegate(Element handlerAttachedTo, Event event) EventHandler;
5735 
/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
class Event {
	/// Creates an event called `eventName` aimed at `target`.
	this(string eventName, Element target) {
		this.srcElement = target;
		this.eventName = eventName;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented;   // set by preventDefault
	bool propagationStopped; // set by stopPropagation
	string eventName;

	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	bool isBubbling; // false during the capture pass of dispatch, true during the bubble pass

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		auto receiver = srcElement;
		if(receiver is null)
			return;

		if(eventName in receiver.bubblingEventHandlers) {
			foreach(handler; receiver.bubblingEventHandlers[eventName])
				handler(receiver, this);
		}

		// the default handler runs last, and only if no handler vetoed it
		if(defaultPrevented)
			return;

		if(eventName in receiver.defaultEventHandlers)
			receiver.defaultEventHandlers[eventName](receiver, this);
	}

	/// this dispatches the element using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// path[0] is the target, the last entry is its topmost ancestor
		Element[] path;
		for(Element node = srcElement; node; node = node.parentNode)
			path ~= node;

		// first capture (outermost ancestor down to the target)...
		isBubbling = false;

		foreach_reverse(node; path) {
			if(eventName in node.capturingEventHandlers) {
				foreach(handler; node.capturingEventHandlers[eventName])
					handler(node, this);
			}

			// the default on capture should really be to always do nothing

			if(propagationStopped)
				break;
		}

		// ...then bubble (target up to the outermost ancestor)
		isBubbling = true;

		if(propagationStopped)
			return;

		foreach(node; path) {
			if(eventName in node.bubblingEventHandlers) {
				foreach(handler; node.bubblingEventHandlers[eventName])
					handler(node, this);
			}

			if(!defaultPrevented) {
				if(eventName in node.defaultEventHandlers)
					node.defaultEventHandlers[eventName](node, this);
			}

			if(propagationStopped)
				break;
		}
	}
}
5834 
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options

	/// Options with nothing set.
	@property static FormFieldOptions none() {
		return FormFieldOptions.init;
	}

	/// Options that only mark the field as required.
	static FormFieldOptions required() {
		auto options = FormFieldOptions.init;
		options.isRequired = true;
		return options;
	}

	/// Options with a validation pattern, optionally also required.
	static FormFieldOptions regex(string pattern, bool required = false) {
		auto options = FormFieldOptions.init;
		options.isRequired = required;
		options.pattern = pattern;
		return options;
	}

	/// Reads the required, pattern and placeholder attributes off `e`; attributes that
	/// are absent leave the corresponding option at its default.
	static FormFieldOptions fromElement(Element e) {
		auto options = FormFieldOptions.init;

		if(e.hasAttribute("required")) {
			options.isRequired = true;
		}
		if(e.hasAttribute("pattern")) {
			options.pattern = e.pattern;
		}
		if(e.hasAttribute("placeholder")) {
			options.placeholder = e.placeholder;
		}

		return options;
	}

	/// Writes the options that are set onto `e` as attributes and returns `e`.
	/// Options that are unset (false or empty) do not touch the element.
	Element applyToElement(Element e) {
		if(this.isRequired) {
			e.required = "required";
		}
		if(this.pattern.length != 0) {
			e.pattern = this.pattern;
		}
		if(this.placeholder.length != 0) {
			e.placeholder = this.placeholder;
		}
		return e;
	}
}
5888 
5889 // this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias string Utf8Stream;
else
/// A string look-alike for the parser whose contents can grow on demand.
///
/// It exposes length, indexing, slicing, `$` and appending like a string does. When it is
/// built from a pair of delegates, asking for `length` while the reader is at the end of the
/// buffered text pulls the next chunk in, so text can be fed in piece by piece.
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic

		/// Returns the next chunk of text; by default asks the delegate given to the
		/// constructor, or returns null if there is none.
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		/// Tells whether getMore has anything left to give; by default asks the delegate
		/// given to the constructor, or answers false if there is none.
		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// Wraps a complete, already available string.
		this(string d) {
			this.data = d;
		}

		/// Wraps a chunked source. The first chunk, if any, is fetched right away.
		///
		/// Params:
		///     getMoreHelper = returns the next chunk each time it is called
		///     hasMoreHelper = returns true while getMoreHelper has more to give
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();

			// note: a stray stdout.flush() used to live here; a parser helper has no
			// business touching the program's standard output
		}

		/// Length of the text buffered so far. If the highest position read is at the end
		/// of the buffer, one more chunk is appended first (when available).
		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		/// Returns the code unit at `idx` and records how far reading has gotten.
		final char opIndex(size_t idx) {
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		/// Returns `data[start .. end]` and records how far reading has gotten.
		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		/// `$` inside an index or slice; same as length, including its fetching behavior.
		final size_t opDollar() {
			return length;
		}

		/// `stream ~ s`. Note that, unlike with a real string, this appends to this very
		/// object and returns it rather than producing a separate value.
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// `stream ~= s` appends to the buffered text.
		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		/// `stream = s` replaces the buffered text. The recorded read position is kept as is.
		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}
	private:
		string data; // everything buffered so far

		size_t lastIdx; // highest index/slice end handed out so far

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}
5984 
5985 /*
5986 Copyright: Adam D. Ruppe, 2010 - 2013
5987 License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
5988 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky and Trass3r
5989 
5990         Copyright Adam D. Ruppe 2010-2013.
5991 Distributed under the Boost Software License, Version 1.0.
5992    (See accompanying file LICENSE_1_0.txt or copy at
5993         http://www.boost.org/LICENSE_1_0.txt)
5994 */