1 /** 2 This is an html DOM implementation, started with cloning 3 what the browser offers in Javascript, but going well beyond 4 it in convenience. 5 6 If you can do it in Javascript, you can probably do it with 7 this module. 8 9 And much more. 10 11 12 Note: some of the documentation here writes html with added 13 spaces. That's because ddoc doesn't bother encoding html output, 14 and adding spaces is easier than using LT macros everywhere. 15 16 17 BTW: this file depends on arsd.characterencodings, so help it 18 correctly read files from the internet. You should be able to 19 get characterencodings.d from the same place you got this file. 20 */ 21 module arsd.dom; 22 23 // FIXME: might be worth doing Element.attrs and taking opDispatch off that 24 // so more UFCS works. 25 26 27 // FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode 28 // FIXME: failing to close a paragraph sometimes messes things up too 29 30 // FIXME: it would be kinda cool to have some support for internal DTDs 31 // and maybe XPath as well, to some extent 32 /* 33 we could do 34 meh this sux 35 36 auto xpath = XPath(element); 37 38 // get the first p 39 xpath.p[0].a["href"] 40 */ 41 42 // public import arsd.domconvenience; // merged for now 43 44 /* domconvenience follows { */ 45 46 47 import std.string; 48 import std.container; 49 50 // the reason this is separated is so I can plug it into D->JS as well, which uses a different base Element class 51 52 import arsd.dom; 53 54 mixin template DomConvenienceFunctions() { 55 56 /// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done. 
57 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 58 if( 59 is(SomeElementType : Element) 60 ) 61 out(ret) { 62 assert(ret !is null); 63 } 64 body { 65 auto e = cast(SomeElementType) getElementById(id); 66 if(e is null) 67 throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, file, line); 68 return e; 69 } 70 71 /// ditto but with selectors instead of ids 72 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 73 if( 74 is(SomeElementType : Element) 75 ) 76 out(ret) { 77 assert(ret !is null); 78 } 79 body { 80 auto e = cast(SomeElementType) querySelector(selector); 81 if(e is null) 82 throw new ElementNotFoundException(SomeElementType.stringof, selector, file, line); 83 return e; 84 } 85 86 87 88 89 /// get all the classes on this element 90 @property string[] classes() { 91 return split(className, " "); 92 } 93 94 /// Adds a string to the class attribute. The class attribute is used a lot in CSS. 95 Element addClass(string c) { 96 if(hasClass(c)) 97 return this; // don't add it twice 98 99 string cn = getAttribute("class"); 100 if(cn.length == 0) { 101 setAttribute("class", c); 102 return this; 103 } else { 104 setAttribute("class", cn ~ " " ~ c); 105 } 106 107 return this; 108 } 109 110 /// Removes a particular class name. 111 Element removeClass(string c) { 112 if(!hasClass(c)) 113 return this; 114 string n; 115 foreach(name; classes) { 116 if(c == name) 117 continue; // cut it out 118 if(n.length) 119 n ~= " "; 120 n ~= name; 121 } 122 123 className = n.strip(); 124 125 return this; 126 } 127 128 /// Returns whether the given class appears in this element. 
129 bool hasClass(string c) { 130 auto cn = className; 131 132 auto idx = cn.indexOf(c); 133 if(idx == -1) 134 return false; 135 136 foreach(cla; cn.split(" ")) 137 if(cla == c) 138 return true; 139 return false; 140 141 /* 142 int rightSide = idx + c.length; 143 144 bool checkRight() { 145 if(rightSide == cn.length) 146 return true; // it's the only class 147 else if(iswhite(cn[rightSide])) 148 return true; 149 return false; // this is a substring of something else.. 150 } 151 152 if(idx == 0) { 153 return checkRight(); 154 } else { 155 if(!iswhite(cn[idx - 1])) 156 return false; // substring 157 return checkRight(); 158 } 159 160 assert(0); 161 */ 162 } 163 164 165 /* ******************************* 166 DOM Mutation 167 *********************************/ 168 169 /// Removes all inner content from the tag; all child text and elements are gone. 170 void removeAllChildren() 171 out { 172 assert(this.children.length == 0); 173 } 174 body { 175 children = null; 176 } 177 /// convenience function to quickly add a tag with some text or 178 /// other relevant info (for example, it's a src for an <img> element 179 /// instead of inner text) 180 Element addChild(string tagName, string childInfo = null, string childInfo2 = null) 181 in { 182 assert(tagName !is null); 183 } 184 out(e) { 185 assert(e.parentNode is this); 186 assert(e.parentDocument is this.parentDocument); 187 } 188 body { 189 auto e = Element.make(tagName, childInfo, childInfo2); 190 // FIXME (maybe): if the thing is self closed, we might want to go ahead and 191 // return the parent. That will break existing code though. 192 return appendChild(e); 193 } 194 195 /// Another convenience function. Adds a child directly after the current one, returning 196 /// the new child. 197 /// 198 /// Between this, addChild, and parentNode, you can build a tree as a single expression. 
199 Element addSibling(string tagName, string childInfo = null, string childInfo2 = null) 200 in { 201 assert(tagName !is null); 202 assert(parentNode !is null); 203 } 204 out(e) { 205 assert(e.parentNode is this.parentNode); 206 assert(e.parentDocument is this.parentDocument); 207 } 208 body { 209 auto e = Element.make(tagName, childInfo, childInfo2); 210 return parentNode.insertAfter(this, e); 211 } 212 213 Element addSibling(Element e) { 214 return parentNode.insertAfter(this, e); 215 } 216 217 Element addChild(Element e) { 218 return this.appendChild(e); 219 } 220 221 /// Convenience function to append text intermixed with other children. 222 /// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), "."); 223 /// or div.addChildren("Hello, ", user.name, "!"); 224 225 /// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping. 226 void addChildren(T...)(T t) { 227 foreach(item; t) { 228 static if(is(item : Element)) 229 appendChild(item); 230 else static if (is(isSomeString!(item))) 231 appendText(to!string(item)); 232 else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren"); 233 } 234 } 235 236 ///. 
237 Element addChild(string tagName, Element firstChild, string info2 = null) 238 in { 239 assert(firstChild !is null); 240 } 241 out(ret) { 242 assert(ret !is null); 243 assert(ret.parentNode is this); 244 assert(firstChild.parentNode is ret); 245 246 assert(ret.parentDocument is this.parentDocument); 247 //assert(firstChild.parentDocument is this.parentDocument); 248 } 249 body { 250 auto e = Element.make(tagName, "", info2); 251 e.appendChild(firstChild); 252 this.appendChild(e); 253 return e; 254 } 255 256 Element addChild(string tagName, in Html innerHtml, string info2 = null) 257 in { 258 } 259 out(ret) { 260 assert(ret !is null); 261 assert(ret.parentNode is this); 262 assert(ret.parentDocument is this.parentDocument); 263 } 264 body { 265 auto e = Element.make(tagName, "", info2); 266 this.appendChild(e); 267 e.innerHTML = innerHtml.source; 268 return e; 269 } 270 271 272 /// . 273 void appendChildren(Element[] children) { 274 foreach(ele; children) 275 appendChild(ele); 276 } 277 278 ///. 279 void reparent(Element newParent) 280 in { 281 assert(newParent !is null); 282 assert(parentNode !is null); 283 } 284 out { 285 assert(this.parentNode is newParent); 286 //assert(isInArray(this, newParent.children)); 287 } 288 body { 289 parentNode.removeChild(this); 290 newParent.appendChild(this); 291 } 292 293 /** 294 Strips this tag out of the document, putting its inner html 295 as children of the parent. 296 297 For example, given: <p>hello <b>there</b></p>, if you 298 call stripOut() on the b element, you'll be left with 299 <p>hello there<p>. 300 301 The idea here is to make it easy to get rid of garbage 302 markup you aren't interested in. 
303 */ 304 void stripOut() 305 in { 306 assert(parentNode !is null); 307 } 308 out { 309 assert(parentNode is null); 310 assert(children.length == 0); 311 } 312 body { 313 foreach(c; children) 314 c.parentNode = null; // remove the parent 315 if(children.length) 316 parentNode.replaceChild(this, this.children); 317 else 318 parentNode.removeChild(this); 319 this.children.length = 0; // we reparented them all above 320 } 321 322 /// shorthand for this.parentNode.removeChild(this) with parentNode null check 323 /// if the element already isn't in a tree, it does nothing. 324 Element removeFromTree() 325 in { 326 327 } 328 out(var) { 329 assert(this.parentNode is null); 330 assert(var is this); 331 } 332 body { 333 if(this.parentNode is null) 334 return this; 335 336 this.parentNode.removeChild(this); 337 338 return this; 339 } 340 341 /// Wraps this element inside the given element. 342 /// It's like this.replaceWith(what); what.appendchild(this); 343 /// 344 /// Given: < b >cool</ b >, if you call b.wrapIn(new Link("site.com", "my site is ")); 345 /// you'll end up with: < a href="site.com">my site is < b >cool< /b ></ a >. 346 Element wrapIn(Element what) 347 in { 348 assert(what !is null); 349 } 350 out(ret) { 351 assert(this.parentNode is what); 352 assert(ret is what); 353 } 354 body { 355 this.replaceWith(what); 356 what.appendChild(this); 357 358 return what; 359 } 360 361 /// Replaces this element with something else in the tree. 362 Element replaceWith(Element e) 363 in { 364 assert(this.parentNode !is null); 365 } 366 body { 367 e.removeFromTree(); 368 this.parentNode.replaceChild(this, e); 369 return e; 370 } 371 372 /** 373 Splits the className into an array of each class given 374 */ 375 string[] classNames() const { 376 return className().split(" "); 377 } 378 379 /** 380 Fetches the first consecutive nodes, if text nodes, concatenated together 381 382 If the first node is not text, returns null. 
383 384 See also: directText, innerText 385 */ 386 string firstInnerText() const { 387 string s; 388 foreach(child; children) { 389 if(child.nodeType != NodeType.Text) 390 break; 391 392 s ~= child.nodeValue(); 393 } 394 return s; 395 } 396 397 398 /** 399 Returns the text directly under this element, 400 not recursively like innerText. 401 402 See also: firstInnerText 403 */ 404 @property string directText() { 405 string ret; 406 foreach(e; children) { 407 if(e.nodeType == NodeType.Text) 408 ret ~= e.nodeValue(); 409 } 410 411 return ret; 412 } 413 414 /** 415 Sets the direct text, keeping the same place. 416 417 Unlike innerText, this does *not* remove existing 418 elements in the element. 419 420 It only replaces the first text node it sees. 421 422 If there are no text nodes, it calls appendText 423 424 So, given (ignore the spaces in the tags): 425 < div > < img > text here < /div > 426 427 it will keep the img, and replace the "text here". 428 */ 429 @property void directText(string text) { 430 foreach(e; children) { 431 if(e.nodeType == NodeType.Text) { 432 auto it = cast(TextNode) e; 433 it.contents = text; 434 return; 435 } 436 } 437 438 appendText(text); 439 } 440 } 441 442 /// finds comments that match the given txt. Case insensitive, strips whitespace. 
Element[] findComments(Document document, string txt) {
	return findComments(document.root, txt);
}

/// ditto
Element[] findComments(Element element, string txt) {
	// both sides are normalized the same way: trimmed and lower-cased
	txt = txt.strip().toLower();
	Element[] ret;

	foreach(comment; element.getElementsByTagName("#comment")) {
		string t = comment.nodeValue().strip().toLower();
		if(t == txt)
			ret ~= comment;
	}

	return ret;
}

// I'm just dicking around with this
/// A thin wrapper around an array of elements that lets a selector be applied
/// to, or a method be called on, every element at once.
struct ElementCollection {
	/// A collection holding just the one element.
	this(Element e) {
		elements = [e];
	}

	/// A collection holding everything e.querySelectorAll(selector) returns.
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// A collection over the given array (the array is not copied).
	this(Element[] e) {
		elements = e;
	}

	/// The wrapped elements.
	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs the selector on every element and gathers all the results into a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection ec;
		foreach(e; elements)
			ec.elements ~= e.getElementsBySelector(selector);
		return ec;
	}

	/// Forward method calls to each individual element of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(e; elements) {
			mixin("e." ~ name)(t);
		}
		return this;
	}

	/// collection ~ collection: a new collection with the elements of both, left side first.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		return ElementCollection(this.elements ~ rhs.elements);
	}
}


// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions.
// The type it is mixed into must provide get(string) and set(string, string).
mixin template JavascriptStyleDispatch() {
	/// obj.name reads through get; obj.name = "v" (any non-null v) writes through set.
	string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
		if(v !is null)
			return set(name, v);
		return get(name);
	}

	/// obj["key"] reads through get.
	string opIndex(string key) const {
		return get(key);
	}

	/// obj["field"] = value writes through set.
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): two likely reasons, to be confirmed before changing it:
	// `key in obj` has obj on the right-hand side, which D routes through
	// opBinaryRight rather than opBinary; and neither DataSet nor ElementStyle
	// below declares a `fields` member for the body to look in.
	string* opBinary(string op)(string key)  if(op == "in") {
		return key in fields;
	}
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
struct DataSet {
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Writes the attribute "data-" ~ unCamelCase(name) on the element and returns the value.
	string set(string name, string value) {
		_element.setAttribute("data-" ~ unCamelCase(name), value);
		return value;
	}

	/// Reads the attribute "data-" ~ unCamelCase(name) from the element.
	string get(string name) const {
		return _element.getAttribute("data-" ~ unCamelCase(name));
	}

	mixin JavascriptStyleDispatch!();
}

/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.

struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	/// A reference to the element's raw "style" attribute string.
	/// If the element has no style attribute yet, an empty one is created first
	/// so that there is something to refer to (this happens even through a const view).
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}

		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/// Sets one css property and rewrites the style attribute from the parsed rules.
	///
	/// name is given camelCased ("cssFloat" stands for "float"); an empty name does nothing.
	/// Passing a null or empty value removes the property. Returns the value.
	string set(string name, string value) {
		if(name.length == 0)
			return value;
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		r[name] = value;

		// rebuild the whole attribute text from the rule table
		_attribute = "";
		foreach(k, v; r) {
			if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */
				continue;
			if(_attribute.length)
				_attribute ~= " ";
			_attribute ~= k ~ ": " ~ v ~ ";";
		}

		_element.setAttribute("style", _attribute); // this is to trigger the observer call

		return value;
	}

	/// Returns the value of one css property, or null if it is not present.
	/// name is given camelCased ("cssFloat" stands for "float").
	string get(string name) const {
		if(name == "cssFloat")
			name = "float";
		else
			name = unCamelCase(name);
		auto r = rules();
		if(name in r)
			return r[name];
		return null;
	}

	/// Parses the style attribute into a property-name => value table.
	/// Rules are separated by ';' and split at their first ':'; names and values
	/// are trimmed. A rule without a ':' is stored with an empty value.
	string[string] rules() const {
		string[string] ret;
		foreach(rule;  _attribute.split(";")) {
			rule = rule.strip();
			if(rule.length == 0)
				continue;
			auto idx = rule.indexOf(":");
			if(idx == -1)
				ret[rule] = "";
			else {
				auto name = rule[0 .. idx].strip();
				auto value = rule[idx + 1 .. $].strip();

				ret[name] = value;
			}
		}

		return ret;
	}

	mixin JavascriptStyleDispatch!();
}

/// Converts a camel cased propertyName to a css style dashed property-name
/// Only the ASCII capitals A-Z are rewritten (to '-' plus the lower case letter);
/// every other code unit is copied through unchanged.
string unCamelCase(string a) {
	string ret;
	foreach(c; a)
		if((c >= 'A' && c <= 'Z'))
			ret ~= "-" ~ toLower("" ~ c)[0];
		else
			ret ~= c;
	return ret;
}

/// Translates a css style property-name to a camel cased propertyName
/// Dashes are dropped and the character following a dash is upper-cased.
string camelCase(string a) {
	import std.uni : toUpper;

	string ret;
	bool justSawDash = false;
	// Decode whole code points: upper-casing one UTF-8 code unit at a time
	// would hand an invalid one-byte string to toUpper for any non-ASCII
	// character that follows a dash.
	foreach(dchar c; a)
		if(c == '-') {
			justSawDash = true;
		} else {
			if(justSawDash) {
				justSawDash = false;
				ret ~= toUpper(c);
			} else
				ret ~= c;
		}
	return ret;
}









// domconvenience ends }











// @safe:

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too.

// Stripping them is useful for reading php as html.... but adding them
// is good for building php.

// I need to maintain compatibility with the way it is now too.

import arsd.characterencodings;

import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;

//import std.stdio;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.


/// This might belong in another module, but it represents a file with a mime type and some data.
/// Document implements this interface with type = text/html (see Document.contentType for more info)
/// and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
interface FileResource {
	@property string contentType() const; /// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
	immutable(ubyte)[] getData() const; /// the data
}




/// Node type codes; only the text node code is defined here.
enum NodeType { Text = 3 }


/// You can use this to do an easy null check or a dynamic cast+null check on any element.
/// Throws ElementNotFoundException when e is null or is not a T.
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
	in {}
	out(ret) { assert(ret !is null); }
body {
	if(auto converted = cast(T) e)
		return converted;
	throw new ElementNotFoundException(T.stringof, "passed value", file, line);
}

/// This represents almost everything in the DOM.
class Element {
	mixin DomConvenienceFunctions!();

	// this is a thing so i can remove observer support if it gets slow
	// I have not implemented all these yet
	//
	// Packs the arguments into a DomMutationEvent targeting this element and hands
	// it to the parent document; does nothing for elements without a document.
	private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
		if(parentDocument !is null) {
			DomMutationEvent mutation;
			mutation.operation = operation;
			mutation.target = this;
			mutation.relatedString = s1;
			mutation.relatedString2 = s2;
			mutation.related = r;
			mutation.related2 = r2;
			parentDocument.dispatchMutationEvent(mutation);
		}
	}

	// putting all the members up front

	// this ought to be private. don't use it directly.
	Element[] children;

	/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
	string tagName;

	/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
752 string[string] attributes; 753 754 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 755 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 756 private bool selfClosed; 757 758 /// Get the parent Document object that contains this element. 759 /// It may be null, so remember to check for that. 760 Document parentDocument; 761 762 ///. 763 Element parentNode; 764 765 // the next few methods are for implementing interactive kind of things 766 private CssStyle _computedStyle; 767 768 // these are here for event handlers. Don't forget that this library never fires events. 769 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 770 EventHandler[][string] bubblingEventHandlers; 771 EventHandler[][string] capturingEventHandlers; 772 EventHandler[string] defaultEventHandlers; 773 774 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 775 if(event.length > 2 && event[0..2] == "on") 776 event = event[2 .. $]; 777 778 if(useCapture) 779 capturingEventHandlers[event] ~= handler; 780 else 781 bubblingEventHandlers[event] ~= handler; 782 } 783 784 785 // and now methods 786 787 /// Convenience function to try to do the right thing for HTML. This is the main 788 /// way I create elements. 789 static Element make(string tagName, string childInfo = null, string childInfo2 = null) { 790 bool selfClosed = tagName.isInArray(selfClosedElements); 791 792 Element e; 793 // want to create the right kind of object for the given tag... 
794 switch(tagName) { 795 case "#text": 796 e = new TextNode(null, childInfo); 797 return e; 798 // break; 799 case "table": 800 e = new Table(null); 801 break; 802 case "a": 803 e = new Link(null); 804 break; 805 case "form": 806 e = new Form(null); 807 break; 808 case "tr": 809 e = new TableRow(null); 810 break; 811 case "td", "th": 812 e = new TableCell(null, tagName); 813 break; 814 default: 815 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 816 } 817 818 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 819 e.tagName = tagName; 820 e.selfClosed = selfClosed; 821 822 if(childInfo !is null) 823 switch(tagName) { 824 /* html5 convenience tags */ 825 case "audio": 826 if(childInfo.length) 827 e.addChild("source", childInfo); 828 if(childInfo2 !is null) 829 e.appendText(childInfo2); 830 break; 831 case "source": 832 e.src = childInfo; 833 if(childInfo2 !is null) 834 e.type = childInfo2; 835 break; 836 /* regular html 4 stuff */ 837 case "img": 838 e.src = childInfo; 839 if(childInfo2 !is null) 840 e.alt = childInfo2; 841 break; 842 case "link": 843 e.href = childInfo; 844 if(childInfo2 !is null) 845 e.rel = childInfo2; 846 break; 847 case "option": 848 e.innerText = childInfo; 849 if(childInfo2 !is null) 850 e.value = childInfo2; 851 break; 852 case "input": 853 e.type = "hidden"; 854 e.name = childInfo; 855 if(childInfo2 !is null) 856 e.value = childInfo2; 857 break; 858 case "button": 859 e.innerText = childInfo; 860 if(childInfo2 !is null) 861 e.type = childInfo2; 862 break; 863 case "a": 864 e.innerText = childInfo; 865 if(childInfo2 !is null) 866 e.href = childInfo2; 867 break; 868 case "script": 869 case "style": 870 e.innerRawSource = childInfo; 871 break; 872 case "meta": 873 e.name = childInfo; 874 if(childInfo2 !is null) 875 e.content = childInfo2; 876 break; 877 /* generically, assume we were passed text and perhaps class */ 878 default: 879 
e.innerText = childInfo; 880 if(childInfo2.length) 881 e.className = childInfo2; 882 } 883 884 return e; 885 } 886 887 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 888 // FIXME: childInfo2 is ignored when info1 is null 889 auto m = Element.make(tagName, cast(string) null, childInfo2); 890 m.innerHTML = innerHtml.source; 891 return m; 892 } 893 894 static Element make(string tagName, Element child, string childInfo2 = null) { 895 auto m = Element.make(tagName, cast(string) null, childInfo2); 896 m.appendChild(child); 897 return m; 898 } 899 900 901 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 902 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 903 parentDocument = _parentDocument; 904 tagName = _tagName; 905 if(_attributes !is null) 906 attributes = _attributes; 907 selfClosed = _selfClosed; 908 909 version(dom_node_indexes) 910 this.dataset.nodeIndex = to!string(&(this.attributes)); 911 912 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 913 } 914 915 /// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 916 /// Note also that without a parent document, elements are always in strict, case-sensitive mode. 917 this(string _tagName, string[string] _attributes = null) { 918 tagName = _tagName; 919 if(_attributes !is null) 920 attributes = _attributes; 921 selfClosed = tagName.isInArray(selfClosedElements); 922 923 // this is meant to reserve some memory. It makes a small, but consistent improvement. 
924 //children.length = 8; 925 //children.length = 0; 926 927 version(dom_node_indexes) 928 this.dataset.nodeIndex = to!string(&(this.attributes)); 929 } 930 931 private this(Document _parentDocument) { 932 parentDocument = _parentDocument; 933 934 version(dom_node_indexes) 935 this.dataset.nodeIndex = to!string(&(this.attributes)); 936 } 937 938 939 /* ******************************* 940 Navigating the DOM 941 *********************************/ 942 943 /// Returns the first child of this element. If it has no children, returns null. 944 /// Remember, text nodes are children too. 945 @property Element firstChild() { 946 return children.length ? children[0] : null; 947 } 948 949 /// 950 @property Element lastChild() { 951 return children.length ? children[$ - 1] : null; 952 } 953 954 955 ///. 956 @property Element previousSibling(string tagName = null) { 957 if(this.parentNode is null) 958 return null; 959 Element ps = null; 960 foreach(e; this.parentNode.childNodes) { 961 if(e is this) 962 break; 963 if(tagName == "*" && e.nodeType != NodeType.Text) { 964 ps = e; 965 break; 966 } 967 if(tagName is null || e.tagName == tagName) 968 ps = e; 969 } 970 971 return ps; 972 } 973 974 ///. 975 @property Element nextSibling(string tagName = null) { 976 if(this.parentNode is null) 977 return null; 978 Element ns = null; 979 bool mightBe = false; 980 foreach(e; this.parentNode.childNodes) { 981 if(e is this) { 982 mightBe = true; 983 continue; 984 } 985 if(mightBe) { 986 if(tagName == "*" && e.nodeType != NodeType.Text) { 987 ns = e; 988 break; 989 } 990 if(tagName is null || e.tagName == tagName) { 991 ns = e; 992 break; 993 } 994 } 995 } 996 997 return ns; 998 } 999 1000 1001 /// Gets the nearest node, going up the chain, with the given tagName 1002 /// May return null or throw. 
1003 T getParent(T = Element)(string tagName = null) if(is(T : Element)) { 1004 if(tagName is null) { 1005 static if(is(T == Form)) 1006 tagName = "form"; 1007 else static if(is(T == Table)) 1008 tagName = "table"; 1009 else static if(is(T == Link)) 1010 tagName == "a"; 1011 } 1012 1013 auto par = this.parentNode; 1014 while(par !is null) { 1015 if(tagName is null || par.tagName == tagName) 1016 break; 1017 par = par.parentNode; 1018 } 1019 1020 static if(!is(T == Element)) { 1021 auto t = cast(T) par; 1022 if(t is null) 1023 throw new ElementNotFoundException("", tagName ~ " parent not found"); 1024 } else 1025 auto t = par; 1026 1027 return t; 1028 } 1029 1030 ///. 1031 Element getElementById(string id) { 1032 // FIXME: I use this function a lot, and it's kinda slow 1033 // not terribly slow, but not great. 1034 foreach(e; tree) 1035 if(e.id == id) 1036 return e; 1037 return null; 1038 } 1039 1040 /// Note: you can give multiple selectors, separated by commas. 1041 /// It will return the first match it finds. 1042 Element querySelector(string selector) { 1043 // FIXME: inefficient; it gets all results just to discard most of them 1044 auto list = getElementsBySelector(selector); 1045 if(list.length == 0) 1046 return null; 1047 return list[0]; 1048 } 1049 1050 /// a more standards-compliant alias for getElementsBySelector 1051 Element[] querySelectorAll(string selector) { 1052 return getElementsBySelector(selector); 1053 } 1054 1055 /** 1056 Does a CSS selector 1057 1058 * -- all, default if nothing else is there 1059 1060 tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector 1061 1062 It is all additive 1063 1064 OP 1065 1066 space = descendant 1067 > = direct descendant 1068 + = sibling (E+F Matches any F element immediately preceded by a sibling element E) 1069 1070 [foo] Foo is present as an attribute 1071 [foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning". 
1072 E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" 1073 E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". 1074 1075 [item$=sdas] ends with 1076 [item^-sdsad] begins with 1077 1078 Quotes are optional here. 1079 1080 Pseudos: 1081 :first-child 1082 :last-child 1083 :link (same as a[href] for our purposes here) 1084 1085 1086 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 1087 1088 1089 1090 This ONLY cares about elements. text, etc, are ignored 1091 1092 1093 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 1094 */ 1095 Element[] getElementsBySelector(string selector) { 1096 // FIXME: this function could probably use some performance attention 1097 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 1098 1099 1100 bool caseSensitiveTags = true; 1101 if(parentDocument && parentDocument.loose) 1102 caseSensitiveTags = false; 1103 1104 Element[] ret; 1105 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 1106 ret ~= sel.getElements(this); 1107 return ret; 1108 } 1109 1110 /// . 1111 Element[] getElementsByClassName(string cn) { 1112 // is this correct? 1113 return getElementsBySelector("." ~ cn); 1114 } 1115 1116 ///. 1117 Element[] getElementsByTagName(string tag) { 1118 if(parentDocument && parentDocument.loose) 1119 tag = tag.toLower(); 1120 Element[] ret; 1121 foreach(e; tree) 1122 if(e.tagName == tag) 1123 ret ~= e; 1124 return ret; 1125 } 1126 1127 1128 /* ******************************* 1129 Attributes 1130 *********************************/ 1131 1132 /** 1133 Gets the given attribute value, or null if the 1134 attribute is not set. 
1135 1136 Note that the returned string is decoded, so it no longer contains any xml entities. 1137 */ 1138 string getAttribute(string name) const { 1139 if(parentDocument && parentDocument.loose) 1140 name = name.toLower(); 1141 auto e = name in attributes; 1142 if(e) 1143 return *e; 1144 else 1145 return null; 1146 } 1147 1148 /** 1149 Sets an attribute. Returns this for easy chaining 1150 */ 1151 Element setAttribute(string name, string value) { 1152 if(parentDocument && parentDocument.loose) 1153 name = name.toLower(); 1154 1155 // I never use this shit legitimately and neither should you 1156 auto it = name.toLower(); 1157 if(it == "href" || it == "src") { 1158 auto v = value.strip().toLower(); 1159 if(v.startsWith("vbscript:")) 1160 value = value[9..$]; 1161 if(v.startsWith("javascript:")) 1162 value = value[11..$]; 1163 } 1164 1165 attributes[name] = value; 1166 1167 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 1168 1169 return this; 1170 } 1171 1172 /** 1173 Returns if the attribute exists. 1174 */ 1175 bool hasAttribute(string name) { 1176 if(parentDocument && parentDocument.loose) 1177 name = name.toLower(); 1178 1179 if(name in attributes) 1180 return true; 1181 else 1182 return false; 1183 } 1184 1185 /** 1186 Removes the given attribute from the element. 1187 */ 1188 Element removeAttribute(string name) 1189 out(ret) { 1190 assert(ret is this); 1191 } 1192 body { 1193 if(parentDocument && parentDocument.loose) 1194 name = name.toLower(); 1195 if(name in attributes) 1196 attributes.remove(name); 1197 1198 sendObserverEvent(DomMutationOperations.removeAttribute, name); 1199 return this; 1200 } 1201 1202 /** 1203 Gets the class attribute's contents. Returns 1204 an empty string if it has no class. 1205 */ 1206 @property string className() const { 1207 auto c = getAttribute("class"); 1208 if(c is null) 1209 return ""; 1210 return c; 1211 } 1212 1213 ///. 
@property Element className(string c) {
	this.setAttribute("class", c);
	return this;
}

/**
	Provides easy access to attributes, object style.

	auto element = Element.make("a");
	element.href = "cool.html"; // this is the same as element.setAttribute("href", "cool.html");
	string where = element.href; // same as element.getAttribute("href");

	Passing a non-null value sets the attribute first; the current value is returned either way.
*/
// name != "popFront" is so duck typing doesn't think it's a range
@property string opDispatch(string name)(string v = null) if(name != "popFront") {
	// null means "read only"; note an empty but non-null string still performs a set
	if(v is null)
		return getAttribute(name);

	setAttribute(name, v);
	return getAttribute(name);
}

/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
	if(boolean)
		setAttribute(name, name);
	else
		removeAttribute(name);
}
*/

/**
	Returns the element's children.
*/
@property const(Element[]) childNodes() const {
	return this.children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
	return this.children;
}

/// HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
///
/// Given: <a data-my-property="cool" />
///
/// We get: assert(a.dataset.myProperty == "cool");
DataSet dataset() {
	auto view = DataSet(this);
	return view;
}

/// Provides both string and object style (like in Javascript) access to the style attribute.
@property ElementStyle style() {
	auto view = ElementStyle(this);
	return view;
}

/// This sets the style attribute with a string.
@property ElementStyle style(string s) {
	this.setAttribute("style", s);
	return this.style();
}

// Currently a no-op: the intended per-attribute handling below is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}


// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Lazily builds (and caches in _computedStyle) a CssStyle from the style attribute
/// plus a handful of legacy presentational attributes.
@property CssStyle computedStyle() {
	if(_computedStyle is null) {
		auto style = this.getAttribute("style");
	/* we'll treat shitty old html attributes as css here */
		if(this.hasAttribute("width"))
			style ~= "; width: " ~ this.width;
		if(this.hasAttribute("height"))
			style ~= "; height: " ~ this.height;
		if(this.hasAttribute("bgcolor"))
			style ~= "; background-color: " ~ this.bgcolor;
		if(this.tagName == "body" && this.hasAttribute("text"))
			style ~= "; color: " ~ this.text;
		if(this.hasAttribute("color"))
			style ~= "; color: " ~ this.color;
	/* done */


		_computedStyle = new CssStyle(null, style); // gives at least something to work with
	}
	return _computedStyle;
}

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// sum offsetLeft over this element and its whole offsetParent chain
		int a = offsetLeft;
		auto p = offsetParent;
		while(p) {
			a += p.offsetLeft;
			p = p.offsetParent;
		}

		return a;
	}

	///ditto
	int absoluteTop() {
		// sum offsetTop over this element and its whole offsetParent chain
		int a = offsetTop;
		auto p = offsetParent;
		while(p) {
			a += p.offsetTop;
			p = p.offsetParent;
		}

		return a;
	}
}

// Back to the regular dom functions

public:


/* *******************************
	  DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
/// The removed children are detached (their parentNode becomes null).
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
body {
	// Detach before dropping the list: otherwise the removed nodes keep claiming
	// this element as their parent and can't be handed to appendChild and
	// friends, whose contracts require parentNode to be null. removeChildren()
	// already behaves this way.
	foreach(child; children)
		child.parentNode = null;
	children = null;
}


/// Appends the given element to this one. The given element must not have a parent already.
/// Returns the appended element.
Element appendChild(Element e)
	in {
		assert(e !is null);
		assert(e.parentNode is null);
	}
	out (ret) {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
body {
	selfClosed = false; // an element with children can't serialize as <tag />
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	children ~= e;

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);

	return e;
}

/// Inserts the second element to this node, right before the first param.
/// Returns the inserted element.
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);

		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(i, e; children) {
		if(e is where) {
			children = children[0..i] ~ what ~ children[i..$];
			what.parentDocument = this.parentDocument;
			what.parentNode = this;
			return what;
		}
	}

	// Only reachable if `where` is not actually among our children, which the in
	// contract rules out. When contracts are compiled in, the out contract above
	// then fails; otherwise `what` is returned without having been inserted.
	return what;
}

/// Inserts `what` into this node's children directly after `where` and returns `what`.
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(index, existing; children) {
		if(existing !is where)
			continue;

		auto splitAt = index + 1;
		children = children[0 .. splitAt] ~ what ~ children[splitAt .. $];
		what.parentNode = this;
		what.parentDocument = this.parentDocument;
		return what;
	}

	// `where` was not found among the children; nothing was inserted
	return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
body {
	foreach(index, existing; this.children) {
		if(existing !is child)
			continue;

		// detach the old node, then put the replacement into its slot
		existing.parentNode = null;
		this.children[index] = replacement;
		replacement.parentNode = this;
		replacement.parentDocument = this.parentDocument;
		return child;
	}
	assert(0);
}


/// Appends a new TextNode holding `text` to this element. Returns this (not the new node).
Element appendText(string text) {
	Element textNode = new TextNode(parentDocument, text);
	appendChild(textNode);
	return this;
}

/// Returns the children whose nodeType is 1.
@property Element[] childElements() {
	Element[] elementsOnly;
	foreach(candidate; children) {
		if(candidate.nodeType != 1)
			continue;
		elementsOnly ~= candidate;
	}
	return elementsOnly;
}

/// Appends the given html to the element, returning the elements appended
Element[] appendHtml(string html) {
	// parse inside a throwaway <root> wrapper, then take its children
	auto scratch = new Document("<root>" ~ html ~ "</root>");
	return stealChildren(scratch.root);
}


///.
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
body {
	foreach(i, c; children) {
		if(c is where) {
			// splice child in directly behind where (no need to mutate the loop index)
			children = children[0 .. i + 1] ~ child ~ children[i + 1 .. $];
			child.parentNode = this;
			child.parentDocument = this.parentDocument;
			break;
		}
	}
}

/// Moves every child of `e` into this element and returns the moved nodes.
///
/// With no `position` the nodes are appended; otherwise they are inserted right
/// before `position`. If `position` is not one of this element's children the
/// nodes are appended instead.
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
body {
	foreach(c; e.children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}

	// private copy of the list so emptying e.children below can't affect what we return
	auto ret = e.children.dup;

	bool inserted = false;
	if(position !is null) {
		foreach(i, child; children) {
			if(child is position) {
				children = children[0..i] ~
					ret ~
					children[i..$];
				inserted = true;
				break;
			}
		}
	}

	// Also covers a position that isn't among our children: the nodes have already
	// been reparented to us, so they must end up in our list rather than vanish.
	if(!inserted)
		children ~= ret;

	e.children.length = 0;

	return ret;
}

/// Puts the current element first in our children list. The given element must not have a parent already.
Element prependChild(Element e)
	in {
		assert(e !is null);
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
body {
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	children = e ~ children;
	return e;
}


/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	The text is written into `where`; only the part written by this call is returned.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// remember where our output begins so a caller-supplied buffer's earlier contents aren't returned
	auto start = where.data.length;

	foreach(child; children) {
		assert(child !is null);

		child.writeToAppender(where);
	}

	return where.data[start .. $];
}

/**
	Takes some html and replaces the element's children with the tree made from the string.

	An empty string just removes all children. Returns this.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	selfClosed = false;

	auto doc = new Document();
	doc.parse("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// Only now that parsing succeeded: release the nodes being replaced so they
	// don't keep pointing at us as their parent.
	foreach(old; children)
		old.parentNode = null;

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}

	reparentTreeDocuments();

	// the scratch document must not keep claiming nodes that now belong to us
	doc.root.children = null;

	return this;
}

/// ditto
@property Element innerHTML(Html html) {
	return this.innerHTML(html.source);
}

// Points every element in this.tree at this element's parentDocument.
private void reparentTreeDocuments() {
	foreach(c; this.tree)
		c.parentDocument = this.parentDocument;
}

/**
	Replaces this node with the given html string, which is parsed

	Note: this invalidates the this reference, since it is removed
	from the tree.

	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	auto doc = new Document();
	doc.parse("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// Take the parsed nodes away from the scratch document (as the innerHTML
	// setter does) so its root doesn't keep listing children that point elsewhere.
	auto replacements = doc.root.children;
	doc.root.children = null;

	children = replacements;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}


	reparentTreeDocuments();


	stripOut();

	return replacements;
}

/// Returns all the html for this element, including the tag itself.
/// This is equivalent to calling toString().
@property string outerHTML() {
	return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	// writeToAppender stops right after the tag when selfClosed is set, which
	// would silently drop the raw source; innerText's setter clears it too.
	selfClosed = false;

	children.length = 0;
	auto rs = new RawSource(parentDocument, rawSource);
	rs.parentNode = this;

	children ~= rs;
}

/// Replaces the child `find` with `replace` and returns `replace`.
/// Throws: Exception if `find` is not among this element's children.
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
body {
	foreach(i, existing; children) {
		if(existing is find) {
			replace.parentNode = this;
			existing.parentNode = null; // the old child is detached
			children[i] = replace;
			replace.parentDocument = this.parentDocument;
			return replace;
		}
	}

	throw new Exception("no such child");
}

/**
	Replaces the given element with a whole group.

	An empty group simply removes `find`.
	Throws an Exception if `find` is not among this element's children.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
body {
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);
	// index type kept as int: it is handed to the module-level insertAfter below
	for(int i = 0; i < children.length; i++) {
		if(children[i] is find) {
			children[i].parentNode = null; // this element should now be dead
			children[i] = replace[0];
			foreach(e; replace) {
				e.parentNode = this;
				e.parentDocument = this.parentDocument;
			}

			// the first replacement took find's slot; the rest go in behind it
			children = .insertAfter(children, i, replace[1..$]);

			return;
		}
	}

	throw new Exception("no such child");
}


/**
	Removes the given child from this list.

	Returns the removed element.

	Throws an Exception if it is not among this element's children.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
body {
	foreach(i, e; children) {
		if(e is c) {
			children = children[0..i] ~ children [i+1..$];
			c.parentNode = null;
			return c;
		}
	}

	throw new Exception("no such child");
}

/// This removes all the children from this element, returning the old list.
/// The returned elements are detached (parentNode is null).
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
body {
	// copy first: the caller gets its own array, independent of our (now empty) list
	Element[] oldChildren = children.dup;
	foreach(c; oldChildren)
		c.parentNode = null;

	children.length = 0;

	return oldChildren;
}

/**
	Fetch the inside text, with all tags stripped out.

	<p>cool <b>api</b> &amp; code dude<p>
	innerText of that is "cool api & code dude".
*/
@property string innerText() const {
	string s;
	foreach(child; children) {
		// text nodes contribute their value; anything else is recursed into
		if(child.nodeType != NodeType.Text)
			s ~= child.innerText;
		else
			s ~= child.nodeValue();
	}
	return s;
}

/**
	Sets the inside text, replacing all children. You don't
	have to worry about entity encoding.
*/
@property void innerText(string text) {
	selfClosed = false;
	Element e = new TextNode(parentDocument, text);
	e.parentNode = this;
	children = [e];
}

/**
	Strips this node out of the document, replacing it with the given text
*/
@property void outerText(string text) {
	parentNode.replaceChild(this, new TextNode(parentDocument, text));
}

/**
	Same result as innerText; the tag with all inner tags stripped out
*/
string outerText() const {
	return innerText();
}


/* *******************************
	  Miscellaneous
*********************************/

/// This is a full clone of the element
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
body {
+/
{
	// attributes are duplicated so the clone can be edited independently
	auto e = new Element(parentDocument, tagName, attributes.dup, selfClosed);
	foreach(child; children) {
		e.appendChild(child.cloned);
	}

	return e;
}

/// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents.
Element cloneNode(bool deepClone) {
	if(deepClone)
		return this.cloned;

	// shallow clone
	auto e = new Element(parentDocument, tagName, attributes.dup, selfClosed);
	return e;
}

/// Always the empty string in this class.
string nodeValue() const {
	return "";
}

// should return int
///.
@property int nodeType() const {
	// 1 is what this class reports for itself; childElements filters on the same value
	enum int elementNodeType = 1;
	return elementNodeType;
}


invariant () {
	// a tag name never contains a space
	assert(tagName.indexOf(" ") == -1);

	// the per-child consistency checks only exist in debug builds
	debug {
		if(children !is null) {
			foreach(child; children) {
			//	assert(parentNode !is null);
				assert(child !is null);
				assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
				assert(child !is this);
				assert(child !is parentNode);
			}
		}
	}

	/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
	if(parentNode !is null) {
		// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
		auto lol = cast(TextNode) this;
		assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
	}
	+/
	//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
	// reason is so you can create these without needing a reference to the document
}

/**
	Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
	an XML file.
*/
override string toString() const {
	// all the work happens in writeToAppender, using its default (fresh) buffer
	string serialized = writeToAppender();
	return serialized;
}

/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
/// Returns the string it creates.
string writeToAppender(Appender!string where = appender!string()) const {
	assert(tagName !is null);

	where.reserve((this.children.length + 1) * 512);

	// only the text produced by this call is returned, not earlier buffer contents
	auto start = where.data.length;

	where.put("<");
	where.put(tagName);

	foreach(n, v ; attributes) {
		assert(n !is null);
		//assert(v !is null);
		where.put(" ");
		where.put(n);
		where.put("=\"");
		htmlEntitiesEncode(v, where);
		where.put("\"");
	}

	if(selfClosed){
		where.put(" />");
		return where.data[start .. $];
	}

	where.put('>');

	innerHTML(where);

	where.put("</");
	where.put(tagName);
	where.put('>');

	return where.data[start .. $];
}

/**
	Returns a lazy range of all its children, recursively.
*/
@property ElementStream tree() {
	return new ElementStream(this);
}

// I moved these from Form because they are generally useful.
// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
/// Adds a <label> containing a <span> with the label text and an <input>
/// (or a six-row <textarea> when type is "textarea") named `name`.
/// Returns the label element.
/// Tags: HTML, HTML5
// FIXME: add overloads for other label types...
Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto fs = this;
	auto i = fs.addChild("label");
	i.addChild("span", label);
	Element input;
	if(type == "textarea")
		input = i.addChild("textarea").
		setAttribute("name", name).
		setAttribute("rows", "6");
	else
		input = i.addChild("input").
		setAttribute("name", name).
		setAttribute("type", type);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return i;
}

/// Same as above, but the label content is an element you supply instead of text.
Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto fs = this;
	auto i = fs.addChild("label");
	i.addChild(label);
	Element input;
	if(type == "textarea")
		input = i.addChild("textarea").
		setAttribute("name", name).
		setAttribute("rows", "6");
	else
		input = i.addChild("input").
		setAttribute("name", name).
		setAttribute("type", type);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return i;
}

/// Shortcut for a "text" field with options.
Element addField(string label, string name, FormFieldOptions fieldOptions) {
	return addField(label, name, "text", fieldOptions);
}

/// Adds a labelled <select> named `name` with one <option> per entry of `options`.
/// Returns the label element. Note that fieldOptions is not applied yet (see FIXME).
Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto fs = this;
	auto i = fs.addChild("label");
	i.addChild("span", label);
	auto sel = i.addChild("select").setAttribute("name", name);

	foreach(k, opt; options)
		sel.addChild("option", opt, k);

	// FIXME: implement requirements somehow

	return i;
}

/// Adds a <div class="submit-holder"> containing a submit <input> and returns the div.
/// The button only gets a value attribute when label is non-empty.
Element addSubmitButton(string label = null) {
	auto t = this;
	auto holder = t.addChild("div");
	holder.addClass("submit-holder");
	auto i = holder.addChild("input");
	i.type = "submit";
	if(label.length)
		i.value = label;
	return holder;
}

}

/// An element with the tag name "#fragment" that serializes as just its children,
/// without any tag of its own.
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		// construct the base first and only then set the tag name, like the other
		// node classes in this file do, so the base constructor can never undo it
		super(_parentDocument);
		tagName = "#fragment";
	}

	/// Writes only the children; the fragment itself has no markup.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return this.innerHTML(where);
	}
}

/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all 8 bit characters as entities, thus ensuring the resultant text will work
/// even if your charset isn't set right.
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
string htmlEntitiesEncode(string data, Appender!string output = appender!string()) {
	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || cast(uint) c > 127) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	// Each markup character must come out as its *named entity* (amp, lt, gt,
	// quot), never as the raw character - emitting the raw character would make
	// this function a no-op and let attribute values break out of their quotes.
	foreach(dchar d; data) {
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (d < 128 && d > 0)
			output.put(d);
		else
			// everything else (non-ascii, and NUL) becomes a decimal numeric entity
			output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}

/// An alias for htmlEntitiesEncode; it works for xml too
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
dchar parseEntity(in dchar[] entity) {
	// `entity` is the whole reference: the leading '&', the name or number, and
	// one terminating character (normally ';'). Anything shorter has no name
	// part to look at and cannot even be sliced below.
	if(entity.length < 2)
		return '?';

	switch(entity[1..$-1]) {
		case "quot":
			return '"';
		case "apos":
			return '\'';
		case "lt":
			return '<';
		case "gt":
			return '>';
		case "amp":
			return '&';
		// the next are html rather than xml

		case "Agrave": return '\u00C0';
		case "Aacute": return '\u00C1';
		case "Acirc": return '\u00C2';
		case "Atilde": return '\u00C3';
		case "Auml": return '\u00C4';
		case "Aring": return '\u00C5';
		case "AElig": return '\u00C6';
		case "Ccedil": return '\u00C7';
		case "Egrave": return '\u00C8';
		case "Eacute": return '\u00C9';
		case "Ecirc": return '\u00CA';
		case "Euml": return '\u00CB';
		case "Igrave": return '\u00CC';
		case "Iacute": return '\u00CD';
		case "Icirc": return '\u00CE';
		case "Iuml": return '\u00CF';
		case "ETH": return '\u00D0';
		case "Ntilde": return '\u00D1';
		case "Ograve": return '\u00D2';
		case "Oacute": return '\u00D3';
		case "Ocirc": return '\u00D4';
		case "Otilde": return '\u00D5';
		case "Ouml": return '\u00D6';
		case "Oslash": return '\u00D8';
		case "Ugrave": return '\u00D9';
		case "Uacute": return '\u00DA';
		case "Ucirc": return '\u00DB';
		case "Uuml": return '\u00DC';
		case "Yacute": return '\u00DD';
		case "THORN": return '\u00DE';
		case "szlig": return '\u00DF';
		case "agrave": return '\u00E0';
		case "aacute": return '\u00E1';
		case "acirc": return '\u00E2';
		case "atilde": return '\u00E3';
		case "auml": return '\u00E4';
		case "aring": return '\u00E5';
		case "aelig": return '\u00E6';
		case "ccedil": return '\u00E7';
		case "egrave": return '\u00E8';
		case "eacute": return '\u00E9';
		case "ecirc": return '\u00EA';
		case "euml": return '\u00EB';
		case "igrave": return '\u00EC';
		case "iacute": return '\u00ED';
		case "icirc": return '\u00EE';
		case "iuml": return '\u00EF';
		case "eth": return '\u00F0';
		case "ntilde": return '\u00F1';
		case "ograve": return '\u00F2';
		case "oacute": return '\u00F3';
		case "ocirc": return '\u00F4';
		case "otilde": return '\u00F5';
		case "ouml": return '\u00F6';
		case "oslash": return '\u00F8';
		case "ugrave": return '\u00F9';
		case "uacute": return '\u00FA';
		case "ucirc": return '\u00FB';
		case "uuml": return '\u00FC';
		case "yacute": return '\u00FD';
		case "thorn": return '\u00FE';
		case "yuml": return '\u00FF';
		case "nbsp": return '\u00A0';
		case "iexcl": return '\u00A1';
		case "cent": return '\u00A2';
		case "pound": return '\u00A3';
		case "curren": return '\u00A4';
		case "yen": return '\u00A5';
		case "brvbar": return '\u00A6';
		case "sect": return '\u00A7';
		case "uml": return '\u00A8';
		case "copy": return '\u00A9';
		case "ordf": return '\u00AA';
		case "laquo": return '\u00AB';
		case "not": return '\u00AC';
		case "shy": return '\u00AD';
		case "reg": return '\u00AE';
		case "ldquo": return '\u201c';
		case "rdquo": return '\u201d';
		case "macr": return '\u00AF';
		case "deg": return '\u00B0';
		case "plusmn": return '\u00B1';
		case "sup2": return '\u00B2';
		case "sup3": return '\u00B3';
		case "acute": return '\u00B4';
		case "micro": return '\u00B5';
		case "para": return '\u00B6';
		case "middot": return '\u00B7';
		case "cedil": return '\u00B8';
		case "sup1": return '\u00B9';
		case "ordm": return '\u00BA';
		case "raquo": return '\u00BB';
		case "frac14": return '\u00BC';
		case "frac12": return '\u00BD';
		case "frac34": return '\u00BE';
		case "iquest": return '\u00BF';
		case "times": return '\u00D7';
		case "divide": return '\u00F7';
		case "OElig": return '\u0152';
		case "oelig": return '\u0153';
		case "Scaron": return '\u0160';
		case "scaron": return '\u0161';
		case "Yuml": return '\u0178';
		case "fnof": return '\u0192';
		case "circ": return '\u02C6';
		case "tilde": return '\u02DC';
		case "trade": return '\u2122';

		// Common punctuation and symbols. Most of these used to sit in a
		// commented-out list here, which meant they decoded to '?'.
		case "hellip": return '\u2026';
		case "lsquo": return '\u2018';
		case "rsquo": return '\u2019';
		case "mdash": return '\u2014';
		case "ndash": return '\u2013';
		case "bull": return '\u2022';
		case "euro": return '\u20AC';
		case "Omicron": return '\u039F';
		case "omicron": return '\u03BF';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				// HTML allows both x and X as the hexadecimal marker; an uppercase X
				// used to fall through to the decimal branch and decode the hex
				// digits as a decimal number.
				if(entity.length > 2 && (entity[2] == 'x' || entity[2] == 'X')) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					// dealing with broken html entities
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];

					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '?'; // unknown named entity
	}

	assert(0);
}

import std.utf;
import std.stdio;

/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Params:
///	data = raw HTML text that may contain entities
///	strict = when true, malformed or unterminated entities throw; when false they are recovered from
///
/// Returns: the decoded UTF-8 string. If data contains no '&' at all, the original slice is
/// returned unchanged.
///
/// In loose mode, an entity that parseEntity cannot decode, or that decodes to an invalid
/// code point (for example a lone surrogate), is replaced by U+FFFD instead of throwing.
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	dchar[] entityBeingTried;
	int entityAttemptIndex = 0;

	// Decodes the entity collected so far and appends its UTF-8 form to the output.
	// Strict mode lets every error propagate; loose mode substitutes U+FFFD so that
	// garbage such as an out-of-range numeric entity cannot abort the whole decode.
	void emitEntity() {
		dchar decoded;
		if(strict) {
			decoded = parseEntity(entityBeingTried);
		} else {
			try {
				decoded = parseEntity(entityBeingTried);
			} catch(Exception e) {
				decoded = '\uFFFD';
			}

			// std.utf.encode throws on surrogates and values above U+10FFFF
			if(!isValidDchar(decoded))
				decoded = '\uFFFD';
		}

		a ~= buffer[0 .. std.utf.encode(buffer, decoded)];
	}

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried ~= ch;

			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried));

				// if not strict, let's try to parse both.

				if(entityBeingTried == "&&")
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				else
					emitEntity();

				// tryingEntity is still true
				entityBeingTried = entityBeingTried[0 .. 1]; // keep the &
				entityAttemptIndex = 0; // restarting on this
			} else
			if(ch == ';') {
				tryingEntity = false;
				emitEntity();
			} else if(ch == ' ') {
				// e.g. you & i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
				else {
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried);
				}
			} else {
				// give up on anything that runs this long without a terminator
				if(entityAttemptIndex >= 9) {
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried);
					}
				}
			}
		} else {
			if(ch == '&') {
				tryingEntity = true;
				entityBeingTried = null;
				entityBeingTried ~= ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));

		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// Base class for the non-element nodes defined after it (raw source, server side code,
/// instructions, comments). Children cannot be appended and nodeType is always 100.
abstract class SpecialElement : Element {
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes cannot have children.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// Returns 100 for every special node.
	@property override int nodeType() const {
		return 100;
	}
}

/// A node whose source string is written to the output exactly as given.
class RawSource : SpecialElement {
	/// Stores s as the source and sets the tag name to "#raw".
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// Returns this.toString().
	override string nodeValue() const {
		return this.toString();
	}

	/// Puts the source into the appender unmodified and returns it.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	///.
string source;
}

/// Common base of the server side code nodes (PhpCode, AspCode).
/// The tag name is "#" followed by the type passed to the constructor.
abstract class ServerSideCode : SpecialElement {
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// The stored source text.
	override string nodeValue() const {
		return source;
	}

	/// Writes the source wrapped in < and > and returns the slice that was just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable mark = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		auto written = where.data;
		return written[mark .. written.length];
	}

	/// The code between the angle brackets.
	string source;
}

/// Server side code node with the tag name "#php".
class PhpCode : ServerSideCode {
	/// s is the code as it appeared between the angle brackets.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		source = s;
	}
}

/// Server side code node with the tag name "#asp".
class AspCode : ServerSideCode {
	/// s is the code as it appeared between the angle brackets.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		source = s;
	}
}

/// A <! ... > instruction; its tag name is "#bpi".
class BangInstruction : SpecialElement {
	/// s is the text that follows the <! opener.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		tagName = "#bpi";
		source = s;
	}

	/// The stored source text.
	override string nodeValue() const {
		return source;
	}

	/// Writes "<!", the source and ">" and returns the slice that was just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable mark = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		auto written = where.data;
		return written[mark .. written.length];
	}

	/// The text between <! and >.
	string source;
}

/// A < ... > instruction stored with its source; its tag name is "#qpi".
class QuestionInstruction : SpecialElement {
	/// s is the text between the angle brackets.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		tagName = "#qpi";
		source = s;
	}

	/// The stored source text.
	override string nodeValue() const {
		return source;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
	immutable mark = where.data.length;
	where.put("<");
	where.put(source);
	where.put(">");
	auto written = where.data;
	return written[mark .. written.length];
}

/// The text between the angle brackets.
string source;
}

/// A comment node; its tag name is "#comment".
class HtmlComment : SpecialElement {
	/// s is the comment text, without the <!-- and --> delimiters.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		tagName = "#comment";
		source = s;
	}

	/// The comment text.
	override string nodeValue() const {
		return source;
	}

	/// Writes the text wrapped in <!-- and --> and returns the slice that was just written.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable mark = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		auto written = where.data;
		return written[mark .. written.length];
	}

	/// The comment text.
	string source;
}




/// A run of text in the tree; its tag name is "#text".
class TextNode : Element {
  public:
	/// e is the already decoded text this node holds.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		tagName = "#text";
		contents = e;
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Builds a text node from html that still contains entities. Decoding is strict
	/// only when a parent document is given and that document is not loose.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto node = new TextNode(_parentDocument, "");
		immutable strictDecode = _parentDocument !is null && !_parentDocument.loose;
		node.contents = htmlEntitiesDecode(html, strictDecode);
		return node;
	}

	/// A new TextNode with the same parent document and contents.
	override @property Element cloned() {
		return new TextNode(parentDocument, contents);
	}

	/// The text held by this node.
	override string nodeValue() const {
		return contents; //toString();
	}

	/// NodeType.Text
	@property override int nodeType() const {
		return NodeType.Text;
	}

	///.
override string writeToAppender(Appender!string where = appender!string()) const {
	string s;
	if(contents.length)
		s = htmlEntitiesEncode(contents, where);
	else
		s = "";

	assert(s !is null);
	return s;
}

/// Always fails: text nodes cannot have children.
override Element appendChild(Element e) {
	assert(0, "Cannot append to a text node");
}

/// The text held by this node.
string contents;
// alias contents content; // I just mistype this a lot,
}

/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/

/// An <a> element with helpers for reading and editing the query string of its href.
class Link : Element {

	/// Creates an empty <a> element belonging to the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}


	/// Creates an <a> element with the given href attribute and inner text.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}
	/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
	+/
	/// This gets a variable from the URL's query string.
	/// Returns null if the variable is not present.
	string getValue(string name) {
		auto vars = variablesHash();
		if(name in vars)
			return vars[name];
		return null;
	}

	// Parses the query string of the href into a name => value table, URI-decoding both
	// names and values. A variable without '=' maps to "". Returns null if there is no href.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// The fragment comes last in a URL (path?query#fragment), so remove it before
		// looking for the '?': anything after '#' is not part of the query.
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0..fragment];

		auto ques = href.indexOf("?");
		string str = "";
		if(ques != -1)
			str = href[ques+1..$];

		string[string] hash;

		foreach(var; str.split("&")) {
			if(var.length == 0)
				continue; // stray separator, e.g. "a=1&&b=2" or a trailing '&'

			auto index = var.indexOf("=");
			if(index == -1)
				hash[decodeComponent(var)] = ""; // decode the name here too, like the branch below
			else {
				hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	/// Replaces the query string of the href with the given variables, URI-encoding
	/// names and values. An existing #fragment is kept at the end of the new href.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// Split the fragment off first. It follows the query string, so cutting at the
		// '?' before looking for '#' would throw the fragment away with the old query.
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment..$];
			href = href[0..fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0..question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeComponent(name);
			if(value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		// no variables at all: leave the bare '?' off
		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

///.
class Form : Element {

	///.
this(Document _parentDocument) {
	super(_parentDocument);
	tagName = "form";
}

// Each addField override sends the new field to the first element matching
// "fieldset div" when the form has one, and to the base implementation otherwise.

override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto container = this.querySelector("fieldset div");
	if(container !is null)
		return container.addField(label, name, type, fieldOptions);
	return super.addField(label, name, type, fieldOptions);
}

override Element addField(string label, string name, FormFieldOptions fieldOptions) {
	// same as the overload above with the type fixed to "text"
	auto container = this.querySelector("fieldset div");
	if(container !is null)
		return container.addField(label, name, "text", fieldOptions);
	return super.addField(label, name, "text", fieldOptions);
}

override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto container = this.querySelector("fieldset div");
	if(container !is null)
		return container.addField(label, name, options, fieldOptions);
	return super.addField(label, name, options, fieldOptions);
}

// FIXME: doesn't handle arrays; multiple fields can have the same name

/// Sets the form field's value. For input boxes, this sets the value attribute. For
/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
/// the checked/selected attribute from all, and adds it to the one matching the value.
/// For checkboxes, if the value is non-null and not empty, it checks the box.

/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
/// Otherwise, it makes a new input with type=hidden to keep the value.
void setValue(string field, string value, bool makeNew = true) {
	// every element in the form whose name equals field
	auto eles = getField(field);
	if(eles.length == 0) {
		if(makeNew) {
			// addInput's type parameter defaults to "hidden"
			addInput(field, value);
			return;
		} else
			throw new Exception("form field does not exist");
	}

	if(eles.length == 1) {
		auto e = eles[0];
		switch(e.tagName) {
			// a named element that is not a textarea, input or select is not handled
			default: assert(0);
			case "textarea":
				e.innerText = value;
			break;
			case "input":
				string type = e.getAttribute("type");
				if(type is null) {
					// no type attribute: treated like a plain value box
					e.value = value;
					return;
				}
				switch(type) {
					case "checkbox":
					case "radio":
						// any non-empty value checks the box, an empty one unchecks it
						if(value.length)
							e.setAttribute("checked", "checked");
						else
							e.removeAttribute("checked");
					break;
					default:
						e.value = value;
						return;
				}
			break;
			case "select":
				bool found = false;
				foreach(child; e.tree) {
					if(child.tagName != "option")
						continue;
					// an option without a value attribute is compared by its text
					string val = child.getAttribute("value");
					if(val is null)
						val = child.innerText;
					if(val == value) {
						child.setAttribute("selected", "selected");
						found = true;
					} else
						child.removeAttribute("selected");
				}

				// no option matched: add one for the value and select it
				if(!found) {
					e.addChild("option", value)
					.setAttribute("selected", "selected");
				}
			break;
		}
	} else {
		// assume radio boxes
		// check the ones whose value attribute equals value, uncheck all the others
		foreach(e; eles) {
			string val = e.getAttribute("value");
			//if(val is null)
			//	throw new Exception("don't know what to do with radio boxes with null value");
			if(val == value)
				e.setAttribute("checked", "checked");
			else
				e.removeAttribute("checked");
		}
	}
}

/// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue,
/// it makes no attempt to find and modify existing elements in the form to the new values.
void addValueArray(string key, string[] arrayOfValues) {
	foreach(item; arrayOfValues) {
		addChild("input", key, item);
	}
}

/// Gets the value of the field; what would be given if it submitted right now. (so
/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
/// given, but it is checked, it returns "checked", since null and "" are indistinguishable
string getValue(string field) {
	auto matches = getField(field);

	if(matches.length == 0)
		return "";

	if(matches.length != 1) {
		// assuming radio
		foreach(candidate; matches) {
			if(candidate.checked)
				return candidate.value;
		}
		return "";
	}

	auto only = matches[0];
	switch(only.tagName) {
		default: assert(0);
		case "textarea":
			return only.innerText;
		case "input":
			if(only.type == "checkbox") {
				if(only.checked)
					return only.value.length ? only.value : "checked";
				return "";
			}
			return only.value;
		case "select":
			// the first selected option wins
			foreach(option; only.tree) {
				if(option.tagName != "option")
					continue;
				if(option.selected)
					return option.value;
			}
		break;
	}

	return "";
}

// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
///.
string getPostableData() {
	bool[string] namesDone;

	string ret;
	bool outputted = false;

	foreach(e; getElementsBySelector("[name]")) {
		// getValue already resolves groups that share a name, so emit each name once
		if(e.name in namesDone)
			continue;

		if(outputted)
			ret ~= "&";
		else
			outputted = true;

		ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name));

		namesDone[e.name] = true;
	}

	return ret;
}

/// Gets the actual elements with the given name
Element[] getField(string name) {
	Element[] ret;
	foreach(e; tree) {
		if(e.name == name)
			ret ~= e;
	}
	return ret;
}

/// Grabs the <label> with the given for tag, if there is one.
Element getLabel(string forId) {
	foreach(e; tree)
		if(e.tagName == "label" && e.getAttribute("for") == forId)
			return e;
	return null;
}

/// Adds a new INPUT field to the end of the form with the given attributes.
Element addInput(string name, string value, string type = "hidden") {
	auto e = new Element(parentDocument, "input", null, true);
	e.name = name;
	e.value = value;
	e.type = type;

	appendChild(e);

	return e;
}

/// Removes the given field from the form. It finds the element and knocks it right out.
void removeField(string name) {
	foreach(e; getField(name))
		e.parentNode.removeChild(e);
}

/+
/// Returns all form members.
@property Element[] elements() {

}

///.
string opDispatch(string name)(string v = null)
	// filter things that should actually be attributes on the form
	if( name != "method" && name != "action" && name != "enctype"
	 && name != "style"  && name != "name"   && name != "id"      && name != "class")
{

}
+/
/+
void submit() {
	// take its elements and submit them through http
}
+/
}

import std.conv;

/// A <table> element with helpers for building rows and for reading the
/// table back as a rectangular grid of cells.
class Table : Element {

	/// Creates an empty <table> element belonging to the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "table";
	}

	/// Makes a detached <th> holding t: Html is used as markup, anything else
	/// is converted with to!string and used as text.
	Element th(T)(T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement("th");
		else
			e = Element.make("th");
		static if(is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// Makes a detached <td> holding t: Html is used as markup, anything else
	/// is converted with to!string and used as text.
	Element td(T)(T t) {
		Element e;
		if(parentDocument !is null)
			e = parentDocument.createElement("td");
		else
			e = Element.make("td");
		static if(is(T == Html))
			e.innerHTML = t;
		else
			e.innerText = to!string(t);
		return e;
	}

	/// Appends a row of <th> cells to the <thead>, creating the <thead> if needed.
	Element appendHeaderRow(T...)(T t) {
		return appendRowInternal("th", "thead", t);
	}

	/// Appends a row of <td> cells to the <tfoot>, creating the <tfoot> if needed.
	Element appendFooterRow(T...)(T t) {
		return appendRowInternal("td", "tfoot", t);
	}

	/// Appends a row of <td> cells to the <tbody>, creating the <tbody> if needed.
	Element appendRow(T...)(T t) {
		return appendRowInternal("td", "tbody", t);
	}

	/// Adds classes[i] to every cell in column i of the grid. Empty class names
	/// and columns a row does not have are skipped.
	void addColumnClasses(string[] classes...) {
		auto grid = getGrid();
		foreach(row; grid)
			foreach(i, cl; classes) {
				if(cl.length)
				if(i < row.length)
				if(row[i] !is null) // the grid is padded with null cells
					row[i].addClass(cl);
			}
	}

	// Builds a <tr> from the arguments and appends it to the first child whose tag is
	// findType, adding that child if it does not exist. An Element that is already a
	// td or th is used as it is; everything else is wrapped in a new innerType cell.
	private Element appendRowInternal(T...)(string innerType, string findType, T t) {
		Element row = Element.make("tr");

		foreach(e; t) {
			static if(is(typeof(e) : Element)) {
				if(e.tagName == "td" || e.tagName == "th")
					row.appendChild(e);
				else {
					Element a = Element.make(innerType);

					a.appendChild(e);

					row.appendChild(a);
				}
			} else static if(is(typeof(e) == Html)) {
				Element a = Element.make(innerType);
				a.innerHTML = e.source;
				row.appendChild(a);
			} else static if(is(typeof(e) == Element[])) {
				Element a = Element.make(innerType);
				foreach(ele; e)
					a.appendChild(ele);
				row.appendChild(a);
			} else {
				Element a = Element.make(innerType);
				a.innerText = to!string(e);
				row.appendChild(a);
			}
		}

		foreach(e; children) {
			if(e.tagName == findType) {
				e.appendChild(row);
				return row;
			}
		}

		// the type was not found if we are here... let's add it so it is well-formed
		auto lol = this.addChild(findType);
		lol.appendChild(row);

		return row;
	}

	/// Returns the <caption> child, appending a new empty one if the table has none.
	Element captionElement() {
		Element cap;
		foreach(c; children) {
			if(c.tagName == "caption") {
				cap = c;
				break;
			}
		}

		if(cap is null) {
			cap = Element.make("caption");
			appendChild(cap);
		}

		return cap;
	}

	/// The caption text. Reading it creates the <caption> if it is missing.
	@property string caption() {
		return captionElement().innerText;
	}

	/// Sets the caption text, creating the <caption> if it is missing.
	@property void caption(string text) {
		captionElement().innerText = text;
	}

	/// Gets the logical layout of the table as a rectangular grid of
	/// cells. It considers rowspan and colspan. A cell with a large
	/// span is represented in the grid by being referenced several times.
	/// The tablePortition parameter can get just a <thead>, <tbody>, or
	/// <tfoot> portion if you pass one.
	///
	/// Note: the rectangular grid might include null cells.
	///
	/// This is kinda expensive so you should call once when you want the grid,
	/// then do lookups on the returned array.
	TableCell[][] getGrid(Element tablePortition = null)
		in {
			// a portion, when given, must be a direct thead/tbody/tfoot child of this table
			if(tablePortition !is null) {
				assert(tablePortition.parentNode is this);
				assert(
					tablePortition.tagName == "tbody"
					||
					tablePortition.tagName == "tfoot"
					||
					tablePortition.tagName == "thead"
				);
			}
		}
	body {
		if(tablePortition is null)
			tablePortition = this;

		TableCell[][] ret;

		// FIXME: will also return rows of sub tables!
		auto rows = tablePortition.getElementsByTagName("tr");
		ret.length = rows.length;

		int maxLength = 0;

		// Stores cell in the given row. A position of -1 means "first free slot".
		// Returns the column that was actually used.
		int insertCell(int row, int position, TableCell cell) {
			if(row >= ret.length)
				return position; // not supposed to happen - a rowspan is prolly too big.

			if(position == -1) {
				position++;
				foreach(item; ret[row]) {
					if(item is null)
						break;
					position++;
				}
			}

			if(position < ret[row].length)
				ret[row][position] = cell;
			else
				// pad with nulls up to the wanted column, then append the cell
				foreach(i; ret[row].length .. position + 1) {
					if(i == position)
						ret[row] ~= cell;
					else
						ret[row] ~= null;
				}
			return position;
		}

		foreach(int i, rowElement; rows) {
			auto row = cast(TableRow) rowElement;
			assert(row !is null);
			assert(i < ret.length);

			foreach(cellElement; rowElement.childNodes) {
				auto cell = cast(TableCell) cellElement;
				if(cell is null)
					continue;

				immutable colspan = cell.colspan;
				immutable rowspan = cell.rowspan;

				// FIXME: colspan == 0 or rowspan == 0
				// is supposed to mean fill in the rest of
				// the table, not skip it
				if(rowspan < 1)
					continue;

				foreach(int j; 0 .. colspan) {
					// In its own row the cell takes the first free slot. Slots already
					// claimed by rowspans from earlier rows can push it to the right, so
					// the rows it spans into must reuse the column it really landed in.
					immutable column = insertCell(i, -1, cell);
					foreach(int k; 1 .. rowspan)
						insertCell(k + i, column, cell);
				}
			}

			if(ret[i].length > maxLength)
				maxLength = cast(int) ret[i].length;
		}

		// want to ensure it's rectangular
		foreach(ref r; ret) {
			foreach(i; r.length .. maxLength)
				r ~= null;
		}

		return ret;
	}
}

/// Represents a table row element - a <tr>
class TableRow : Element {
	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "tr";
	}

	// FIXME: the standard says there should be a lot more in here,
	// but meh, I never use it and it's a pain to implement.
}

/// Represents anything that can be a table cell - <td> or <th> html.
class TableCell : Element {
	///.
this(Document _parentDocument, string _tagName) {
	super(_parentDocument, _tagName);
}

/// The rowspan attribute as a number; 1 when the attribute is missing or empty.
@property int rowspan() const {
	auto raw = getAttribute("rowspan");
	return raw.length ? to!int(raw) : 1;
}

/// The colspan attribute as a number; 1 when the attribute is missing or empty.
@property int colspan() const {
	auto raw = getAttribute("colspan");
	return raw.length ? to!int(raw) : 1;
}

/// Sets the rowspan attribute and returns the number that was passed in.
@property int rowspan(int i) {
	immutable text = to!string(i);
	setAttribute("rowspan", text);
	return i;
}

/// Sets the colspan attribute and returns the number that was passed in.
@property int colspan(int i) {
	immutable text = to!string(i);
	setAttribute("colspan", text);
	return i;
}

}


/// A plain Exception subclass used for markup errors.
class MarkupException : Exception {

	/// file and line default to the location of the caller.
	this(string message, string file = __FILE__, size_t line = __LINE__) {
		super(message, file, line);
	}
}

/// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree.
class ElementNotFoundException : Exception {

	/// type == kind of element you were looking for and search == a selector describing the search.
	this(string type, string search, string file = __FILE__, size_t line = __LINE__) {
		super("Element of type '"~type~"' matching {"~search~"} not found.", file, line);
	}
}

/// The html struct is used to differentiate between regular text nodes and html in certain functions
///
/// Easiest way to construct it is like this: auto html = Html("<p>hello</p>");
struct Html {
	/// This string holds the actual html. Use it to retrieve the contents.
	string source;
}

/// The main document interface, including a html parser.
class Document : FileResource {
	///.
	this(string data, bool caseSensitive = false, bool strict = false) {
		parse(data, caseSensitive, strict);
	}

	/**
		Creates an empty document. It has *nothing* in it at all.
*/
this() {

}

/// This is just something I'm toying with. Right now, you use opIndex to put in css selectors.
/// It returns a struct that forwards calls to all elements it holds, and returns itself so you
/// can chain it.
///
/// Example: document["p"].innerText("hello").addClass("modified");
///
/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
///
/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
///
/// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe
/// you could put in some kind of custom filter function tho.
ElementCollection opIndex(string selector) {
	// start from a collection holding the root, then narrow it with the selector
	auto e = ElementCollection(this.root);
	return e[selector];
}

// backing store for the contentType property below
string _contentType = "text/html; charset=utf-8";

/// If you're using this for some other kind of XML, you can
/// set the content type here.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data. Otherwise,
/// if you don't set it, it assumes text/html; charset=utf-8.
@property string contentType(string mimeType) {
	_contentType = mimeType;
	return _contentType;
}

/// implementing the FileResource interface, useful for sending via
/// http automatically.
override @property string contentType() const {
	return _contentType;
}

/// implementing the FileResource interface; it calls toString.
override immutable(ubyte)[] getData() const {
	return cast(immutable(ubyte)[]) this.toString();
}


/// Concatenates any consecutive text nodes
/*
void normalize() {

}
*/

/// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them.
/// Call this before calling parse().

/// Note this will also preserve the prolog and doctype from the original file, if there was one.
void enableAddingSpecialTagsToDom() {
	// every callback simply answers true, i.e. "append the node to the document"
	parseSawComment = (string) => true;
	parseSawAspCode = (string) => true;
	parseSawPhpCode = (string) => true;
	parseSawQuestionInstruction = (string) => true;
	parseSawBangInstruction = (string) => true;
}

/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawComment;

/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawAspCode;

/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawPhpCode;

/// if it sees a <?xxx> that is not php or asp
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawQuestionInstruction;

/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawBangInstruction;

/// Given the kind of garbage you find on the Internet, try to make sense of it.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)

/// NOTE: this makes no attempt at added security.
void parseGarbage(string data) {
	parse(data, false, false, null);
}

/// Converts rawdata to UTF-8 and wraps it in a Utf8Stream.
///
/// Params:
///	rawdata = the document bytes as they were received
///	dataEncoding = name of the encoding, or null to detect it from the data
///		(byte level detection first, then an xml prolog or charset= declaration)
///	strict = if true, an undetectable encoding throws MarkupException and conversion
///		errors propagate; if false, Windows 1252 is used as the fallback
Utf8Stream handleDataEncoding(in string rawdata, string dataEncoding, bool strict) {
	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So I'm going to
		// do some hackish string checking.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// "charset=" may be the very last thing in the data, so check the
					// bounds before peeking; either kind of quote may open the value
					if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
						idx++;
				}
			}

			// found something in either branch...
			if(idx != -1) {
				// read till a quote or about 12 chars, whichever comes first...
				auto end = idx;
				while(end < dataAsBytes.length && dataAsBytes[end] != '"' && dataAsBytes[end] != '\'' && end - idx < 12)
					end++;

				// an empty name is as good as no name; leave dataEncoding null
				// so the "couldn't figure it out" handling below takes over
				if(end > idx)
					dataEncoding = cast(string) dataAsBytes[idx .. end];
			}
			// otherwise, we just don't know.
		}
	}

	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else
		// if we really don't know by here, it means we already tried UTF-8,
		// looked for utf 16 and 32 byte order marks, and looked for xml or meta
		// tags... let's assume it's Windows-1252, since that's probably the most
		// common aside from utf that wouldn't be labeled.

		dataEncoding = "Windows 1252";
	}

	// and now, go ahead and convert it.

	string data;

	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}

	// NOTE(review): in non-strict mode the name was normalized above (e.g. "utf8"), so it
	// never equals "UTF-8" here and the data always goes through convertToUtf8 - confirm
	// that this is intended before short-circuiting it.
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or unusable encoding name: fall back to the usual suspect
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;

	static if(is(Utf8Stream == string))
		return data;
	else
		return new Utf8Stream(data);
}

/**
	Take XMLish data and try to make the DOM tree out of it.

	The goal isn't to be perfect, but to just be good enough to
	approximate Javascript's behavior.

	If strict, it throws on something that doesn't make sense.
	(Examples: mismatched tags. It doesn't validate!)
	If not strict, it tries to recover anyway, and only throws
	when something is REALLY unworkable.

	If strict is false, it uses a magic list of tags that needn't
	be closed. If you are writing a document specifically for this,
	try to avoid such - use self closed tags at least. Easier to parse.

	The dataEncoding argument can be used to pass a specific
	charset encoding for automatic conversion. If null (which is NOT
	the default!), it tries to determine from the data itself,
	using the xml prolog or meta tags, and assumes UTF-8 if unsure.

	If this assumption is wrong, it can throw on non-ascii
	characters!


	Note that it previously assumed the data was encoded as UTF-8, which
	is why the dataEncoding argument defaults to that.
So it shouldn't break backward compatibility.

But, if you want the best behavior on wild data - figuring it out from the document
instead of assuming - you'll probably want to change that argument to null.

*/
void parse(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
	// convert (or validate) the raw bytes first, then hand the stream to the real parser
	auto data = handleDataEncoding(rawdata, dataEncoding, strict);
	parseStream(data, caseSensitive, strict);
}

/**
	Parses already-converted data into this document: calls clear(), then
	sets root, piecesBeforeRoot and (in non-strict mode) piecesAfterRoot.

	Params:
		data = the markup to read. It is not re-validated here.
		caseSensitive = if false, tag and attribute names are lower-cased
			and `loose` is set on the document.
		strict = if true, malformed markup throws; if false the parser
			recovers where it can, and a document without any root element
			is replaced by an empty html/head/body skeleton.

	Note: this works best in strict mode, unless data is just a simple string wrapper
*/
void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) {
	// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
	// of my big app.

	assert(data !is null);

	// go through character by character.
	// if you see a <, consider it a tag.
	// name goes until the first non tagname character
	// then see if it self closes or has an attribute

	// if not in a tag, anything not a tag is a big text
	// node child. It ends as soon as it sees a <

	// Whitespace in text or attributes is preserved, but not between attributes

	// & and friends are converted when I know them, left the same otherwise


	// this should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really)
	//validate(data); // it *must* be UTF-8 for this to work correctly

	// current read offset into data; shared by every nested helper below
	sizediff_t pos = 0;

	clear();

	loose = !caseSensitive;

	bool sawImproperNesting = false;
	bool paragraphHackfixRequired = false;

	// 1-based line number of offset p, found by counting the newlines before it
	int getLineNumber(sizediff_t p) {
		int line = 1;
		foreach(c; data[0..p])
			if(c == '\n')
				line++;
		return line;
	}

	// throws a MarkupException that carries the current offset and line number
	void parseError(string message) {
		throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message));
	}

	// advances pos past spaces, newlines and tabs
	void eatWhitespace() {
		while(pos < data.length && (data[pos] == ' ' || data[pos] == '\n' || data[pos] == '\t'))
			pos++;
	}

	// reads a tag name starting at pos; lower-cased unless caseSensitive
	string readTagName() {
		// remember to include : for namespaces
		// basically just keep going until >, /, or whitespace
		auto start = pos;
		while(	data[pos] != '>' && data[pos] != '/' &&
			data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t')
		{
			pos++;
			if(pos == data.length) {
				if(strict)
					throw new Exception("tag name incomplete when file ended");
				else
					break;
			}
		}

		if(!caseSensitive)
			return toLower(data[start..pos]);
		else
			return data[start..pos];
	}

	// reads an attribute name starting at pos; lower-cased unless caseSensitive
	string readAttributeName() {
		// remember to include : for namespaces
		// basically just keep going until >, /, =, or whitespace
		auto start = pos;
		while(	data[pos] != '>' && data[pos] != '/'  && data[pos] != '=' &&
			data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t')
		{
			if(data[pos] == '<') {
				if(strict)
					throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos)));
				else
					break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there
			}
			pos++;
			if(pos == data.length) {
				if(strict)
					throw new Exception("unterminated attribute name");
				else
					break;
			}
		}

		if(!caseSensitive)
			return toLower(data[start..pos]);
		else
			return data[start..pos];
	}

	// reads a quoted or (non-strict only) unquoted attribute value and entity-decodes it
	string readAttributeValue() {
		if(pos >= data.length) {
			if(strict)
				throw new Exception("no attribute value before end of file");
			else
				return null;
		}
		switch(data[pos]) {
			case '\'':
			case '"':
				auto started = pos;
				char end = data[pos];
				pos++;
				auto start = pos;
				while(pos < data.length && data[pos] != end)
					pos++;
				if(strict && pos == data.length)
					throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started));
				string v = htmlEntitiesDecode(data[start..pos], strict);
				pos++; // skip over the end
			return v;
			default:
				if(strict)
					parseError("Attributes must be quoted");
				// read until whitespace or terminator (/ or >)
				auto start = pos;
				while(
					pos < data.length &&
					data[pos] != '>' &&
					// unquoted attributes might be urls, so gotta be careful with them and self-closed elements
					!(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') &&
					data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t')
					pos++;

				string v = htmlEntitiesDecode(data[start..pos], strict);
				// don't skip the end - we'll need it later
				return v;
		}
	}

	// consumes everything up to (not including) the next < as one text node
	TextNode readTextNode() {
		auto start = pos;
		while(pos < data.length && data[pos] != '<') {
			pos++;
		}

		return TextNode.fromUndecodedString(this, data[start..pos]);
	}

	// this is obsolete!
	RawSource readCDataNode() {
		auto start = pos;
		while(pos < data.length && data[pos] != '<') {
			pos++;
		}

		return new RawSource(this, data[start..pos]);
	}


	// what readElement hands back to its caller
	struct Ele {
		int type; // element or closing tag or nothing
			/*
				type == 0 means regular node, self-closed (element is valid)
				type == 1 means closing tag (payload is the tag name, element may be valid)
				type == 2 means you should ignore it completely
				type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not
				type == 4 means the document was totally empty
			*/
		Element element; // for type == 0 or type == 3
		string payload; // for type == 1
	}
	// recursively read a tag
	Ele readElement(string[] parentChain = null) {
		// FIXME: this is the slowest function in this module, by far, even in strict mode.
		// Loose mode should perform decently, but strict mode is the important one.
		if(!strict && parentChain is null)
			parentChain = [];

		static string[] recentAutoClosedTags;

		if(pos >= data.length)
		{
			if(strict) {
				throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain));
			} else {
				if(parentChain.length)
					return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended
				else
					return Ele(4); // signal emptiness upstream
			}
		}

		if(data[pos] != '<') {
			return Ele(0, readTextNode(), null);
		}

		enforce(data[pos] == '<');
		pos++;
		if(pos == data.length) {
			if(strict)
				throw new MarkupException("Found trailing < at end of file");
			// if not strict, we'll just skip the switch
		} else
		switch(data[pos]) {
			// I don't care about these, so I just want to skip them
			case '!': // might be a comment, a doctype, or a special instruction
				pos++;

					// FIXME: we should store these in the tree too
					// though I like having it stripped out tbh.

				if(pos == data.length) {
					if(strict)
						throw new MarkupException("<! opened at end of file");
				} else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') {
					// comment
					pos += 2;

					// FIXME: technically, a comment is anything
					// between -- and -- inside a <!> block.
					// so in <!-- test -- lol> , the " lol" is NOT a comment
					// and should probably be handled differently in here, but for now
					// I'll just keep running until --> since that's the common way

					auto commentStart = pos;
					// <= rather than <, so that a --> sitting exactly at the end
					// of the input is still recognized as the terminator
					while(pos + 3 <= data.length && data[pos..pos+3] != "-->")
						pos++;

					auto end = commentStart;

					if(pos + 3 > data.length) {
						// ran out of input without ever seeing -->
						if(strict)
							throw new MarkupException("unclosed comment");
						end = data.length;
						pos = data.length;
					} else {
						end = pos;
						assert(data[pos] == '-');
						pos++;
						assert(data[pos] == '-');
						pos++;
						assert(data[pos] == '>');
						pos++;
					}

					if(parseSawComment !is null)
						if(parseSawComment(data[commentStart .. end])) {
							return Ele(3, new HtmlComment(this, data[commentStart .. end]), null);
						}
				} else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") {
					pos += 7;

					auto cdataStart = pos;

					ptrdiff_t end = -1;
					typeof(end) cdataEnd;

					if(pos < data.length) {
						// cdata isn't allowed to nest, so this should be generally ok, as long as it is found
						end = data[pos .. $].indexOf("]]>");
					}

					if(end == -1) {
						if(strict)
							throw new MarkupException("Unclosed CDATA section");
						end = pos;
						cdataEnd = pos;
					} else {
						cdataEnd = pos + end;
						pos = cdataEnd + 3;
					}

					return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null);
				} else {
					auto start = pos;
					while(pos < data.length && data[pos] != '>')
						pos++;

					auto bangEnds = pos;
					if(pos == data.length) {
						if(strict)
							throw new MarkupException("unclosed processing instruction (<!xxx>)");
					} else pos++; // skipping the >

					if(parseSawBangInstruction !is null)
						if(parseSawBangInstruction(data[start .. bangEnds])) {
							// FIXME: these should be able to modify the parser state,
							// doing things like adding entities, somehow.

							return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null);
						}
				}

				/*
				if(pos < data.length && data[pos] == '>')
					pos++; // skip the >
				else
					assert(!strict);
				*/
			break;
			case '%':
			case '?':
				/*
					Here's what we want to support:

					<% asp code %>
					<%= asp code %>
					<?php php code ?>
					<?= php code ?>

					The contents don't really matter, just if it opens with
					one of the above for, it ends on the two char terminator.

					<?something>
						this is NOT php code
						because I've seen this in the wild: <?EM-dummyText>

						This could be php with shorttags which would be cut off
						prematurely because if(a >) - that > counts as the close
						of the tag, but since dom.d can't tell the difference
						between that and the <?EM> real world example, it will
						not try to look for the ?> ending.

					The difference between this and the asp/php stuff is that it
					ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end
					on >.
				*/

				char end = data[pos];
				auto started = pos;
				bool isAsp = end == '%';
				int currentIndex = 0;
				bool isPhp = false;
				bool isEqualTag = false;
				int phpCount = 0;

			    more:
				pos++; // skip the start
				if(pos == data.length) {
					if(strict)
						throw new MarkupException("Unclosed <"~end~" by end of file");
				} else {
					currentIndex++;
					if(currentIndex == 1 && data[pos] == '=') {
						if(!isAsp)
							isPhp = true;
						isEqualTag = true;
						goto more;
					}
					if(currentIndex == 1 && data[pos] == 'p')
						phpCount++;
					if(currentIndex == 2 && data[pos] == 'h')
						phpCount++;
					if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2)
						isPhp = true;

					if(data[pos] == '>') {
						if((isAsp || isPhp) && data[pos - 1] != end)
							goto more;
						// otherwise we're done
					} else
						goto more;
				}

				//writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]);
				auto code = data[started .. pos];


				assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length));
				if(pos < data.length)
					pos++; // get past the >

				if(isAsp && parseSawAspCode !is null) {
					if(parseSawAspCode(code)) {
						return Ele(3, new AspCode(this, code), null);
					}
				} else if(isPhp && parseSawPhpCode !is null) {
					if(parseSawPhpCode(code)) {
						return Ele(3, new PhpCode(this, code), null);
					}
				} else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) {
					if(parseSawQuestionInstruction(code)) {
						return Ele(3, new QuestionInstruction(this, code), null);
					}
				}
			break;
			case '/': // closing an element
				pos++; // skip the start
				auto p = pos;
				while(pos < data.length && data[pos] != '>')
					pos++;
				//writefln("</%s>", data[p..pos]);
				if(pos == data.length && data[pos-1] != '>') {
					if(strict)
						throw new MarkupException("File ended before closing tag had a required >");
					else
						data ~= ">"; // just hack it in
				}
				pos++; // skip the '>'

				string tname = data[p..pos-1];
				if(!caseSensitive)
					tname = tname.toLower();

			return Ele(1, null, tname); // closing tag reports itself here
			case ' ': // assume it isn't a real element...
				if(strict)
					parseError("bad markup - improperly placed <");
				else
					return Ele(0, TextNode.fromUndecodedString(this, "<"), null);
			break;
			default:

				if(!strict) {
					// what about something that kinda looks like a tag, but isn't?
					auto nextTag = data[pos .. $].indexOf("<");
					auto closeTag = data[pos .. $].indexOf(">");
					if(closeTag != -1 && nextTag != -1)
						if(nextTag < closeTag) {
							// since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically

							auto equal = data[pos .. $].indexOf("=\"");
							if(equal != -1 && equal < closeTag) {
								// this MIGHT be ok, soldier on
							} else {
								// definitely no good, this must be a (horribly distorted) text node
								pos++; // skip the < we're on - don't want text node to end prematurely
								auto node = readTextNode();
								node.contents = "<" ~ node.contents; // put this back
								return Ele(0, node, null);
							}
						}
				}

				string tagName = readTagName();
				string[string] attributes;

				// builds the element for tagName/attributes once the end of the
				// opening tag is reached and, unless self-closed, reads its children
				Ele addTag(bool selfClosed) {
					if(selfClosed)
						pos++;
					else {
						if(!strict)
							if(tagName.isInArray(selfClosedElements))
								// these are de-facto self closed
								selfClosed = true;
					}

					if(strict)
					enforce(data[pos] == '>');//, format("got %s when expecting >\nContext:\n%s", data[pos], data[pos - 100 .. pos + 100]));
					else {
						// if we got here, it's probably because a slash was in an
						// unquoted attribute - don't trust the selfClosed value
						if(!selfClosed)
							selfClosed = tagName.isInArray(selfClosedElements);

						while(pos < data.length && data[pos] != '>')
							pos++;
					}

					auto whereThisTagStarted = pos; // for better error messages

					pos++;

					auto e = createElement(tagName);
					e.attributes = attributes;
					version(dom_node_indexes) {
						if(e.dataset.nodeIndex.length == 0)
							e.dataset.nodeIndex = to!string(&(e.attributes));
					}
					e.selfClosed = selfClosed;
					e.parseAttributes();


					// HACK to handle script and style as a raw data section as it is in HTML browsers
					if(tagName == "script" || tagName == "style") {
						if(!selfClosed) {
							string closer = "</" ~ tagName ~ ">";
							ptrdiff_t ending;
							if(pos >= data.length)
								ending = -1;
							else
								ending = indexOf(data[pos..$], closer);

							if(loose && ending == -1 && pos < data.length)
								ending = indexOf(data[pos..$], closer.toUpper());
							if(ending == -1) {
								if(strict)
									throw new Exception("tag " ~ tagName ~ " never closed");
								else {
									// let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit.
									if(pos < data.length) {
										e = new TextNode(this, data[pos .. $]);
										pos = data.length;
									}
								}
							} else {
								ending += pos;
								e.innerRawSource = data[pos..ending];
								pos = ending + closer.length;
							}
						}
						return Ele(0, e, null);
					}

					bool closed = selfClosed;

					// flags a <p> directly inside a <p> so it can be re-parented after parsing
					void considerHtmlParagraphHack(Element n) {
						assert(!strict);
						if(e.tagName == "p" && e.tagName == n.tagName) {
							// html lets you write <p> para 1 <p> para 1
							// but in the dom tree, they should be siblings, not children.
							paragraphHackfixRequired = true;
						}
					}

					//writef("<%s>", tagName);
					while(!closed) {
						Ele n;
						if(strict)
							n = readElement();
						else
							n = readElement(parentChain ~ tagName);

						if(n.type == 4) return n; // the document is empty

						if(n.type == 3 && n.element !is null) {
							// special node, append if possible
							if(e !is null)
								e.appendChild(n.element);
							else
								piecesBeforeRoot ~= n.element;
						} else if(n.type == 0) {
							if(!strict)
								considerHtmlParagraphHack(n.element);
							e.appendChild(n.element);
						} else if(n.type == 1) {
							bool found = false;
							if(n.payload != tagName) {
								if(strict)
									parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted)));
								else {
									sawImproperNesting = true;
									// this is so we don't drop several levels of awful markup
									if(n.element) {
										if(!strict)
											considerHtmlParagraphHack(n.element);
										e.appendChild(n.element);
										n.element = null;
									}

									// is the element open somewhere up the chain?
									foreach(i, parent; parentChain)
										if(parent == n.payload) {
											recentAutoClosedTags ~= tagName;
											// just rotating it so we don't inadvertently break stuff with vile crap
											if(recentAutoClosedTags.length > 4)
												recentAutoClosedTags = recentAutoClosedTags[1 .. $];

											n.element = e;
											return n;
										}

									// if not, this is a text node; we can't fix it up...

									// If it's already in the tree somewhere, assume it is closed by algorithm
									// and we shouldn't output it - odds are the user just flipped a couple tags
									foreach(ele; e.tree) {
										if(ele.tagName == n.payload) {
											found = true;
											break;
										}
									}

									foreach(ele; recentAutoClosedTags) {
										if(ele == n.payload) {
											found = true;
											break;
										}
									}

									if(!found) // if not found in the tree though, it's probably just text
									e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">"));
								}
							} else {
								if(n.element) {
									if(!strict)
										considerHtmlParagraphHack(n.element);
									e.appendChild(n.element);
								}
							}

							if(n.payload == tagName) // in strict mode, this is always true
								closed = true;
						} else { /*throw new Exception("wtf " ~ tagName);*/ }
					}
					//writef("</%s>\n", tagName);
					return Ele(0, e, null);
				}

				// if a tag was opened but not closed by end of file, we can arrive here
				if(!strict && pos >= data.length)
					return addTag(false);
				//else if(strict) assert(0); // should be caught before

				switch(data[pos]) {
					default: assert(0);
					case '/': // self closing tag
						return addTag(true);
					case '>':
						return addTag(false);
					case ' ':
					case '\t':
					case '\n':
						// there might be attributes...
						moreAttributes:
						eatWhitespace();

						// same deal as above the switch....
						if(!strict && pos >= data.length)
							return addTag(false);

						if(strict && pos >= data.length)
							throw new MarkupException("tag open, didn't find > before end of file");

						switch(data[pos]) {
							case '/': // self closing tag
								return addTag(true);
							case '>': // closed tag; open -- we now read the contents
								return addTag(false);
							default: // it is an attribute
								string attrName = readAttributeName();
								string attrValue = attrName;
								if(pos >= data.length) {
									if(strict)
										assert(0, "this should have thrown in readAttributeName");
									else {
										data ~= ">";
										goto blankValue;
									}
								}
								if(data[pos] == '=') {
									pos++;
									attrValue = readAttributeValue();
								}

								blankValue:

								if(strict && attrName in attributes)
									throw new MarkupException("Repeated attribute: " ~ attrName);

								if(attrName.strip().length)
									attributes[attrName] = attrValue;
								else if(strict) throw new MarkupException("wtf, zero length attribute name");

								if(!strict && pos < data.length && data[pos] == '<') {
									// this is the broken tag that doesn't have a > at the end
									data = data[0 .. pos] ~ ">" ~ data[pos.. $];
									// let's insert one as a hack
									goto case '>';
								}

								goto moreAttributes;
						}
				}
		}

		return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly.
		//assert(0);
	}

	eatWhitespace();
	Ele r;
	do {
		r = readElement(); // there SHOULD only be one element...

		if(r.type == 3 && r.element !is null)
			piecesBeforeRoot ~= r.element;

		if(r.type == 4)
			break; // the document is completely empty...
	} while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node

	root = r.element;

	if(!strict) // in strict mode, we'll just ignore stuff after the xml
	while(r.type != 4) {
		r = readElement();
		if(r.type != 4 && r.type != 2) { // if not empty and not ignored
			if(r.element !is null)
				piecesAfterRoot ~= r.element;
		}
	}

	if(root is null)
	{
		if(strict)
			assert(0, "empty document should be impossible in strict mode");
		else
			parse(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do
	}

	if(paragraphHackfixRequired) {
		assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag...

		// in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml).
		// It's hard to handle above though because my code sucks. So, we'll fix it here.

		auto iterator = root.tree;
		foreach(ele; iterator) {
			if(ele.parentNode is null)
				continue;

			if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) {
				auto shouldBePreviousSibling = ele.parentNode;
				auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder...
				holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
				iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
			}
		}
	}
}

/* end massive parse function */

/// Gets the <title> element's innerText, if one exists. Returns "" otherwise.
@property string title() {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);
	if(e)
		return e.innerText();
	return "";
}

/// Sets the title of the page, creating a <title> element if needed.
/// A newly created element is only attached to the tree when a <head> exists.
@property void title(string t) {
	bool doesItMatch(Element e) {
		return (e.tagName == "title");
	}

	auto e = findFirst(&doesItMatch);

	if(!e) {
		e = createElement("title");
		auto heads = getElementsByTagName("head");
		if(heads.length)
			heads[0].appendChild(e);
	}

	if(e)
		e.innerText = t;
}

// FIXME: would it work to alias root this; ???? might be a good idea
/// These functions all forward to the root element. See the documentation in the Element class.
Element getElementById(string id) {
	return root.getElementById(id);
}

/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
body {
	return root.requireElementById!(SomeElementType)(id, file, line);
}

/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if( is(SomeElementType : Element))
	out(ret) { assert(ret !is null); }
body {
	return root.requireSelector!(SomeElementType)(selector, file, line);
}


/// ditto
Element querySelector(string selector) {
	return root.querySelector(selector);
}

/// ditto
Element[] querySelectorAll(string selector) {
	return root.querySelectorAll(selector);
}

/// ditto
Element[] getElementsBySelector(string selector) {
	return root.getElementsBySelector(selector);
}

/// ditto
Element[] getElementsByTagName(string tag) {
	return root.getElementsByTagName(tag);
}

/** Returns the first element, in findFirst's search order, whose tagName equals tag
    (tag is lower-cased first when the document is loose), or null if none matches.

    FIXME: btw, this could just be a lazy range...... */
Element getFirstElementByTagName(string tag) {
	if(loose)
		tag = tag.toLower();
	bool doesItMatch(Element e) {
		return e.tagName == tag;
	}
	return findFirst(&doesItMatch);
}

/// This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
	return getFirstElementByTagName("body");
}

/// this uses a weird thing... it's [name=] if no colon and
/// [property=] if colon. Returns null when no matching meta element is found.
string getMeta(string name) {
	string thing = name.indexOf(":") == -1 ? "name" : "property";
	auto e = querySelector("head meta["~thing~"="~name~"]");
	if(e is null)
		return null;
	return e.content;
}

/// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags.
void setMeta(string name, string value) {
	string thing = name.indexOf(":") == -1 ? "name" : "property";
	auto e = querySelector("head meta["~thing~"="~name~"]");
	if(e is null) {
		// no such meta yet: add one under <head> (requireSelector throws if there is no head)
		e = requireSelector("head").addChild("meta");
		e.setAttribute(thing, name);
	}

	e.content = value;
}

/// Returns the result of getElementsByTagName("form"), cast to Form[].
Form[] forms() {
	return cast(Form[]) getElementsByTagName("form");
}

/// Creates a new form element via createElement("form").
Form createForm()
	out(ret) {
		assert(ret !is null);
	}
body {
	return cast(Form) createElement("form");
}

///.
// Creates an element owned by this document. The name is lower-cased first
// when the document is in loose mode.
Element createElement(string name) {
	if(loose)
		name = name.toLower();

	auto e = Element.make(name);
	e.parentDocument = this;

	return e;

//	return new Element(this, name, null, selfClosed);
}

/// Creates a new DocumentFragment tied to this document.
Element createFragment() {
	return new DocumentFragment(this);
}

/// Creates a new TextNode holding content, tied to this document.
Element createTextNode(string content) {
	return new TextNode(this, content);
}


/// Depth-first, pre-order search from the root: returns the first element for
/// which doesItMatch returns true, or null if nothing matches or the document
/// has no root yet.
Element findFirst(bool delegate(Element) doesItMatch) {
	// an empty (or just cleared) document has nothing to search, and
	// handing a null element to the predicate would crash
	if(root is null)
		return null;

	Element result;

	bool goThroughElement(Element e) {
		if(doesItMatch(e)) {
			result = e;
			return true;
		}

		foreach(child; e.children) {
			if(goThroughElement(child))
				return true;
		}

		return false;
	}

	goThroughElement(root);

	return result;
}

/// Resets root to null and loose to false. The pieces before/after the root
/// and the prolog are left untouched.
void clear() {
	root = null;
	loose = false;
}

/// Overrides the prolog; from now on `prolog` returns exactly d.
void setProlog(string d) {
	_prolog = d;
	prologWasSet = true;
}

///.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it

/// The text that toString emits ahead of the root element.
@property string prolog() const {
	// if the user explicitly changed it, do what they want
	// or if we didn't keep/find stuff from the document itself,
	// we'll use the builtin one as a default.
	if(prologWasSet || piecesBeforeRoot.length == 0)
		return _prolog;

	string p;
	foreach(e; piecesBeforeRoot)
		p ~= e.toString() ~ "\n";
	return p;
}

/// Serializes the document: the prolog followed by the root element.
/// A document without a root yields just the prolog.
override string toString() const {
	if(root is null)
		return prolog;
	return prolog ~ root.toString();
}

///.
Element root;

/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;

/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;

/// Set by parseStream to !caseSensitive and reset to false by clear();
/// when true, createElement and getFirstElementByTagName lower-case names.
bool loose;



// what follows are for mutation events that you can observe
void delegate(DomMutationEvent)[] eventObservers;

/// Calls every delegate in eventObservers, in order, with the given event.
void dispatchMutationEvent(DomMutationEvent e) {
	foreach(observer; eventObservers) {
		observer(e);
	}
}
}


// FIXME: since Document loosens the input requirements, it should probably be the sub class...
/// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header)
class XmlDocument : Document {
	/// Parses data right away, case-sensitively and in strict mode.
	this(string data) {
		contentType = "text/xml; charset=utf-8";
		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";

		parse(data, true, true);
	}
}



// for the observers
enum DomMutationOperations {
	setAttribute,
	removeAttribute,
	appendChild, // tagname, attributes[], innerHTML
	insertBefore,
	truncateChildren,
	removeChild,
	appendHtml,
	replaceHtml,
	appendText,
	replaceText,
	replaceTextOnly
}

// and for observers too
struct DomMutationEvent {
	DomMutationOperations operation;
	Element target;
	Element related; // what this means differs with the operation
	Element related2;
	string relatedString;
	string relatedString2;
}


// tags the non-strict parser treats as self-closed even when written without a trailing slash
private enum static string[] selfClosedElements = [
	// html 4
	"img", "hr", "input", "br", "col", "link", "meta",
	// html 5
	"source"
];

static import std.conv;

///.
// Converts a string of hexadecimal digits (no prefix, no sign) to an int.
// Digits may be upper or lower case. An empty string yields 0. Any other
// character throws an Exception naming it; when several are illegal, the
// rightmost one is reported. There is no overflow check.
int intFromHex(string hex) {
	int place = 1; // weight of the digit being read: 16^0, 16^1, ...
	int value = 0;
	// walk from the least significant (rightmost) digit to the most significant
	for(sizediff_t a = hex.length - 1; a >= 0; a--) {
		int v;
		char q = hex[a];
		if( q >= '0' && q <= '9')
			v = q - '0';
		else if (q >= 'a' && q <= 'f')
			v = q - 'a' + 10;
		else if (q >= 'A' && q <= 'F')
			v = q - 'A' + 10; // upper-case digits mean the same as lower-case ones
		else throw new Exception("Illegal hex character: " ~ q);

		value += v * place;

		place *= 16;
	}

	return value;
}


// CSS selector handling

// EXTENSIONS
// dd - dt means get the dt directly before that dd (opposite of +)                  NOT IMPLEMENTED
// dd -- dt means rewind siblings until you hit a dt, go as far as you need to       NOT IMPLEMENTED
// dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl")
// dt << dl  means go as far up as needed to find a dl (you have an element and want its containers)      NOT IMPLEMENTED
// :first  means to stop at the first hit, don't do more (so p + p == p ~ p:first)



// CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it.
// That might be useful to implement, though I do have parent selectors too.

/// The operator tokens the selector lexer recognizes, tried in this order.
static immutable string[] selectorTokens = [
	// It is important that the 2 character possibilities go first here for accurate lexing
	"~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard
	"<<", // my any-parent extension (reciprocal of whitespace)
	" - ", // previous-sibling extension (whitespace required to disambiguate tag-names)
	".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<"
]; // other is white space or a name.

///.
/// Identifies which selector token, if any, begins at str[position].
///
/// The whole token text is compared, so a multi-character token only matches
/// when every one of its characters is present.
///
/// Returns: the index into selectorTokens of the first matching entry, or -1
/// when no token starts at that position (the character belongs to a name).
sizediff_t idToken(string str, sizediff_t position) {
	auto rest = str[position .. $];
	foreach(a, token; selectorTokens) {
		if(rest.length >= token.length && rest[0 .. token.length] == token)
			return a;
	}
	return -1;
}

/// Splits a single selector (no commas) into a flat list of tokens: the
/// operator strings from selectorTokens, and the names found between them.
///
/// Whitespace around the > + ~ < combinators is removed first, newlines are
/// treated as spaces, and a backslash makes the next character part of a name
/// instead of an operator.
string[] lexSelector(string selector) {

	// FIXME: it doesn't support quoted attributes
	// FIXME: it doesn't support backslash escaped characters
	// FIXME: it should ignore /* comments */
	string[] tokens;
	sizediff_t start = -1; // where the name currently being read began, or -1
	size_t skip = 0; // characters still to pass over because a multi-character token consumed them
	// get rid of useless, non-syntax whitespace

	selector = selector.strip();
	selector = selector.replace("\n", " "); // FIXME hack

	selector = selector.replace(" >", ">");
	selector = selector.replace("> ", ">");
	selector = selector.replace(" +", "+");
	selector = selector.replace("+ ", "+");
	selector = selector.replace(" ~", "~");
	selector = selector.replace("~ ", "~");
	selector = selector.replace(" <", "<");
	selector = selector.replace("< ", "<");
		// FIXME: this is ugly ^^^^^. It should just ignore that whitespace somewhere else.

	// FIXME: another ugly hack. maybe i should just give in and do this the right way......
	// Removes single backslashes from a name and turns doubled ones into one.
	string fixupEscaping(string input) {
		auto lol = input.replace("\\", "\u00ff");
		lol = lol.replace("\u00ff\u00ff", "\\");
		return lol.replace("\u00ff", "");
	}

	bool escaping = false;
	foreach(i, c; selector) { // kill useless leading/trailing whitespace too
		if(skip) {
			skip--;
			continue;
		}

		sizediff_t tid = -1;

		if(escaping)
			escaping = false; // the escaped character is always part of a name
		else if(c == '\\')
			escaping = true;
		else
			tid = idToken(selector, i);

		if(tid == -1) {
			if(start == -1)
				start = i;
		} else {
			if(start != -1) {
				tokens ~= fixupEscaping(selector[start..i]);
				start = -1;
			}
			tokens ~= selectorTokens[tid];

			// the remaining characters of this token are already accounted for
			skip = selectorTokens[tid].length - 1;
		}
	}
	if(start != -1)
		tokens ~= fixupEscaping(selector[start..$]);

	return tokens;
}

/// One step of a selector: a combinator (separation) plus the filters an
/// element has to pass to match that step.
struct SelectorPart {
	string tagNameFilter; /// required tag name; "" or "*" accepts any tag
	string[] attributesPresent; /// [attr]
	string[2][] attributesEqual; /// [attr=value]
	string[2][] attributesStartsWith; /// [attr^=value]
	string[2][] attributesEndsWith; /// [attr$=value]
	// split it on space, then match to these
	string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value]
	// split it on dash, then match to these
	string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value]
	string[2][] attributesInclude; /// [attr*=value]
	string[2][] attributesNotEqual; /// [attr!=value] -- extension by me

	bool firstChild; /// :first-child
	bool lastChild; /// :last-child

	bool emptyElement; /// :empty
	bool oddChild; /// :odd-child (extension)
	bool evenChild; /// :even-child (extension)

	bool rootElement; /// :root

	int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf

	/// Rebuilds a textual form of this part: the combinator, the tag filter,
	/// every attribute test in bracket form, then the pseudo-classes.
	string toString() {
		string ret;
		switch(separation) {
			default: assert(0);
			case -1: break;
			case 0: ret ~= " "; break;
			case 1: ret ~= ">"; break;
			case 2: ret ~= "+"; break;
			case 3: ret ~= "~"; break;
			case 4: ret ~= "<"; break;
		}
		ret ~= tagNameFilter;
		foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]";
		foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=" ~ a[1] ~ "]";
		foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=" ~ a[1] ~ "]";
		foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=" ~ a[1] ~ "]";
		foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=" ~ a[1] ~ "]";
		foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=" ~ a[1] ~ "]";
		foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=" ~ a[1] ~ "]";
		foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=" ~ a[1] ~ "]";

		if(firstChild) ret ~= ":first-child";
		if(lastChild) ret ~= ":last-child";
		if(emptyElement) ret ~= ":empty";
		if(oddChild) ret ~= ":odd-child";
		if(evenChild) ret ~= ":even-child";
		if(rootElement) ret ~= ":root";

		return ret;
	}

	// USEFUL
	/// Tests whether e passes every filter of this part. The combinator
	/// (separation) is not considered here. Returns false for null and for
	/// anything whose nodeType is not 1.
	bool matchElement(Element e) {
		// FIXME: this can be called a lot of times, and really add up in times according to the profiler.
		// Each individual call is reasonably fast already, but it adds up.
		if(e is null) return false;
		if(e.nodeType != 1) return false;

		if(tagNameFilter != "" && tagNameFilter != "*")
			if(e.tagName != tagNameFilter)
				return false;
		if(firstChild) {
			if(e.parentNode is null)
				return false;
			if(e.parentNode.childElements[0] !is e)
				return false;
		}
		if(lastChild) {
			if(e.parentNode is null)
				return false;
			auto ce = e.parentNode.childElements;
			if(ce[$-1] !is e)
				return false;
		}
		if(emptyElement) {
			if(e.children.length)
				return false;
		}
		if(rootElement) {
			if(e.parentNode !is null)
				return false;
		}
		if(oddChild || evenChild) {
			if(e.parentNode is null)
				return false;
			// i is the zero-based position among the parent's child elements
			foreach(i, child; e.parentNode.childElements) {
				if(child is e) {
					if(oddChild && !(i&1))
						return false;
					if(evenChild && (i&1))
						return false;
					break;
				}
			}
		}

		// true if value is one of the pieces of attr when split on separator
		bool matchWithSeparator(string attr, string value, string separator) {
			foreach(s; attr.split(separator))
				if(s == value)
					return true;
			return false;
		}

		foreach(a; attributesPresent)
			if(a !in e.attributes)
				return false;
		foreach(a; attributesEqual)
			if(a[0] !in e.attributes || e.attributes[a[0]] != a[1])
				return false;
		foreach(a; attributesNotEqual)
			// FIXME: maybe it should say null counts... this just bit me.
			// I did [attr][attr!=value] to work around.
			//
			// if it's null, it's not equal, right?
			//if(a[0] !in e.attributes || e.attributes[a[0]] == a[1])
			if(e.getAttribute(a[0]) == a[1])
				return false;
		foreach(a; attributesInclude)
			if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1))
				return false;
		foreach(a; attributesStartsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1]))
				return false;
		foreach(a; attributesEndsWith)
			if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1]))
				return false;
		foreach(a; attributesIncludesSeparatedBySpaces)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " "))
				return false;
		foreach(a; attributesIncludesSeparatedByDashes)
			if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-"))
				return false;

		return true;
	}
}

// USEFUL
/// Walks the selector parts from start. For each element that matches the
/// first part (looked for where that part's combinator says to look), the
/// remaining parts are applied recursively from that element. The result may
/// contain the same element more than once.
Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) {
	Element[] ret;
	if(!parts.length) {
		return [start]; // the null selector only matches the start point; it
				// is what terminates the recursion
	}

	auto part = parts[0];
	switch(part.separation) {
		default: assert(0);
		case -1:
		case 0: // tree
			foreach(e; start.tree) {
				if(part.separation == 0 && start is e)
					continue; // space doesn't match itself!
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 1: // children
			foreach(e; start.childNodes) {
				if(part.matchElement(e)) {
					ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 2: // next-sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				// locate start among its parent's child elements
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				if(pos + 1 < children.length) {
					auto e = children[pos+1];
					if(part.matchElement(e))
						ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 3: // younger sibling
			auto tmp = start.parentNode;
			if(tmp !is null) {
				sizediff_t pos = -1;
				auto children = tmp.childElements;
				foreach(i, child; children) {
					if(child is start) {
						pos = i;
						break;
					}
				}
				assert(pos != -1);
				foreach(e; children[pos+1..$]) {
					if(part.matchElement(e))
						ret ~= getElementsBySelectorParts(e, parts[1..$]);
				}
			}
		break;
		case 4: // immediate parent node, an extension of mine to walk back up the tree
			auto e = start.parentNode;
			if(part.matchElement(e)) {
				ret ~= getElementsBySelectorParts(e, parts[1..$]);
			}
			/*
				Example of usefulness:

				Consider you have an HTML table. If you want to get all rows that have a th, you can do:

				table th < tr

				Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes
			*/
		break;
		case 5: // any parent node, another extension of mine to go up the tree (backward of the whitespace operator)
			/*
				Like with the < operator, this is best used to find some parent of a particular known element.

				Say you have an anchor inside a
			*/
			// not implemented: matches nothing
		break;
	}

	return ret;
}

///.
struct Selector {
	/// The steps of this selector, in the order they are applied.
	SelectorPart[] parts;

	/// Joins the text form of every part, in order.
	string toString() {
		string text;
		for(size_t idx = 0; idx < parts.length; idx++)
			text ~= parts[idx].toString();
		return text;
	}

	// USEFUL
	/// Runs the parts starting from start and returns the matches with
	/// duplicates removed.
	Element[] getElements(Element start) {
		auto matches = getElementsBySelectorParts(start, parts);
		return removeDuplicates(matches);
	}

	// USEFUL (but not implemented)
	/// If relativeTo == null, it assumes the root of the parent document.
	/// Currently a stub: it reports a match for every element.
	bool matchElement(Element e, Element relativeTo = null) {
		// FIXME
		/+
		Element where = e;
		foreach(part; retro(parts)) {
			if(where is relativeTo)
				return false; // at end of line, if we aren't done by now, the match fails
			if(!part.matchElement(where))
				return false; // didn't match

			if(part.selection == 1) // the > operator
				where = where.parentNode;
			else if(part.selection == 0) { // generic parent
				// need to go up the whole chain
			}
		}
		+/
		return true; // if we got here, it is a success
	}

	// the string should NOT have commas. Use parseSelectorString for that instead
	/// Lexes and parses a single comma-free selector.
	static Selector fromString(string selector) {
		auto tokens = lexSelector(selector);
		return parseSelector(tokens);
	}
}

/// Parses a comma separated group of selectors, one Selector per piece.
Selector[] parseSelectorString(string selector, bool caseSensitiveTags = true) {
	auto pieces = selector.split(",");

	Selector[] parsed;
	for(size_t idx = 0; idx < pieces.length; idx++) {
		auto tokens = lexSelector(pieces[idx]);
		parsed ~= parseSelector(tokens, caseSensitiveTags);
	}

	return parsed;
}

///.
/// Builds a Selector from the token list produced by lexSelector.
///
/// Params:
///	tokens = operator and name tokens of ONE selector (no commas)
///	caseSensitiveTags = when false, tag names are lower-cased before being stored
Selector parseSelector(string[] tokens, bool caseSensitiveTags = true) {
	Selector s;

	SelectorPart current;
	void commit() {
		// might as well skip null items
		if(current != current.init) {
			s.parts ~= current;

			current = current.init; // start right over
		}
	}

	// Handles a combinator token. If the part being built holds nothing but
	// an earlier combinator, the whitespace between the two is not
	// significant ("a  b", "a >  b"), so no empty match-anything part is
	// committed: a real combinator replaces a pending space, and a space
	// never replaces anything.
	void startCombinator(int separation) {
		if(current.separation != -1) {
			SelectorPart blank;
			blank.separation = current.separation;
			if(current == blank) {
				if(separation != 0)
					current.separation = separation;
				return;
			}
		}
		commit();
		current.separation = separation;
	}

	enum State {
		Starting,
		ReadingClass,
		ReadingId,
		ReadingAttributeSelector,
		ReadingAttributeComparison,
		ExpectingAttributeCloser,
		ReadingPseudoClass,
		ReadingAttributeValue
	}
	State state = State.Starting;
	string attributeName, attributeValue, attributeComparison;
	foreach(token; tokens) {
		// tid stays -1 when the token is a name rather than an operator
		sizediff_t tid = -1;
		foreach(i, item; selectorTokens)
			if(token == item) {
				tid = i;
				break;
			}
		final switch(state) {
			case State.Starting: // fresh, might be reading an operator or a tagname
				if(tid == -1) {
					if(!caseSensitiveTags)
						token = token.toLower();
					current.tagNameFilter = token;
				} else {
					// Selector operators
					switch(token) {
						case "*":
							current.tagNameFilter = "*";
						break;
						case " ":
							startCombinator(0); // tree
						break;
						case ">":
							startCombinator(1); // child
						break;
						case "+":
							startCombinator(2); // sibling directly after
						break;
						case "~":
							startCombinator(3); // any sibling after
						break;
						case "<":
							startCombinator(4); // immediate parent of
						break;
						case "[":
							state = State.ReadingAttributeSelector;
						break;
						case ".":
							state = State.ReadingClass;
						break;
						case "#":
							state = State.ReadingId;
						break;
						case ":":
							state = State.ReadingPseudoClass;
						break;

						default:
							assert(0, token);
					}
				}
			break;
			case State.ReadingClass:
				current.attributesIncludesSeparatedBySpaces ~= ["class", token];
				state = State.Starting;
			break;
			case State.ReadingId:
				current.attributesEqual ~= ["id", token];
				state = State.Starting;
			break;
			case State.ReadingPseudoClass:
				switch(token) {
					case "first-child":
						current.firstChild = true;
					break;
					case "last-child":
						current.lastChild = true;
					break;
					case "only-child":
						current.firstChild = true;
						current.lastChild = true;
					break;
					case "empty":
						// one with no children
						current.emptyElement = true;
					break;
					case "link":
						current.attributesPresent ~= "href";
					break;
					case "root":
						current.rootElement = true;
					break;
					// FIXME: add :not()
					// My extensions
					case "odd-child":
						current.oddChild = true;
					break;
					case "even-child":
						current.evenChild = true;
					break;

					case "visited", "active", "hover", "target", "focus", "checked", "selected":
						// state pseudo-classes are turned into a test for a placeholder attribute
						current.attributesPresent ~= "nothing";
						// FIXME
					/*
					// defined in the standard, but I don't implement it
					case "not":
					*/
					/+
					// extensions not implemented
					//case "text": // takes the text in the element and wraps it in an element, returning it
					+/
						goto case;
					case "before", "after":
						current.attributesPresent ~= "FIXME";

					break;
					default:
						//if(token.indexOf("lang") == -1)
						//assert(0, token);
					break;
				}
				state = State.Starting;
			break;
			case State.ReadingAttributeSelector:
				attributeName = token;
				attributeComparison = null;
				attributeValue = null;
				state = State.ReadingAttributeComparison;
			break;
			case State.ReadingAttributeComparison:
				// FIXME: these things really should be quotable in the proper lexer...
				if(token != "]") {
					if(token.indexOf("=") == -1) {
						// not a comparison; consider it
						// part of the attribute name (e.g. the ":lang" of xml:lang)
						attributeName ~= token;
					} else {
						attributeComparison = token;
						state = State.ReadingAttributeValue;
					}
					break;
				}
				goto case;
			case State.ExpectingAttributeCloser:
				if(token != "]") {
					// not the closer; consider it part of comparison
					if(attributeComparison == "")
						attributeName ~= token;
					else
						attributeValue ~= token;
					break;
				}

				// FIXME: HACK this chops off quotes from the outside for the comparison
				// for compatibility with real CSS. The lexer should be properly fixed, though.
				// FIXME: when the lexer is fixed, remove this lest you break it moar.
				// (>= 2 so that the empty quoted value "" is unquoted too)
				if(attributeValue.length >= 2 && attributeValue[0] == '"' && attributeValue[$-1] == '"')
					attributeValue = attributeValue[1 .. $-1];

				// Selector operators
				switch(attributeComparison) {
					default: assert(0);
					case "":
						current.attributesPresent ~= attributeName;
					break;
					case "=":
						current.attributesEqual ~= [attributeName, attributeValue];
					break;
					case "|=":
						current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue];
					break;
					case "~=":
						current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue];
					break;
					case "$=":
						current.attributesEndsWith ~= [attributeName, attributeValue];
					break;
					case "^=":
						current.attributesStartsWith ~= [attributeName, attributeValue];
					break;
					case "*=":
						current.attributesInclude ~= [attributeName, attributeValue];
					break;
					case "!=":
						current.attributesNotEqual ~= [attributeName, attributeValue];
					break;
				}

				state = State.Starting;
			break;
			case State.ReadingAttributeValue:
				attributeValue = token;
				state = State.ExpectingAttributeCloser;
			break;
		}
	}

	commit();

	return s;
}

/// Returns the elements of input with repeats dropped, keeping the first
/// occurrence of each and the original order.
Element[] removeDuplicates(Element[] input) {
	Element[] ret;

	bool[Element] already;
	foreach(e; input) {
		if(e in already) continue;
		already[e] = true;
		ret ~= e;
	}

	return ret;
}

// done with CSS selector handling


// FIXME: use the better parser from html.d
/// This is probably not useful to you unless you're writing a browser or something like that.
/// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes.
/// From here, you can start to make a layout engine for the box model and have a css aware browser.
class CssStyle {
	/// Parses a declaration list ("name: value; name: value").
	///
	/// Params:
	///	rule = the selector text the declarations belong to; empty for an inline style
	///	content = the declarations, without the surrounding braces
	///
	/// Declarations are applied in source order through setValue, so a later
	/// declaration replaces an earlier one of the same name and a longhand
	/// written after its shorthand wins. Declarations without a colon are ignored.
	this(string rule, string content) {
		rule = rule.strip();
		content = content.strip();

		if(content.length == 0)
			return;

		originatingRule = rule;
		originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work!

		foreach(part; content.split(";")) {
			part = part.strip();
			if(part.length == 0)
				continue;
			auto idx = part.indexOf(":");
			if(idx == -1)
				continue;
				//throw new Exception("Bad css rule (no colon): " ~ part);

			auto name = part[0 .. idx].strip();
			auto value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important

			setValue(name, value, originatingSpecificity);
		}
	}

	/// Currently a stub: always returns a zeroed specificity.
	Specificity getSpecificityOfRule(string rule) {
		Specificity s;
		if(rule.length == 0) { // inline
		//	s.important = 2;
		} else {
			// FIXME
		}

		return s;
	}

	string originatingRule; ///.
	Specificity originatingSpecificity; ///.

	/// The four byte fields overlay score, which is what gets compared.
	union Specificity {
		uint score; ///.
		// version(little_endian)
		///.
		struct {
			ubyte tags; ///.
			ubyte classes; ///.
			ubyte ids; ///.
			ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important
		}
	}

	///.
	struct Property {
		bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left"
		string name; ///.
		string value; ///.
		Specificity specificity; ///.
		// do we care about the original source rule?
	}

	///.
	Property[] properties;

	/// Property access by camelCase name: with no argument it reads the
	/// value, with an argument it sets it at inline-style importance.
	string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") {
		string name = unCamelCase(nameGiven);
		if(value is null)
			return getValue(name);

		Specificity inlineSpecificity;
		inlineSpecificity.important = 2; /* inline specificity */
		return setValue(name, value, inlineSpecificity);
	}

	/// takes dash style name; returns null when the property is not present
	string getValue(string name) {
		foreach(property; properties)
			if(property.name == name)
				return property.value;
		return null;
	}

	/// takes dash style name
	///
	/// Sets a property, unless it already exists with a higher specificity
	/// score. Shorthand properties are expanded into their sides afterwards.
	///
	/// Params:
	///	name = dash style property name
	///	value = new value; a trailing "!important" is stripped and raises the importance
	///	newSpecificity = specificity the value is set with
	///	explicit = false when the value was inferred from a shorthand
	///
	/// Returns: the value, without any "!important" marker.
	string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) {
		value = value.replace("! important", "!important");
		if(value.indexOf("!important") != -1) {
			newSpecificity.important = 1; // FIXME
			value = value.replace("!important", "").strip();
		}

		// indexed rather than by ref: expanding a shorthand appends to
		// properties, which may move the array
		foreach(i; 0 .. properties.length) {
			if(properties[i].name != name)
				continue;

			if(newSpecificity.score < properties[i].specificity.score)
				return value; // do nothing - the specificity is too low

			properties[i].givenExplicitly = explicit;
			properties[i].value = value;
			properties[i].specificity = newSpecificity;

			auto updated = properties[i]; // copy taken before anything is appended
			expandShortForm(updated, newSpecificity);
			return value;
		}

		// it's not here...

		Property p;
		p.givenExplicitly = explicit;
		p.name = name;
		p.value = value;
		p.specificity = newSpecificity;

		properties ~= p;
		expandShortForm(p, newSpecificity);

		return value;
	}

	// Expands a 1 to 4 value shorthand (margin, padding) into its four sides
	// in the CSS order top, right, bottom, left. Values that do not have 1 to
	// 4 parts are ignored.
	private void expandQuadShort(string name, string value, Specificity specificity) {
		auto parts = value.split(); // any run of whitespace separates the parts
		switch(parts.length) {
			case 1:
				setValue(name ~"-left", parts[0], specificity, false);
				setValue(name ~"-right", parts[0], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 2:
				setValue(name ~"-left", parts[1], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-bottom", parts[0], specificity, false);
			break;
			case 3:
				// top, left-and-right, bottom
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[1], specificity, false);

			break;
			case 4:
				setValue(name ~"-top", parts[0], specificity, false);
				setValue(name ~"-right", parts[1], specificity, false);
				setValue(name ~"-bottom", parts[2], specificity, false);
				setValue(name ~"-left", parts[3], specificity, false);
			break;
			default:
				// empty or malformed value: nothing to expand
			break;
		}
	}

	/// If p is a shorthand (margin, padding, border, outline), sets the
	/// corresponding -left/-right/-top/-bottom properties as inferred values.
	void expandShortForm(Property p, Specificity specificity) {
		switch(p.name) {
			case "margin":
			case "padding":
				expandQuadShort(p.name, p.value, specificity);
			break;
			case "border":
			case "outline":
				setValue(p.name ~ "-left", p.value, specificity, false);
				setValue(p.name ~ "-right", p.value, specificity, false);
				setValue(p.name ~ "-top", p.value, specificity, false);
				setValue(p.name ~ "-bottom", p.value, specificity, false);
			break;

			case "border-top":
			case "border-bottom":
			case "border-left":
			case "border-right":
			case "outline-top":
			case "outline-bottom":
			case "outline-left":
			case "outline-right":

			default: {}
		}
	}

	/// Writes the explicitly given properties back out as CSS text: a
	/// "rule { ... }" block when there is an originating rule, otherwise a
	/// bare declaration list.
	override string toString() {
		string ret;
		if(originatingRule.length)
			ret = originatingRule ~ " {";

		foreach(property; properties) {
			if(!property.givenExplicitly)
				continue; // skip the inferred values

			if(originatingRule.length)
				ret ~= "\n\t";
			else
				ret ~= " ";

			ret ~= property.name ~ ": " ~ property.value ~ ";";
		}

		if(originatingRule.length)
			ret ~= "\n}\n";

		return ret;
	}
}

/// Wraps url in the CSS url("...") notation. The url is inserted as-is, with
/// no escaping.
string cssUrl(string url) {
	return "url(\"" ~ url ~ "\")";
}

/// This probably isn't useful, unless you're writing a browser or something like that.
/// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css
/// as text.
///
/// The idea, however, is to represent a kind of CSS object model, complete with specificity,
/// that you can apply to your documents to build the complete computedStyle object.
class StyleSheet {
	/// One entry per "selector { declarations }" block found in the source.
	CssStyle[] rules;

	/// Reads CSS source text one character at a time and collects its rule
	/// blocks into rules. Comments are dropped and @ rules are skipped.
	this(string source) {
		// FIXME: handle @ rules and probably could improve lexer
		// add nesting?
		int state;
		string currentRule;
		string currentValue;

		// points at whichever of the two strings is currently being filled
		string* currentThing = &currentRule;
		foreach(c; source) {
			switch(state) {
				default: assert(0);
				case 0: // starting - we assume we're reading a rule
					switch(c) {
						case '@':
							state = 4;
						break;
						case '/':
							state = 1;
						break;
						case '{':
							currentThing = &currentValue;
						break;
						case '}':
							if(currentThing is &currentValue) {
								rules ~= new CssStyle(currentRule, currentValue);

								currentRule = "";
								currentValue = "";

								currentThing = &currentRule;
							} else {
								// idk what is going on here.
								// check sveit.com to reproduce
								currentRule = "";
								currentValue = "";
							}
						break;
						default:
							(*currentThing) ~= c;
					}
				break;
				case 1: // expecting *
					if(c == '*')
						state = 2;
					else {
						// it was a plain slash, not a comment opener
						state = 0;
						(*currentThing) ~= "/" ~ c;
					}
				break;
				case 2: // inside comment
					if(c == '*')
						state = 3;
				break;
				case 3: // expecting / to end comment
					if(c == '/')
						state = 0;
					else
						state = 2; // it's just a comment so no need to append
				break;
				case 4: // inside an @ rule, before its block or terminating semicolon
					if(c == '{')
						state = 5;
					if(c == ';')
						state = 0; // just skipping import
				break;
				case 5: // inside the block of an @ rule
					if(c == '}')
						state = 0; // skipping font face probably
				break;
			}
		}
	}

	/// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call
	void apply(Document document) {
		foreach(rule; rules) {
			if(rule.originatingRule.length == 0)
				continue; // this shouldn't happen here in a stylesheet
			foreach(element; document.querySelectorAll(rule.originatingRule)) {
				// note: this should be a different object than the inline style
				// since givenExplicitly is likely destroyed here
				auto current = element.computedStyle;

				foreach(item; rule.properties)
					current.setValue(item.name, item.value, item.specificity);
			}
		}
	}
}


/// This is kinda private; just a little utility container for use by the ElementStream class.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer[];
	}

	/// Adds t on top, moving to a larger array when the current one is full
	/// (doubling below 4096 slots, then growing by 4096).
	void push(T t) {
		if(internalLength >= arr.length) {
			auto oldarr = arr;
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. oldarr.length] = oldarr[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	///.
	@property bool empty() {
		return internalLength ? false : true;
	}

	///.
	private T[] arr;
	private size_t internalLength;
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

/// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively.
final class ElementStream {

	/// The element the walk is currently at.
	@property Element front() {
		return current.element;
	}

	/// Use Element.tree instead.
	this(Element start) {
		current.element = start;
		current.childPosition = -1;
		isEmpty = false;
		stack = new Stack!(Current);
	}

	/*
		Handle it
		handle its children

	*/

	/// Advances to the next element: the first unvisited child of the
	/// current one if there is one, otherwise the next unvisited child of the
	/// nearest ancestor that still has one. Marks the range empty when the
	/// walk is back at the start with nothing left.
	void popFront() {
		// FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)
		while(!isEmpty) {
			current.childPosition++;
			if(current.childPosition < current.element.children.length) {
				// descend: remember where we were, then move to the child
				stack.push(current);
				current.element = current.element.children[current.childPosition];
				current.childPosition = -1;
				return;
			}

			// this element is exhausted; go back up and try its next sibling
			if(stack.empty())
				isEmpty = true;
			else
				current = stack.pop();
		}
	}

	/// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable.
	void currentKilled() {
		if(stack.empty) // should never happen
			isEmpty = true;
		else {
			current = stack.pop();
			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
		}
	}

	///.
	@property bool empty() {
		return isEmpty;
	}

	private:

	// an element plus the index of the child last descended into (-1 = none yet)
	struct Current {
		Element element;
		int childPosition;
	}

	Current current;

	Stack!(Current) stack;

	bool isEmpty;
}



// unbelievable.
// Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
/// Returns the offset of the first occurrence of needle in haystack, or -1
/// when the search comes back empty.
sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
	auto found = std.algorithm.find(haystack, needle);
	if(found.length == 0)
		return -1;
	return haystack.length - found.length;
}

// Returns a new array: arr[0 .. position + 1], then what, then the rest of arr.
private T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	T[] ret;
	ret.length = arr.length + what.length;
	int a = 0;
	foreach(i; arr[0..position+1])
		ret[a++] = i;

	foreach(i; what)
		ret[a++] = i;

	foreach(i; arr[position+1..$])
		ret[a++] = i;

	return ret;
}

// Linear search for item in arr.
package bool isInArray(T)(T item, T[] arr) {
	foreach(i; arr)
		if(item == i)
			return true;
	return false;
}

// Copies an associative array of strings into a fresh one.
private string[string] dup(in string[string] arr) {
	string[string] ret;
	foreach(k, v; arr)
		ret[k] = v;
	return ret;
}

// dom event support, if you want to use it

/// used for DOM events
alias void delegate(Element handlerAttachedTo, Event event) EventHandler;

/// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it.
class Event {
	this(string eventName, Element target) {
		this.eventName = eventName;
		this.srcElement = target;
	}

	/// Prevents the default event handler (if there is one) from being called
	void preventDefault() {
		defaultPrevented = true;
	}

	/// Stops the event propagation immediately.
	void stopPropagation() {
		propagationStopped = true;
	}

	bool defaultPrevented;
	bool propagationStopped;
	string eventName;

	Element srcElement;
	alias srcElement target;

	Element relatedTarget;

	int clientX;
	int clientY;

	int button;

	bool isBubbling;

	/// this sends it only to the target. If you want propagation, use dispatch() instead.
	void send() {
		if(srcElement is null)
			return;

		auto e = srcElement;

		if(eventName in e.bubblingEventHandlers)
		foreach(handler; e.bubblingEventHandlers[eventName])
			handler(e, this);

		if(!defaultPrevented)
			if(eventName in e.defaultEventHandlers)
				e.defaultEventHandlers[eventName](e, this);
	}

	/// this dispatches the element using the capture -> target -> bubble process
	void dispatch() {
		if(srcElement is null)
			return;

		// first capture, then bubble

		// chain[0] is the target, followed by each ancestor in turn
		Element[] chain;
		Element curr = srcElement;
		while(curr) {
			auto l = curr;
			chain ~= l;
			curr = curr.parentNode;

		}

		isBubbling = false;

		// capture phase: outermost ancestor first, ending at the target
		foreach(e; chain.retro()) {
			if(eventName in e.capturingEventHandlers)
			foreach(handler; e.capturingEventHandlers[eventName])
				handler(e, this);

			// the default on capture should really be to always do nothing

			//if(!defaultPrevented)
			//	if(eventName in e.defaultEventHandlers)
			//		e.defaultEventHandlers[eventName](e.element, this);

			if(propagationStopped)
				break;
		}

		isBubbling = true;
		// bubble phase: from the target outwards
		if(!propagationStopped)
		foreach(e; chain) {
			if(eventName in e.bubblingEventHandlers)
			foreach(handler; e.bubblingEventHandlers[eventName])
				handler(e, this);

			if(!defaultPrevented)
				if(eventName in e.defaultEventHandlers)
					e.defaultEventHandlers[eventName](e, this);

			if(propagationStopped)
				break;
		}
	}
}

/// Validation and display options for a form field, with helpers to read
/// them from and write them to an element's attributes.
struct FormFieldOptions {
	// usable for any

	/// this is a regex pattern used to validate the field
	string pattern;
	/// must the field be filled in? Even with a regex, it can be submitted blank if this is false.
	bool isRequired;
	/// this is displayed as an example to the user
	string placeholder;

	// usable for numeric ones


	// convenience methods to quickly get some options
	@property static FormFieldOptions none() {
		FormFieldOptions f;
		return f;
	}

	static FormFieldOptions required() {
		FormFieldOptions f;
		f.isRequired = true;
		return f;
	}

	static FormFieldOptions regex(string pattern, bool required = false) {
		FormFieldOptions f;
		f.pattern = pattern;
		f.isRequired = required;
		return f;
	}

	/// Reads the required, pattern and placeholder attributes of e.
	static FormFieldOptions fromElement(Element e) {
		FormFieldOptions f;
		if(e.hasAttribute("required"))
			f.isRequired = true;
		if(e.hasAttribute("pattern"))
			f.pattern = e.pattern;
		if(e.hasAttribute("placeholder"))
			f.placeholder = e.placeholder;
		return f;
	}

	/// Sets on e the options that are filled in here and returns e.
	Element applyToElement(Element e) {
		if(this.isRequired)
			e.required = "required";
		if(this.pattern.length)
			e.pattern = this.pattern;
		if(this.placeholder.length)
			e.placeholder = this.placeholder;
		return e;
	}
}

// this needs to look just like a string, but can expand as needed
version(no_dom_stream)
alias string Utf8Stream;
else
class Utf8Stream {
	protected:
		// these two should be overridden in subclasses to actually do the stream magic
		string getMore() {
			if(getMoreHelper !is null)
				return getMoreHelper();
			return null;
		}

		bool hasMore() {
			if(hasMoreHelper !is null)
				return hasMoreHelper();
			return false;
		}
		// the rest should be ok

	public:
		/// A stream over a fixed string; nothing more is ever fetched.
		this(string d) {
			this.data = d;
		}

		/// A stream fed by delegates: getMoreHelper supplies the next piece
		/// of text, hasMoreHelper says whether there is another piece. The
		/// first piece, if any, is fetched right away.
		this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
			this.getMoreHelper = getMoreHelper;
			this.hasMoreHelper = hasMoreHelper;

			if(hasMore())
				this.data ~= getMore();
		}

		@property final size_t length() {
			// the parser checks length primarily directly before accessing the next character
			// so this is the place we'll hook to append more if possible and needed.
			if(lastIdx + 1 >= data.length && hasMore()) {
				data ~= getMore();
			}
			return data.length;
		}

		final char opIndex(size_t idx) {
			// remember the furthest position read so length() knows when to fetch more
			if(idx > lastIdx)
				lastIdx = idx;
			return data[idx];
		}

		final string opSlice(size_t start, size_t end) {
			if(end > lastIdx)
				lastIdx = end;
			return data[start .. end];
		}

		final size_t opDollar() {
			return length();
		}

		// note: like ~=, this appends to this stream itself rather than making a copy
		final Utf8Stream opBinary(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		final Utf8Stream opOpAssign(string op : "~")(string s) {
			this.data ~= s;
			return this;
		}

		final Utf8Stream opAssign(string rhs) {
			this.data = rhs;
			return this;
		}
	private:
		string data;

		size_t lastIdx; // highest index handed out so far

		bool delegate() hasMoreHelper;
		string delegate() getMoreHelper;


		/+
		// used to maybe clear some old stuff
		// you might have to remove elements parsed with it too since they can hold slices into the
		// old stuff, preventing gc
		void dropFront(int bytes) {
			posAdjustment += bytes;
			data = data[bytes .. $];
		}

		int posAdjustment;
		+/
}

/*
Copyright: Adam D. Ruppe, 2010 - 2013
License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky and Trass3r

        Copyright Adam D. Ruppe 2010-2013.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
        http://www.boost.org/LICENSE_1_0.txt)
*/