1/* 2 * Copyright 2004-2010, Axel D��rfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7#include "RTF.h" 8 9#include <ctype.h> 10#include <stdio.h> 11#include <stdlib.h> 12#include <string.h> 13 14#include <DataIO.h> 15 16 17//#define TRACE_RTF 18#ifdef TRACE_RTF 19# define TRACE(x...) printf(x) 20#else 21# define TRACE(x...) ; 22#endif 23 24 25static const char *kDestinationControlWords[] = { 26 "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate", 27 "atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend", 28 "atrfstart", "author", "background", "bkmkend", "buptim", "colortbl", 29 "comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer", 30 "footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep", 31 "ftnsepc", "header", "headerf", "headerl", "headerr", "info", 32 "keywords", "operator", "pict", "printim", "private1", "revtim", 33 "rxe", "stylesheet", "subject", "tc", "title", "txe", "xe", 34}; 35 36static char read_char(BDataIO &stream, bool endOfFileAllowed = false); 37static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10); 38 39 40using namespace RTF; 41 42 43static char 44read_char(BDataIO &stream, bool endOfFileAllowed) 45{ 46 char c; 47 ssize_t bytesRead = stream.Read(&c, 1); 48 49 if (bytesRead < B_OK) 50 throw (status_t)bytesRead; 51 52 if (bytesRead == 0 && !endOfFileAllowed) 53 throw (status_t)B_ERROR; 54 55 return c; 56} 57 58 59static int32 60parse_integer(char first, BDataIO &stream, char &_last, int32 base) 61{ 62 const char *kDigits = "0123456789abcdef"; 63 int32 integer = 0; 64 int32 count = 0; 65 66 char digit = first; 67 68 if (digit == '\0') 69 digit = read_char(stream); 70 71 while (true) { 72 int32 pos = 0; 73 for (; pos < base; pos++) { 74 if (kDigits[pos] == tolower(digit)) { 75 integer = integer * base + pos; 76 count++; 77 break; 78 } 79 } 80 if (pos == base) { 81 _last = digit; 82 goto out; 83 } 84 85 digit = read_char(stream); 86 } 87 88out: 89 if (count == 0) 90 throw (status_t)B_BAD_TYPE; 91 92 return integer; 93} 94 95 96static int 97string_array_compare(const char *key, const char **array) 98{ 99 return strcmp(key, array[0]); 100} 101 102 103static void 104dump(Element &element, int32 level = 0) 105{ 106 printf("%03" B_PRId32 " (%p):", level, &element); 107 for (int32 i = 0; i < level; i++) 108 printf(" "); 109 110 if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) { 111 printf("<RTF header, major version %" B_PRId32 ">\n", header->Version()); 112 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) { 113 printf("<Command: %s", command->Name()); 114 if (command->HasOption()) 115 printf(", Option %" B_PRId32, command->Option()); 116 puts(">"); 117 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) { 118 printf("<Text>"); 119 puts(text->String()); 120 } else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) 121 printf("<Group \"%s\">\n", group->Name()); 122 123 if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) { 124 for (uint32 i = 0; i < group->CountElements(); i++) 125 dump(*group->ElementAt(i), level + 1); 126 } 127} 128 129 130// #pragma mark - 131 132 133Parser::Parser(BPositionIO &stream) 134 : 135 fStream(&stream, 65536, false), 136 fIdentified(false) 137{ 138} 139 140 141status_t 142Parser::Identify() 143{ 144 char header[5]; 145 if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header)) 146 return B_IO_ERROR; 147 148 if (strncmp(header, "{\\rtf", 5)) 149 return B_BAD_TYPE; 150 151 fIdentified = true; 152 return B_OK; 153} 154 155 156status_t 157Parser::Parse(Header &header) 158{ 159 if (!fIdentified && Identify() != B_OK) 160 return B_BAD_TYPE; 161 162 try { 163 int32 openBrackets = 1; 164 165 // since we already preparsed parts of the RTF header, the header 166 // is handled here directly 167 char last; 168 header.Parse('\0', fStream, last); 169 170 Group *parent = &header; 171 char c = last; 172 173 while (true) { 174 Element *element = NULL; 175 176 // we'll just ignore the end of the stream 177 if (parent == NULL) 178 return B_OK; 179 180 switch (c) { 181 case '{': 182 openBrackets++; 183 parent->AddElement(element = new Group()); 184 parent = static_cast<Group *>(element); 185 break; 186 187 case '\\': 188 parent->AddElement(element = new Command()); 189 break; 190 191 case '}': 192 openBrackets--; 193 parent->DetermineDestination(); 194 parent = parent->Parent(); 195 // supposed to fall through 196 case '\n': 197 case '\r': 198 { 199 ssize_t bytesRead = fStream.Read(&c, 1); 200 if (bytesRead < B_OK) 201 throw (status_t)bytesRead; 202 else if (bytesRead != 1) { 203 // this is the only valid exit status 204 if (openBrackets == 0) 205 return B_OK; 206 207 throw (status_t)B_ERROR; 208 } 209 continue; 210 } 211 212 default: 213 parent->AddElement(element = new Text()); 214 break; 215 } 216 217 if (element == NULL) 218 throw (status_t)B_ERROR; 219 220 element->Parse(c, fStream, last); 221 c = last; 222 } 223 } catch (status_t status) { 224 return status; 225 } 226 227 return B_OK; 228} 229 230 231// #pragma mark - 232 233 234Element::Element() 235 : 236 fParent(NULL) 237{ 238} 239 240 241Element::~Element() 242{ 243} 244 245 246void 247Element::SetParent(Group *parent) 248{ 249 fParent = parent; 250} 251 252 253Group * 254Element::Parent() const 255{ 256 return fParent; 257} 258 259 260bool 261Element::IsDefinitionDelimiter() 262{ 263 return false; 264} 265 266 267void 268Element::PrintToStream(int32 level) 269{ 270 dump(*this, level); 271} 272 273 274// #pragma mark - 275 276 277Group::Group() 278 : 279 fDestination(TEXT_DESTINATION) 280{ 281} 282 283 284Group::~Group() 285{ 286 Element *element; 287 while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) { 288 delete element; 289 } 290} 291 292 293void 294Group::Parse(char first, BDataIO &stream, char &last) 295{ 296 if (first == '\0') 297 first = read_char(stream); 298 299 if (first != '{') 300 throw (status_t)B_BAD_TYPE; 301 302 last = read_char(stream); 303} 304 305 306status_t 307Group::AddElement(Element *element) 308{ 309 if (element == NULL) 310 return B_BAD_VALUE; 311 312 if (fElements.AddItem(element)) { 313 element->SetParent(this); 314 return B_OK; 315 } 316 317 return B_NO_MEMORY; 318} 319 320 321uint32 322Group::CountElements() const 323{ 324 return (uint32)fElements.CountItems(); 325} 326 327 328Element * 329Group::ElementAt(uint32 index) const 330{ 331 return static_cast<Element *>(fElements.ItemAt(index)); 332} 333 334 335Element * 336Group::FindDefinitionStart(int32 index, int32 *_startIndex) const 337{ 338 if (index < 0) 339 return NULL; 340 341 Element *element; 342 int32 number = 0; 343 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 344 if (number == index) { 345 if (_startIndex) 346 *_startIndex = i; 347 return element; 348 } 349 350 if (element->IsDefinitionDelimiter()) 351 number++; 352 } 353 354 return NULL; 355} 356 357 358Command * 359Group::FindDefinition(const char *name, int32 index) const 360{ 361 int32 startIndex; 362 Element *element = FindDefinitionStart(index, &startIndex); 363 if (element == NULL) 364 return NULL; 365 366 for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) { 367 if (element->IsDefinitionDelimiter()) 368 break; 369 370 if (Command *command = dynamic_cast<Command *>(element)) { 371 if (command != NULL && !strcmp(name, command->Name())) 372 return command; 373 } 374 } 375 376 return NULL; 377} 378 379 380Group * 381Group::FindGroup(const char *name) const 382{ 383 Element *element; 384 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 385 Group *group = dynamic_cast<Group *>(element); 386 if (group == NULL) 387 continue; 388 389 Command *command = dynamic_cast<Command *>(group->ElementAt(0)); 390 if (command != NULL && !strcmp(name, command->Name())) 391 return group; 392 } 393 394 return NULL; 395} 396 397 398const char * 399Group::Name() const 400{ 401 Command *command = dynamic_cast<Command *>(ElementAt(0)); 402 if (command != NULL) 403 return command->Name(); 404 405 return NULL; 406} 407 408 409void 410Group::DetermineDestination() 411{ 412 const char *name = Name(); 413 if (name == NULL) 414 return; 415 416 if (!strcmp(name, "*")) { 417 fDestination = COMMENT_DESTINATION; 418 return; 419 } 420 421 // binary search for destination control words 422 423 if (bsearch(name, kDestinationControlWords, 424 sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]), 425 sizeof(kDestinationControlWords[0]), 426 (int (*)(const void *, const void *))string_array_compare) != NULL) 427 fDestination = OTHER_DESTINATION; 428} 429 430 431group_destination 432Group::Destination() const 433{ 434 return fDestination; 435} 436 437 438// #pragma mark - 439 440 441Header::Header() 442 : 443 fVersion(0) 444{ 445} 446 447 448Header::~Header() 449{ 450} 451 452 453void 454Header::Parse(char first, BDataIO &stream, char &last) 455{ 456 // The stream has been peeked into by the parser already, and 457 // only the version follows in the stream -- let's pick it up 458 459 fVersion = parse_integer(first, stream, last); 460 461 // recreate "rtf" command to name this group 462 463 Command *command = new Command(); 464 command->SetName("rtf"); 465 command->SetOption(fVersion); 466 467 AddElement(command); 468} 469 470 471int32 472Header::Version() const 473{ 474 return fVersion; 475} 476 477 478const char * 479Header::Charset() const 480{ 481 Command *command = dynamic_cast<Command *>(ElementAt(1)); 482 if (command == NULL) 483 return NULL; 484 485 return command->Name(); 486} 487 488 489rgb_color 490Header::Color(int32 index) 491{ 492 rgb_color color = {0, 0, 0, 255}; 493 494 Group *colorTable = FindGroup("colortbl"); 495 496 if (colorTable != NULL) { 497 if (Command *gun = colorTable->FindDefinition("red", index)) 498 color.red = gun->Option(); 499 if (Command *gun = colorTable->FindDefinition("green", index)) 500 color.green = gun->Option(); 501 if (Command *gun = colorTable->FindDefinition("blue", index)) 502 color.blue = gun->Option(); 503 } 504 505 return color; 506} 507 508 509// #pragma mark - 510 511 512Text::Text() 513{ 514} 515 516 517Text::~Text() 518{ 519 SetTo(NULL); 520} 521 522 523bool 524Text::IsDefinitionDelimiter() 525{ 526 return fText == ";"; 527} 528 529 530void 531Text::Parse(char first, BDataIO &stream, char &last) 532{ 533 char c = first; 534 if (c == '\0') 535 c = read_char(stream); 536 537 if (c == ';') { 538 // definition delimiter 539 fText.SetTo(";"); 540 last = read_char(stream); 541 return; 542 } 543 544 const size_t kBufferSteps = 1; 545 size_t maxSize = kBufferSteps; 546 char *text = fText.LockBuffer(maxSize); 547 if (text == NULL) 548 throw (status_t)B_NO_MEMORY; 549 550 size_t position = 0; 551 552 while (true) { 553 if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r') 554 break; 555 556 if (position >= maxSize) { 557 fText.UnlockBuffer(position); 558 text = fText.LockBuffer(maxSize += kBufferSteps); 559 if (text == NULL) 560 throw (status_t)B_NO_MEMORY; 561 } 562 563 text[position++] = c; 564 565 c = read_char(stream); 566 } 567 fText.UnlockBuffer(position); 568 569 // ToDo: add support for different charsets - right now, only ASCII is supported! 570 // To achieve this, we should just translate everything into UTF-8 here 571 572 last = c; 573} 574 575 576status_t 577Text::SetTo(const char *text) 578{ 579 return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY; 580} 581 582 583const char * 584Text::String() const 585{ 586 return fText.String(); 587} 588 589 590uint32 591Text::Length() const 592{ 593 return fText.Length(); 594} 595 596 597// #pragma mark - 598 599 600Command::Command() 601 : 602 fName(NULL), 603 fHasOption(false), 604 fOption(-1) 605{ 606} 607 608 609Command::~Command() 610{ 611} 612 613 614void 615Command::Parse(char first, BDataIO &stream, char &last) 616{ 617 if (first == '\0') 618 first = read_char(stream); 619 620 if (first != '\\') 621 throw (status_t)B_BAD_TYPE; 622 623 // get name 624 char name[kCommandLength]; 625 size_t length = 0; 626 char c; 627 while (isalpha(c = read_char(stream))) { 628 name[length++] = c; 629 if (length >= kCommandLength - 1) 630 throw (status_t)B_BAD_TYPE; 631 } 632 633 if (length == 0) { 634 if (c == '\n' || c == '\r') { 635 // we're a hard return 636 fName.SetTo("par"); 637 } else 638 fName.SetTo(c, 1); 639 640 // read over character 641 c = read_char(stream); 642 } else 643 fName.SetTo(name, length); 644 645 TRACE("command: %s\n", fName.String()); 646 647 // parse numeric option 648 649 if (c == '-') 650 c = read_char(stream); 651 652 last = c; 653 654 if (fName == "'") { 655 // hexadecimal 656 char bytes[2]; 657 bytes[0] = read_char(stream); 658 bytes[1] = '\0'; 659 BMemoryIO memory(bytes, 2); 660 661 SetOption(parse_integer(c, memory, last, 16)); 662 last = read_char(stream); 663 } else { 664 // decimal 665 if (isdigit(c)) 666 SetOption(parse_integer(c, stream, last)); 667 668 // a space delimiter is eaten up by the command 669 if (isspace(last)) 670 last = read_char(stream); 671 } 672 673 if (HasOption()) 674 TRACE(" option: %ld\n", fOption); 675} 676 677 678status_t 679Command::SetName(const char *name) 680{ 681 return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY; 682} 683 684 685const char * 686Command::Name() 687{ 688 return fName.String(); 689} 690 691 692void 693Command::UnsetOption() 694{ 695 fHasOption = false; 696 fOption = -1; 697} 698 699 700void 701Command::SetOption(int32 option) 702{ 703 fOption = option; 704 fHasOption = true; 705} 706 707 708bool 709Command::HasOption() const 710{ 711 return fHasOption; 712} 713 714 715int32 716Command::Option() const 717{ 718 return fOption; 719} 720 721 722// #pragma mark - 723 724 725Iterator::Iterator(Element &start, group_destination destination) 726{ 727 SetTo(start, destination); 728} 729 730 731void 732Iterator::SetTo(Element &start, group_destination destination) 733{ 734 fStart = &start; 735 fDestination = destination; 736 737 Rewind(); 738} 739 740 741void 742Iterator::Rewind() 743{ 744 fStack.MakeEmpty(); 745 fStack.Push(fStart); 746} 747 748 749bool 750Iterator::HasNext() const 751{ 752 return !fStack.IsEmpty(); 753} 754 755 756Element * 757Iterator::Next() 758{ 759 Element *element; 760 761 if (!fStack.Pop(&element)) 762 return NULL; 763 764 Group *group = dynamic_cast<Group *>(element); 765 if (group != NULL 766 && (fDestination == ALL_DESTINATIONS 767 || fDestination == group->Destination())) { 768 // put this group's children on the stack in 769 // reverse order, so that we iterate over 770 // the tree in in-order 771 772 for (int32 i = group->CountElements(); i-- > 0;) { 773 fStack.Push(group->ElementAt(i)); 774 } 775 } 776 777 return element; 778} 779 780 781// #pragma mark - 782 783 784Worker::Worker(RTF::Header &start) 785 : 786 fStart(start) 787{ 788} 789 790 791Worker::~Worker() 792{ 793} 794 795 796void 797Worker::Dispatch(Element *element) 798{ 799 if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) { 800 fSkip = false; 801 Group(group); 802 803 if (fSkip) 804 return; 805 806 for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++) 807 Dispatch(element); 808 809 GroupEnd(group); 810 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) { 811 Command(command); 812 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) { 813 Text(text); 814 } 815} 816 817 818void 819Worker::Work() 820{ 821 Dispatch(&fStart); 822} 823 824 825void 826Worker::Group(RTF::Group *group) 827{ 828} 829 830 831void 832Worker::GroupEnd(RTF::Group *group) 833{ 834} 835 836 837void 838Worker::Command(RTF::Command *command) 839{ 840} 841 842 843void 844Worker::Text(RTF::Text *text) 845{ 846} 847 848 849RTF::Header & 850Worker::Start() 851{ 852 return fStart; 853} 854 855 856void 857Worker::Skip() 858{ 859 fSkip = true; 860} 861 862 863void 864Worker::Abort(status_t status) 865{ 866 throw status; 867} 868 869