1/* Support for cookies. 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 3 Free Software Foundation, Inc. 4 5This file is part of GNU Wget. 6 7GNU Wget is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 3 of the License, or (at 10your option) any later version. 11 12GNU Wget is distributed in the hope that it will be useful, but 13WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with Wget. If not, see <http://www.gnu.org/licenses/>. 19 20Additional permission under GNU GPL version 3 section 7 21 22If you modify this program, or any covered work, by linking or 23combining it with the OpenSSL project's OpenSSL library (or a 24modified version of that library), containing parts covered by the 25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation 26grants you additional permission to convey the resulting work. 27Corresponding Source for a non-source form of such a combination 28shall include the source code for the parts of OpenSSL used as well 29as that of the covered work. */ 30 31/* Written by Hrvoje Niksic. Parts are loosely inspired by the 32 cookie patch submitted by Tomasz Wegrzanowski. 33 34 This implements the client-side cookie support, as specified 35 (loosely) by Netscape's "preliminary specification", currently 36 available at: 37 38 http://wp.netscape.com/newsref/std/cookie_spec.html 39 40 rfc2109 is not supported because of its incompatibilities with the 41 above widely-used specification. rfc2965 is entirely ignored, 42 since popular client software doesn't implement it, and even the 43 sites that do send Set-Cookie2 also emit Set-Cookie for 44 compatibility. */ 45 46#include "wget.h" 47 48#include <stdio.h> 49#include <string.h> 50#include <stdlib.h> 51#include <assert.h> 52#include <errno.h> 53#include <time.h> 54#include "utils.h" 55#include "hash.h" 56#include "cookies.h" 57#include "http.h" /* for http_atotm */ 58 59/* Declarations of `struct cookie' and the most basic functions. */ 60 61/* Cookie jar serves as cookie storage and a means of retrieving 62 cookies efficiently. All cookies with the same domain are stored 63 in a linked list called "chain". A cookie chain can be reached by 64 looking up the domain in the cookie jar's chains_by_domain table. 65 66 For example, to reach all the cookies under google.com, one must 67 execute hash_table_get(jar->chains_by_domain, "google.com"). Of 68 course, when sending a cookie to `www.google.com', one must search 69 for cookies that belong to either `www.google.com' or `google.com' 70 -- but the point is that the code doesn't need to go through *all* 71 the cookies. */ 72 73struct cookie_jar { 74 /* Cookie chains indexed by domain. */ 75 struct hash_table *chains; 76 77 int cookie_count; /* number of cookies in the jar. */ 78}; 79 80/* Value set by entry point functions, so that the low-level 81 routines don't need to call time() all the time. */ 82static time_t cookies_now; 83 84struct cookie_jar * 85cookie_jar_new (void) 86{ 87 struct cookie_jar *jar = xnew (struct cookie_jar); 88 jar->chains = make_nocase_string_hash_table (0); 89 jar->cookie_count = 0; 90 return jar; 91} 92 93struct cookie { 94 char *domain; /* domain of the cookie */ 95 int port; /* port number */ 96 char *path; /* path prefix of the cookie */ 97 98 unsigned discard_requested :1; /* whether cookie was created to 99 request discarding another 100 cookie. */ 101 102 unsigned secure :1; /* whether cookie should be 103 transmitted over non-https 104 connections. */ 105 unsigned domain_exact :1; /* whether DOMAIN must match as a 106 whole. */ 107 108 unsigned permanent :1; /* whether the cookie should outlive 109 the session. */ 110 time_t expiry_time; /* time when the cookie expires, 0 111 means undetermined. */ 112 113 char *attr; /* cookie attribute name */ 114 char *value; /* cookie attribute value */ 115 116 struct cookie *next; /* used for chaining of cookies in the 117 same domain. */ 118}; 119 120#define PORT_ANY (-1) 121 122/* Allocate and return a new, empty cookie structure. */ 123 124static struct cookie * 125cookie_new (void) 126{ 127 struct cookie *cookie = xnew0 (struct cookie); 128 129 /* Both cookie->permanent and cookie->expiry_time are now 0. This 130 means that the cookie doesn't expire, but is only valid for this 131 session (i.e. not written out to disk). */ 132 133 cookie->port = PORT_ANY; 134 return cookie; 135} 136 137/* Non-zero if the cookie has expired. Assumes cookies_now has been 138 set by one of the entry point functions. */ 139 140static bool 141cookie_expired_p (const struct cookie *c) 142{ 143 return c->expiry_time != 0 && c->expiry_time < cookies_now; 144} 145 146/* Deallocate COOKIE and its components. */ 147 148static void 149delete_cookie (struct cookie *cookie) 150{ 151 xfree_null (cookie->domain); 152 xfree_null (cookie->path); 153 xfree_null (cookie->attr); 154 xfree_null (cookie->value); 155 xfree (cookie); 156} 157 158/* Functions for storing cookies. 159 160 All cookies can be reached beginning with jar->chains. The key in 161 that table is the domain name, and the value is a linked list of 162 all cookies from that domain. Every new cookie is placed on the 163 head of the list. */ 164 165/* Find and return a cookie in JAR whose domain, path, and attribute 166 name correspond to COOKIE. If found, PREVPTR will point to the 167 location of the cookie previous in chain, or NULL if the found 168 cookie is the head of a chain. 169 170 If no matching cookie is found, return NULL. */ 171 172static struct cookie * 173find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie, 174 struct cookie **prevptr) 175{ 176 struct cookie *chain, *prev; 177 178 chain = hash_table_get (jar->chains, cookie->domain); 179 if (!chain) 180 goto nomatch; 181 182 prev = NULL; 183 for (; chain; prev = chain, chain = chain->next) 184 if (0 == strcmp (cookie->path, chain->path) 185 && 0 == strcmp (cookie->attr, chain->attr) 186 && cookie->port == chain->port) 187 { 188 *prevptr = prev; 189 return chain; 190 } 191 192 nomatch: 193 *prevptr = NULL; 194 return NULL; 195} 196 197/* Store COOKIE to the jar. 198 199 This is done by placing COOKIE at the head of its chain. However, 200 if COOKIE matches a cookie already in memory, as determined by 201 find_matching_cookie, the old cookie is unlinked and destroyed. 202 203 The key of each chain's hash table entry is allocated only the 204 first time; next hash_table_put's reuse the same key. */ 205 206static void 207store_cookie (struct cookie_jar *jar, struct cookie *cookie) 208{ 209 struct cookie *chain_head; 210 char *chain_key; 211 212 if (hash_table_get_pair (jar->chains, cookie->domain, 213 &chain_key, &chain_head)) 214 { 215 /* A chain of cookies in this domain already exists. Check for 216 duplicates -- if an extant cookie exactly matches our domain, 217 port, path, and name, replace it. */ 218 struct cookie *prev; 219 struct cookie *victim = find_matching_cookie (jar, cookie, &prev); 220 221 if (victim) 222 { 223 /* Remove VICTIM from the chain. COOKIE will be placed at 224 the head. */ 225 if (prev) 226 { 227 prev->next = victim->next; 228 cookie->next = chain_head; 229 } 230 else 231 { 232 /* prev is NULL; apparently VICTIM was at the head of 233 the chain. This place will be taken by COOKIE, so 234 all we need to do is: */ 235 cookie->next = victim->next; 236 } 237 delete_cookie (victim); 238 --jar->cookie_count; 239 DEBUGP (("Deleted old cookie (to be replaced.)\n")); 240 } 241 else 242 cookie->next = chain_head; 243 } 244 else 245 { 246 /* We are now creating the chain. Use a copy of cookie->domain 247 as the key for the life-time of the chain. Using 248 cookie->domain would be unsafe because the life-time of the 249 chain may exceed the life-time of the cookie. (Cookies may 250 be deleted from the chain by this very function.) */ 251 cookie->next = NULL; 252 chain_key = xstrdup (cookie->domain); 253 } 254 255 hash_table_put (jar->chains, chain_key, cookie); 256 ++jar->cookie_count; 257 258 IF_DEBUG 259 { 260 time_t exptime = cookie->expiry_time; 261 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n", 262 cookie->domain, cookie->port, 263 cookie->port == PORT_ANY ? " (ANY)" : "", 264 cookie->path, 265 cookie->permanent ? "permanent" : "session", 266 cookie->secure ? "secure" : "insecure", 267 cookie->expiry_time ? datetime_str (exptime) : "none", 268 cookie->attr, cookie->value)); 269 } 270} 271 272/* Discard a cookie matching COOKIE's domain, port, path, and 273 attribute name. This gets called when we encounter a cookie whose 274 expiry date is in the past, or whose max-age is set to 0. The 275 former corresponds to netscape cookie spec, while the latter is 276 specified by rfc2109. */ 277 278static void 279discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie) 280{ 281 struct cookie *prev, *victim; 282 283 if (!hash_table_count (jar->chains)) 284 /* No elements == nothing to discard. */ 285 return; 286 287 victim = find_matching_cookie (jar, cookie, &prev); 288 if (victim) 289 { 290 if (prev) 291 /* Simply unchain the victim. */ 292 prev->next = victim->next; 293 else 294 { 295 /* VICTIM was head of its chain. We need to place a new 296 cookie at the head. */ 297 char *chain_key = NULL; 298 int res; 299 300 res = hash_table_get_pair (jar->chains, victim->domain, 301 &chain_key, NULL); 302 assert (res != 0); 303 if (!victim->next) 304 { 305 /* VICTIM was the only cookie in the chain. Destroy the 306 chain and deallocate the chain key. */ 307 hash_table_remove (jar->chains, victim->domain); 308 xfree (chain_key); 309 } 310 else 311 hash_table_put (jar->chains, chain_key, victim->next); 312 } 313 delete_cookie (victim); 314 DEBUGP (("Discarded old cookie.\n")); 315 } 316} 317 318/* Functions for parsing the `Set-Cookie' header, and creating new 319 cookies from the wire. */ 320 321#define TOKEN_IS(token, string_literal) \ 322 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal) 323 324#define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e) 325 326/* Parse the contents of the `Set-Cookie' header. The header looks 327 like this: 328 329 name1=value1; name2=value2; ... 330 331 Trailing semicolon is optional; spaces are allowed between all 332 tokens. Additionally, values may be quoted. 333 334 A new cookie is returned upon success, NULL otherwise. 335 336 The first name-value pair will be used to set the cookie's 337 attribute name and value. Subsequent parameters will be checked 338 against field names such as `domain', `path', etc. Recognized 339 fields will be parsed and the corresponding members of COOKIE 340 filled. */ 341 342static struct cookie * 343parse_set_cookie (const char *set_cookie, bool silent) 344{ 345 const char *ptr = set_cookie; 346 struct cookie *cookie = cookie_new (); 347 param_token name, value; 348 349 if (!extract_param (&ptr, &name, &value, ';')) 350 goto error; 351 if (!value.b) 352 goto error; 353 cookie->attr = strdupdelim (name.b, name.e); 354 cookie->value = strdupdelim (value.b, value.e); 355 356 while (extract_param (&ptr, &name, &value, ';')) 357 { 358 if (TOKEN_IS (name, "domain")) 359 { 360 if (!TOKEN_NON_EMPTY (value)) 361 goto error; 362 xfree_null (cookie->domain); 363 /* Strictly speaking, we should set cookie->domain_exact if the 364 domain doesn't begin with a dot. But many sites set the 365 domain to "foo.com" and expect "subhost.foo.com" to get the 366 cookie, and it apparently works in browsers. */ 367 if (*value.b == '.') 368 ++value.b; 369 cookie->domain = strdupdelim (value.b, value.e); 370 } 371 else if (TOKEN_IS (name, "path")) 372 { 373 if (!TOKEN_NON_EMPTY (value)) 374 goto error; 375 xfree_null (cookie->path); 376 cookie->path = strdupdelim (value.b, value.e); 377 } 378 else if (TOKEN_IS (name, "expires")) 379 { 380 char *value_copy; 381 time_t expires; 382 383 if (!TOKEN_NON_EMPTY (value)) 384 goto error; 385 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy); 386 387 expires = http_atotm (value_copy); 388 if (expires != (time_t) -1) 389 { 390 cookie->permanent = 1; 391 cookie->expiry_time = expires; 392 /* According to netscape's specification, expiry time in 393 the past means that discarding of a matching cookie 394 is requested. */ 395 if (cookie->expiry_time < cookies_now) 396 cookie->discard_requested = 1; 397 } 398 else 399 /* Error in expiration spec. Assume default (cookie doesn't 400 expire, but valid only for this session.) */ 401 ; 402 } 403 else if (TOKEN_IS (name, "max-age")) 404 { 405 double maxage = -1; 406 char *value_copy; 407 408 if (!TOKEN_NON_EMPTY (value)) 409 goto error; 410 BOUNDED_TO_ALLOCA (value.b, value.e, value_copy); 411 412 sscanf (value_copy, "%lf", &maxage); 413 if (maxage == -1) 414 /* something went wrong. */ 415 goto error; 416 cookie->permanent = 1; 417 cookie->expiry_time = cookies_now + maxage; 418 419 /* According to rfc2109, a cookie with max-age of 0 means that 420 discarding of a matching cookie is requested. */ 421 if (maxage == 0) 422 cookie->discard_requested = 1; 423 } 424 else if (TOKEN_IS (name, "secure")) 425 { 426 /* ignore value completely */ 427 cookie->secure = 1; 428 } 429 else 430 /* Ignore unrecognized attribute. */ 431 ; 432 } 433 if (*ptr) 434 /* extract_param has encountered a syntax error */ 435 goto error; 436 437 /* The cookie has been successfully constructed; return it. */ 438 return cookie; 439 440 error: 441 if (!silent) 442 logprintf (LOG_NOTQUIET, 443 _("Syntax error in Set-Cookie: %s at position %d.\n"), 444 quotearg_style (escape_quoting_style, set_cookie), 445 (int) (ptr - set_cookie)); 446 delete_cookie (cookie); 447 return NULL; 448} 449 450#undef TOKEN_IS 451#undef TOKEN_NON_EMPTY 452 453/* Sanity checks. These are important, otherwise it is possible for 454 mailcious attackers to destroy important cookie information and/or 455 violate your privacy. */ 456 457 458#define REQUIRE_DIGITS(p) do { \ 459 if (!c_isdigit (*p)) \ 460 return false; \ 461 for (++p; c_isdigit (*p); p++) \ 462 ; \ 463} while (0) 464 465#define REQUIRE_DOT(p) do { \ 466 if (*p++ != '.') \ 467 return false; \ 468} while (0) 469 470/* Check whether ADDR matches <digits>.<digits>.<digits>.<digits>. 471 472 We don't want to call network functions like inet_addr() because 473 all we need is a check, preferrably one that is small, fast, and 474 well-defined. */ 475 476static bool 477numeric_address_p (const char *addr) 478{ 479 const char *p = addr; 480 481 REQUIRE_DIGITS (p); /* A */ 482 REQUIRE_DOT (p); /* . */ 483 REQUIRE_DIGITS (p); /* B */ 484 REQUIRE_DOT (p); /* . */ 485 REQUIRE_DIGITS (p); /* C */ 486 REQUIRE_DOT (p); /* . */ 487 REQUIRE_DIGITS (p); /* D */ 488 489 if (*p != '\0') 490 return false; 491 return true; 492} 493 494/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST. 495 Originally I tried to make the check compliant with rfc2109, but 496 the sites deviated too often, so I had to fall back to "tail 497 matching", as defined by the original Netscape's cookie spec. */ 498 499static bool 500check_domain_match (const char *cookie_domain, const char *host) 501{ 502 DEBUGP (("cdm: 1")); 503 504 /* Numeric address requires exact match. It also requires HOST to 505 be an IP address. */ 506 if (numeric_address_p (cookie_domain)) 507 return 0 == strcmp (cookie_domain, host); 508 509 DEBUGP ((" 2")); 510 511 /* For the sake of efficiency, check for exact match first. */ 512 if (0 == strcasecmp (cookie_domain, host)) 513 return true; 514 515 DEBUGP ((" 3")); 516 517 /* HOST must match the tail of cookie_domain. */ 518 if (!match_tail (host, cookie_domain, true)) 519 return false; 520 521 /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must 522 make sure that somebody is not trying to set the cookie for a 523 subdomain shared by many entities. For example, "company.co.uk" 524 must not be allowed to set a cookie for ".co.uk". On the other 525 hand, "sso.redhat.de" should be able to set a cookie for 526 ".redhat.de". 527 528 The only marginally sane way to handle this I can think of is to 529 reject on the basis of the length of the second-level domain name 530 (but when the top-level domain is unknown), with the assumption 531 that those of three or less characters could be reserved. For 532 example: 533 534 .co.org -> works because the TLD is known 535 .co.uk -> doesn't work because "co" is only two chars long 536 .com.au -> doesn't work because "com" is only 3 chars long 537 .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh) 538 .cnn.de -> doesn't work for the same reason (ugh!!) 539 .abcd.de -> works because "abcd" is 4 chars long 540 .img.cnn.de -> works because it's not trying to set the 2nd level domain 541 .cnn.co.uk -> works for the same reason 542 543 That should prevent misuse, while allowing reasonable usage. If 544 someone knows of a better way to handle this, please let me 545 know. */ 546 { 547 const char *p = cookie_domain; 548 int dccount = 1; /* number of domain components */ 549 int ldcl = 0; /* last domain component length */ 550 int nldcl = 0; /* next to last domain component length */ 551 int out; 552 if (*p == '.') 553 /* Ignore leading period in this calculation. */ 554 ++p; 555 DEBUGP ((" 4")); 556 for (out = 0; !out; p++) 557 switch (*p) 558 { 559 case '\0': 560 out = 1; 561 break; 562 case '.': 563 if (ldcl == 0) 564 /* Empty domain component found -- the domain is invalid. */ 565 return false; 566 if (*(p + 1) == '\0') 567 { 568 /* Tolerate trailing '.' by not treating the domain as 569 one ending with an empty domain component. */ 570 out = 1; 571 break; 572 } 573 nldcl = ldcl; 574 ldcl = 0; 575 ++dccount; 576 break; 577 default: 578 ++ldcl; 579 } 580 581 DEBUGP ((" 5")); 582 583 if (dccount < 2) 584 return false; 585 586 DEBUGP ((" 6")); 587 588 if (dccount == 2) 589 { 590 size_t i; 591 int known_toplevel = false; 592 static const char *known_toplevel_domains[] = { 593 ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int" 594 }; 595 for (i = 0; i < countof (known_toplevel_domains); i++) 596 if (match_tail (cookie_domain, known_toplevel_domains[i], true)) 597 { 598 known_toplevel = true; 599 break; 600 } 601 if (!known_toplevel && nldcl <= 3) 602 return false; 603 } 604 } 605 606 DEBUGP ((" 7")); 607 608 /* Don't allow the host "foobar.com" to set a cookie for domain 609 "bar.com". */ 610 if (*cookie_domain != '.') 611 { 612 int dlen = strlen (cookie_domain); 613 int hlen = strlen (host); 614 /* cookie host: hostname.foobar.com */ 615 /* desired domain: bar.com */ 616 /* '.' must be here in host-> ^ */ 617 if (hlen > dlen && host[hlen - dlen - 1] != '.') 618 return false; 619 } 620 621 DEBUGP ((" 8")); 622 623 return true; 624} 625 626static int path_matches (const char *, const char *); 627 628/* Check whether PATH begins with COOKIE_PATH. */ 629 630static bool 631check_path_match (const char *cookie_path, const char *path) 632{ 633 return path_matches (path, cookie_path) != 0; 634} 635 636/* Prepend '/' to string S. S is copied to fresh stack-allocated 637 space and its value is modified to point to the new location. */ 638 639#define PREPEND_SLASH(s) do { \ 640 char *PS_newstr = (char *) alloca (1 + strlen (s) + 1); \ 641 *PS_newstr = '/'; \ 642 strcpy (PS_newstr + 1, s); \ 643 s = PS_newstr; \ 644} while (0) 645 646 647/* Process the HTTP `Set-Cookie' header. This results in storing the 648 cookie or discarding a matching one, or ignoring it completely, all 649 depending on the contents. */ 650 651void 652cookie_handle_set_cookie (struct cookie_jar *jar, 653 const char *host, int port, 654 const char *path, const char *set_cookie) 655{ 656 struct cookie *cookie; 657 cookies_now = time (NULL); 658 659 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie 660 usage assumes /-prefixed paths. Until the rest of Wget is fixed, 661 simply prepend slash to PATH. */ 662 PREPEND_SLASH (path); 663 664 cookie = parse_set_cookie (set_cookie, false); 665 if (!cookie) 666 goto out; 667 668 /* Sanitize parts of cookie. */ 669 670 if (!cookie->domain) 671 { 672 copy_domain: 673 /* If the domain was not provided, we use the one we're talking 674 to, and set exact match. */ 675 cookie->domain = xstrdup (host); 676 cookie->domain_exact = 1; 677 /* Set the port, but only if it's non-default. */ 678 if (port != 80 && port != 443) 679 cookie->port = port; 680 } 681 else 682 { 683 if (!check_domain_match (cookie->domain, host)) 684 { 685 logprintf (LOG_NOTQUIET, 686 _("Cookie coming from %s attempted to set domain to %s\n"), 687 quotearg_style (escape_quoting_style, host), 688 quotearg_style (escape_quoting_style, cookie->domain)); 689 xfree (cookie->domain); 690 goto copy_domain; 691 } 692 } 693 694 if (!cookie->path) 695 { 696 /* The cookie doesn't set path: set it to the URL path, sans the 697 file part ("/dir/file" truncated to "/dir/"). */ 698 char *trailing_slash = strrchr (path, '/'); 699 if (trailing_slash) 700 cookie->path = strdupdelim (path, trailing_slash + 1); 701 else 702 /* no slash in the string -- can this even happen? */ 703 cookie->path = xstrdup (path); 704 } 705 else 706 { 707 /* The cookie sets its own path; verify that it is legal. */ 708 if (!check_path_match (cookie->path, path)) 709 { 710 DEBUGP (("Attempt to fake the path: %s, %s\n", 711 cookie->path, path)); 712 goto out; 713 } 714 } 715 716 /* Now store the cookie, or discard an existing cookie, if 717 discarding was requested. */ 718 719 if (cookie->discard_requested) 720 { 721 discard_matching_cookie (jar, cookie); 722 goto out; 723 } 724 725 store_cookie (jar, cookie); 726 return; 727 728 out: 729 if (cookie) 730 delete_cookie (cookie); 731} 732 733/* Support for sending out cookies in HTTP requests, based on 734 previously stored cookies. Entry point is 735 `build_cookies_request'. */ 736 737/* Return a count of how many times CHR occurs in STRING. */ 738 739static int 740count_char (const char *string, char chr) 741{ 742 const char *p; 743 int count = 0; 744 for (p = string; *p; p++) 745 if (*p == chr) 746 ++count; 747 return count; 748} 749 750/* Find the cookie chains whose domains match HOST and store them to 751 DEST. 752 753 A cookie chain is the head of a list of cookies that belong to a 754 host/domain. Given HOST "img.search.xemacs.org", this function 755 will return the chains for "img.search.xemacs.org", 756 "search.xemacs.org", and "xemacs.org" -- those of them that exist 757 (if any), that is. 758 759 DEST should be large enough to accept (in the worst case) as many 760 elements as there are domain components of HOST. */ 761 762static int 763find_chains_of_host (struct cookie_jar *jar, const char *host, 764 struct cookie *dest[]) 765{ 766 int dest_count = 0; 767 int passes, passcnt; 768 769 /* Bail out quickly if there are no cookies in the jar. */ 770 if (!hash_table_count (jar->chains)) 771 return 0; 772 773 if (numeric_address_p (host)) 774 /* If host is an IP address, only check for the exact match. */ 775 passes = 1; 776 else 777 /* Otherwise, check all the subdomains except the top-level (last) 778 one. As a domain with N components has N-1 dots, the number of 779 passes equals the number of dots. */ 780 passes = count_char (host, '.'); 781 782 passcnt = 0; 783 784 /* Find chains that match HOST, starting with exact match and 785 progressing to less specific domains. For instance, given HOST 786 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then 787 srk.fer.hr's, then fer.hr's. */ 788 while (1) 789 { 790 struct cookie *chain = hash_table_get (jar->chains, host); 791 if (chain) 792 dest[dest_count++] = chain; 793 if (++passcnt >= passes) 794 break; 795 host = strchr (host, '.') + 1; 796 } 797 798 return dest_count; 799} 800 801/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero 802 otherwise. */ 803 804static int 805path_matches (const char *full_path, const char *prefix) 806{ 807 int len = strlen (prefix); 808 809 if (0 != strncmp (full_path, prefix, len)) 810 /* FULL_PATH doesn't begin with PREFIX. */ 811 return 0; 812 813 /* Length of PREFIX determines the quality of the match. */ 814 return len + 1; 815} 816 817/* Return true iff COOKIE matches the provided parameters of the URL 818 being downloaded: HOST, PORT, PATH, and SECFLAG. 819 820 If PATH_GOODNESS is non-NULL, store the "path goodness" value 821 there. That value is a measure of how closely COOKIE matches PATH, 822 used for ordering cookies. */ 823 824static bool 825cookie_matches_url (const struct cookie *cookie, 826 const char *host, int port, const char *path, 827 bool secflag, int *path_goodness) 828{ 829 int pg; 830 831 if (cookie_expired_p (cookie)) 832 /* Ignore stale cookies. Don't bother unchaining the cookie at 833 this point -- Wget is a relatively short-lived application, and 834 stale cookies will not be saved by `save_cookies'. On the 835 other hand, this function should be as efficient as 836 possible. */ 837 return false; 838 839 if (cookie->secure && !secflag) 840 /* Don't transmit secure cookies over insecure connections. */ 841 return false; 842 if (cookie->port != PORT_ANY && cookie->port != port) 843 return false; 844 845 /* If exact domain match is required, verify that cookie's domain is 846 equal to HOST. If not, assume success on the grounds of the 847 cookie's chain having been found by find_chains_of_host. */ 848 if (cookie->domain_exact 849 && 0 != strcasecmp (host, cookie->domain)) 850 return false; 851 852 pg = path_matches (path, cookie->path); 853 if (pg == 0) 854 return false; 855 856 if (path_goodness) 857 /* If the caller requested path_goodness, we return it. This is 858 an optimization, so that the caller doesn't need to call 859 path_matches() again. */ 860 *path_goodness = pg; 861 return true; 862} 863 864/* A structure that points to a cookie, along with the additional 865 information about the cookie's "goodness". This allows us to sort 866 the cookies when returning them to the server, as required by the 867 spec. */ 868 869struct weighed_cookie { 870 struct cookie *cookie; 871 int domain_goodness; 872 int path_goodness; 873}; 874 875/* Comparator used for uniquifying the list. */ 876 877static int 878equality_comparator (const void *p1, const void *p2) 879{ 880 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1; 881 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2; 882 883 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr); 884 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value); 885 886 /* We only really care whether both name and value are equal. We 887 return them in this order only for consistency... */ 888 return namecmp ? namecmp : valuecmp; 889} 890 891/* Eliminate duplicate cookies. "Duplicate cookies" are any two 892 cookies with the same attr name and value. Whenever a duplicate 893 pair is found, one of the cookies is removed. */ 894 895static int 896eliminate_dups (struct weighed_cookie *outgoing, int count) 897{ 898 struct weighed_cookie *h; /* hare */ 899 struct weighed_cookie *t; /* tortoise */ 900 struct weighed_cookie *end = outgoing + count; 901 902 /* We deploy a simple uniquify algorithm: first sort the array 903 according to our sort criteria, then copy it to itself, comparing 904 each cookie to its neighbor and ignoring the duplicates. */ 905 906 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); 907 908 /* "Hare" runs through all the entries in the array, followed by 909 "tortoise". If a duplicate is found, the hare skips it. 910 Non-duplicate entries are copied to the tortoise ptr. */ 911 912 for (h = t = outgoing; h < end; h++) 913 { 914 if (h != end - 1) 915 { 916 struct cookie *c0 = h[0].cookie; 917 struct cookie *c1 = h[1].cookie; 918 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value)) 919 continue; /* ignore the duplicate */ 920 } 921 922 /* If the hare has advanced past the tortoise (because of 923 previous dups), make sure the values get copied. Otherwise, 924 no copying is necessary. */ 925 if (h != t) 926 *t++ = *h; 927 else 928 t++; 929 } 930 return t - outgoing; 931} 932 933/* Comparator used for sorting by quality. */ 934 935static int 936goodness_comparator (const void *p1, const void *p2) 937{ 938 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1; 939 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2; 940 941 /* Subtractions take `wc2' as the first argument becauase we want a 942 sort in *decreasing* order of goodness. */ 943 int dgdiff = wc2->domain_goodness - wc1->domain_goodness; 944 int pgdiff = wc2->path_goodness - wc1->path_goodness; 945 946 /* Sort by domain goodness; if these are the same, sort by path 947 goodness. (The sorting order isn't really specified; maybe it 948 should be the other way around.) */ 949 return dgdiff ? dgdiff : pgdiff; 950} 951 952/* Generate a `Cookie' header for a request that goes to HOST:PORT and 953 requests PATH from the server. The resulting string is allocated 954 with `malloc', and the caller is responsible for freeing it. If no 955 cookies pertain to this request, i.e. no cookie header should be 956 generated, NULL is returned. */ 957 958char * 959cookie_header (struct cookie_jar *jar, const char *host, 960 int port, const char *path, bool secflag) 961{ 962 struct cookie **chains; 963 int chain_count; 964 965 struct cookie *cookie; 966 struct weighed_cookie *outgoing; 967 int count, i, ocnt; 968 char *result; 969 int result_size, pos; 970 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */ 971 972 /* First, find the cookie chains whose domains match HOST. */ 973 974 /* Allocate room for find_chains_of_host to write to. The number of 975 chains can at most equal the number of subdomains, hence 976 1+<number of dots>. */ 977 chains = alloca_array (struct cookie *, 1 + count_char (host, '.')); 978 chain_count = find_chains_of_host (jar, host, chains); 979 980 /* No cookies for this host. */ 981 if (!chain_count) 982 return NULL; 983 984 cookies_now = time (NULL); 985 986 /* Now extract from the chains those cookies that match our host 987 (for domain_exact cookies), port (for cookies with port other 988 than PORT_ANY), etc. See matching_cookie for details. */ 989 990 /* Count the number of matching cookies. */ 991 count = 0; 992 for (i = 0; i < chain_count; i++) 993 for (cookie = chains[i]; cookie; cookie = cookie->next) 994 if (cookie_matches_url (cookie, host, port, path, secflag, NULL)) 995 ++count; 996 if (!count) 997 return NULL; /* no cookies matched */ 998 999 /* Allocate the array. */ 1000 outgoing = alloca_array (struct weighed_cookie, count); 1001 1002 /* Fill the array with all the matching cookies from the chains that 1003 match HOST. */ 1004 ocnt = 0; 1005 for (i = 0; i < chain_count; i++) 1006 for (cookie = chains[i]; cookie; cookie = cookie->next) 1007 { 1008 int pg; 1009 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg)) 1010 continue; 1011 outgoing[ocnt].cookie = cookie; 1012 outgoing[ocnt].domain_goodness = strlen (cookie->domain); 1013 outgoing[ocnt].path_goodness = pg; 1014 ++ocnt; 1015 } 1016 assert (ocnt == count); 1017 1018 /* Eliminate duplicate cookies; that is, those whose name and value 1019 are the same. */ 1020 count = eliminate_dups (outgoing, count); 1021 1022 /* Sort the array so that best-matching domains come first, and 1023 that, within one domain, best-matching paths come first. */ 1024 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator); 1025 1026 /* Count the space the name=value pairs will take. */ 1027 result_size = 0; 1028 for (i = 0; i < count; i++) 1029 { 1030 struct cookie *c = outgoing[i].cookie; 1031 /* name=value */ 1032 result_size += strlen (c->attr) + 1 + strlen (c->value); 1033 } 1034 1035 /* Allocate output buffer: 1036 name=value pairs -- result_size 1037 "; " separators -- (count - 1) * 2 1038 \0 terminator -- 1 */ 1039 result_size = result_size + (count - 1) * 2 + 1; 1040 result = xmalloc (result_size); 1041 pos = 0; 1042 for (i = 0; i < count; i++) 1043 { 1044 struct cookie *c = outgoing[i].cookie; 1045 int namlen = strlen (c->attr); 1046 int vallen = strlen (c->value); 1047 1048 memcpy (result + pos, c->attr, namlen); 1049 pos += namlen; 1050 result[pos++] = '='; 1051 memcpy (result + pos, c->value, vallen); 1052 pos += vallen; 1053 if (i < count - 1) 1054 { 1055 result[pos++] = ';'; 1056 result[pos++] = ' '; 1057 } 1058 } 1059 result[pos++] = '\0'; 1060 assert (pos == result_size); 1061 return result; 1062} 1063 1064/* Support for loading and saving cookies. The format used for 1065 loading and saving should be the format of the `cookies.txt' file 1066 used by Netscape and Mozilla, at least the Unix versions. 1067 (Apparently IE can export cookies in that format as well.) The 1068 format goes like this: 1069 1070 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE 1071 1072 DOMAIN -- cookie domain, optionally followed by :PORT 1073 DOMAIN-FLAG -- whether all hosts in the domain match 1074 PATH -- cookie path 1075 SECURE-FLAG -- whether cookie requires secure connection 1076 TIMESTAMP -- expiry timestamp, number of seconds since epoch 1077 ATTR-NAME -- name of the cookie attribute 1078 ATTR-VALUE -- value of the cookie attribute (empty if absent) 1079 1080 The fields are separated by TABs. All fields are mandatory, except 1081 for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values 1082 being "TRUE" and "FALSE'. Empty lines, lines consisting of 1083 whitespace only, and comment lines (beginning with # optionally 1084 preceded by whitespace) are ignored. 1085 1086 Example line from cookies.txt (split in two lines for readability): 1087 1088 .google.com TRUE / FALSE 2147368447 \ 1089 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012 1090 1091*/ 1092 1093/* If the region [B, E) ends with :<digits>, parse the number, return 1094 it, and store new boundary (location of the `:') to DOMAIN_E_PTR. 1095 If port is not specified, return 0. */ 1096 1097static int 1098domain_port (const char *domain_b, const char *domain_e, 1099 const char **domain_e_ptr) 1100{ 1101 int port = 0; 1102 const char *p; 1103 const char *colon = memchr (domain_b, ':', domain_e - domain_b); 1104 if (!colon) 1105 return 0; 1106 for (p = colon + 1; p < domain_e && c_isdigit (*p); p++) 1107 port = 10 * port + (*p - '0'); 1108 if (p < domain_e) 1109 /* Garbage following port number. */ 1110 return 0; 1111 *domain_e_ptr = colon; 1112 return port; 1113} 1114 1115#define GET_WORD(p, b, e) do { \ 1116 b = p; \ 1117 while (*p && *p != '\t') \ 1118 ++p; \ 1119 e = p; \ 1120 if (b == e || !*p) \ 1121 goto next; \ 1122 ++p; \ 1123} while (0) 1124 1125/* Load cookies from FILE. */ 1126 1127void 1128cookie_jar_load (struct cookie_jar *jar, const char *file) 1129{ 1130 char *line; 1131 FILE *fp = fopen (file, "r"); 1132 if (!fp) 1133 { 1134 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"), 1135 quote (file), strerror (errno)); 1136 return; 1137 } 1138 cookies_now = time (NULL); 1139 1140 for (; ((line = read_whole_line (fp)) != NULL); xfree (line)) 1141 { 1142 struct cookie *cookie; 1143 char *p = line; 1144 1145 double expiry; 1146 int port; 1147 1148 char *domain_b = NULL, *domain_e = NULL; 1149 char *domflag_b = NULL, *domflag_e = NULL; 1150 char *path_b = NULL, *path_e = NULL; 1151 char *secure_b = NULL, *secure_e = NULL; 1152 char *expires_b = NULL, *expires_e = NULL; 1153 char *name_b = NULL, *name_e = NULL; 1154 char *value_b = NULL, *value_e = NULL; 1155 1156 /* Skip leading white-space. */ 1157 while (*p && c_isspace (*p)) 1158 ++p; 1159 /* Ignore empty lines. */ 1160 if (!*p || *p == '#') 1161 continue; 1162 1163 GET_WORD (p, domain_b, domain_e); 1164 GET_WORD (p, domflag_b, domflag_e); 1165 GET_WORD (p, path_b, path_e); 1166 GET_WORD (p, secure_b, secure_e); 1167 GET_WORD (p, expires_b, expires_e); 1168 GET_WORD (p, name_b, name_e); 1169 1170 /* Don't use GET_WORD for value because it ends with newline, 1171 not TAB. */ 1172 value_b = p; 1173 value_e = p + strlen (p); 1174 if (value_e > value_b && value_e[-1] == '\n') 1175 --value_e; 1176 if (value_e > value_b && value_e[-1] == '\r') 1177 --value_e; 1178 /* Empty values are legal (I think), so don't bother checking. */ 1179 1180 cookie = cookie_new (); 1181 1182 cookie->attr = strdupdelim (name_b, name_e); 1183 cookie->value = strdupdelim (value_b, value_e); 1184 cookie->path = strdupdelim (path_b, path_e); 1185 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE"); 1186 1187 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE 1188 value indicating if all machines within a given domain can 1189 access the variable. This value is set automatically by the 1190 browser, depending on the value set for the domain." */ 1191 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE"); 1192 1193 /* DOMAIN needs special treatment because we might need to 1194 extract the port. */ 1195 port = domain_port (domain_b, domain_e, (const char **)&domain_e); 1196 if (port) 1197 cookie->port = port; 1198 1199 if (*domain_b == '.') 1200 ++domain_b; /* remove leading dot internally */ 1201 cookie->domain = strdupdelim (domain_b, domain_e); 1202 1203 /* safe default in case EXPIRES field is garbled. */ 1204 expiry = (double)cookies_now - 1; 1205 1206 /* I don't like changing the line, but it's safe here. (line is 1207 malloced.) */ 1208 *expires_e = '\0'; 1209 sscanf (expires_b, "%lf", &expiry); 1210 1211 if (expiry == 0) 1212 { 1213 /* EXPIRY can be 0 for session cookies saved because the 1214 user specified `--keep-session-cookies' in the past. 1215 They remain session cookies, and will be saved only if 1216 the user has specified `keep-session-cookies' again. */ 1217 } 1218 else 1219 { 1220 if (expiry < cookies_now) 1221 goto abort_cookie; /* ignore stale cookie. */ 1222 cookie->expiry_time = expiry; 1223 cookie->permanent = 1; 1224 } 1225 1226 store_cookie (jar, cookie); 1227 1228 next: 1229 continue; 1230 1231 abort_cookie: 1232 delete_cookie (cookie); 1233 } 1234 fclose (fp); 1235} 1236 1237/* Save cookies, in format described above, to FILE. */ 1238 1239void 1240cookie_jar_save (struct cookie_jar *jar, const char *file) 1241{ 1242 FILE *fp; 1243 hash_table_iterator iter; 1244 1245 DEBUGP (("Saving cookies to %s.\n", file)); 1246 1247 cookies_now = time (NULL); 1248 1249 fp = fopen (file, "w"); 1250 if (!fp) 1251 { 1252 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"), 1253 quote (file), strerror (errno)); 1254 return; 1255 } 1256 1257 fputs ("# HTTP cookie file.\n", fp); 1258 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now)); 1259 fputs ("# Edit at your own risk.\n\n", fp); 1260 1261 for (hash_table_iterate (jar->chains, &iter); 1262 hash_table_iter_next (&iter); 1263 ) 1264 { 1265 const char *domain = iter.key; 1266 struct cookie *cookie = iter.value; 1267 for (; cookie; cookie = cookie->next) 1268 { 1269 if (!cookie->permanent && !opt.keep_session_cookies) 1270 continue; 1271 if (cookie_expired_p (cookie)) 1272 continue; 1273 if (!cookie->domain_exact) 1274 fputc ('.', fp); 1275 fputs (domain, fp); 1276 if (cookie->port != PORT_ANY) 1277 fprintf (fp, ":%d", cookie->port); 1278 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n", 1279 cookie->domain_exact ? "FALSE" : "TRUE", 1280 cookie->path, cookie->secure ? "TRUE" : "FALSE", 1281 (double)cookie->expiry_time, 1282 cookie->attr, cookie->value); 1283 if (ferror (fp)) 1284 goto out; 1285 } 1286 } 1287 out: 1288 if (ferror (fp)) 1289 logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"), 1290 quote (file), strerror (errno)); 1291 if (fclose (fp) < 0) 1292 logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"), 1293 quote (file), strerror (errno)); 1294 1295 DEBUGP (("Done saving cookies.\n")); 1296} 1297 1298/* Clean up cookie-related data. */ 1299 1300void 1301cookie_jar_delete (struct cookie_jar *jar) 1302{ 1303 /* Iterate over chains (indexed by domain) and free them. */ 1304 hash_table_iterator iter; 1305 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); ) 1306 { 1307 struct cookie *chain = iter.value; 1308 xfree (iter.key); 1309 /* Then all cookies in this chain. */ 1310 while (chain) 1311 { 1312 struct cookie *next = chain->next; 1313 delete_cookie (chain); 1314 chain = next; 1315 } 1316 } 1317 hash_table_destroy (jar->chains); 1318 xfree (jar); 1319} 1320 1321/* Test cases. Currently this is only tests parse_set_cookies. To 1322 use, recompile Wget with -DTEST_COOKIES and call test_cookies() 1323 from main. */ 1324 1325#ifdef TEST_COOKIES 1326void 1327test_cookies (void) 1328{ 1329 /* Tests expected to succeed: */ 1330 static struct { 1331 const char *data; 1332 const char *results[10]; 1333 } tests_succ[] = { 1334 { "arg=value", {"arg", "value", NULL} }, 1335 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, 1336 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} }, 1337 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} }, 1338 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} }, 1339 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} }, 1340 { "arg=", {"arg", "", NULL} }, 1341 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} }, 1342 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} }, 1343 }; 1344 1345 /* Tests expected to fail: */ 1346 static char *tests_fail[] = { 1347 ";", 1348 "arg=\"unterminated", 1349 "=empty-name", 1350 "arg1=;=another-empty-name", 1351 }; 1352 int i; 1353 1354 for (i = 0; i < countof (tests_succ); i++) 1355 { 1356 int ind; 1357 const char *data = tests_succ[i].data; 1358 const char **expected = tests_succ[i].results; 1359 struct cookie *c; 1360 1361 c = parse_set_cookie (data, true); 1362 if (!c) 1363 { 1364 printf ("NULL cookie returned for valid data: %s\n", data); 1365 continue; 1366 } 1367 1368 /* Test whether extract_param handles these cases correctly. */ 1369 { 1370 param_token name, value; 1371 const char *ptr = data; 1372 int j = 0; 1373 while (extract_param (&ptr, &name, &value, ';')) 1374 { 1375 char *n = strdupdelim (name.b, name.e); 1376 char *v = strdupdelim (value.b, value.e); 1377 if (!expected[j]) 1378 { 1379 printf ("Too many parameters for '%s'\n", data); 1380 break; 1381 } 1382 if (0 != strcmp (expected[j], n)) 1383 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n", 1384 j / 2 + 1, data, expected[j], n); 1385 if (0 != strcmp (expected[j + 1], v)) 1386 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n", 1387 j / 2 + 1, data, expected[j + 1], v); 1388 j += 2; 1389 free (n); 1390 free (v); 1391 } 1392 if (expected[j]) 1393 printf ("Too few parameters for '%s'\n", data); 1394 } 1395 } 1396 1397 for (i = 0; i < countof (tests_fail); i++) 1398 { 1399 struct cookie *c; 1400 char *data = tests_fail[i]; 1401 c = parse_set_cookie (data, true); 1402 if (c) 1403 printf ("Failed to report error on invalid data: %s\n", data); 1404 } 1405} 1406#endif /* TEST_COOKIES */ 1407