#!/usr/bin/perl

use strict;
use warnings;

use HTTPTest;

# This test checks that Wget parses "nofollow" when it appears in <meta
# name="robots"> tags, regardless of where in a list of comma-separated
# values it appears, and regardless of letter case.
#
# Four different files contain links to the file "bombshell.html", each
# with "nofollow" set: at the start, in the middle and at the end of a
# list of values for a <meta name="robots"> tag (with various degrees of
# separating whitespace), and as the only value. If bombshell.html is
# downloaded, the test has failed.

###############################################################################

# Page bodies. The heredocs are single-quoted on purpose: the markup is
# literal text and must never be subject to variable interpolation.

# "nofollow" is the first value in the list.
my $nofollow_start = <<'EOF';
<meta name="roBoTS" content="noFolLow , foo, bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

# "nofollow" is in the middle of the list.
my $nofollow_mid = <<'EOF';
<meta name="rObOts" content=" foo , NOfOllow , bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

# "nofollow" is the last value in the list.
my $nofollow_end = <<'EOF';
<meta name="RoBotS" content="foo,BAr, nofOLLOw ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

# "nofollow" is the only value.
my $nofollow_solo = <<'EOF';
<meta name="robots" content="nofollow">
<a href="/bombshell.html">Don't follow me!</a>
EOF

# Responses handed to the test server, keyed by request path.
# code, msg, headers, content
my %urls = (
    '/start.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_start,
    },
    '/mid.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_mid,
    },
    '/end.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_end,
    },
    '/solo.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $nofollow_solo,
    },
    # The page every other page links to. It is served successfully so that
    # only Wget's handling of "nofollow" can keep it from being downloaded.
    '/bombshell.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => 'Hello',
    },
);

# Recursive run (-r -nd) over the four nofollow pages. The "{{port}}"
# placeholder is passed through literally in the command line.
my $cmdline = $WgetTest::WGETPATH . " -r -nd "
    . join(' ',(map "http://localhost:{{port}}/$_.html",
                qw(start mid end solo)));

my $expected_error_code = 0;

# Only the four requested pages are expected; bombshell.html is
# deliberately absent from this list.
my %expected_downloaded_files = (
    'start.html' => {
        content => $nofollow_start,
    },
    'mid.html' => {
        content => $nofollow_mid,
    },
    'end.html' => {
        content => $nofollow_end,
    },
    'solo.html' => {
        content => $nofollow_solo,
    },
);

###############################################################################

my $the_test = HTTPTest->new (name => "Test-meta-robots",
                              input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);
exit $the_test->run();

# vim: et ts=4 sw=4
