#!/usr/bin/perl

use strict;
use warnings;

use WgetFeature qw(iri);
use HTTPTest;

# cf. http://en.wikipedia.org/wiki/Latin1
#     http://en.wikipedia.org/wiki/ISO-8859-15
###############################################################################
#
# Test-iri-list: fetch a list of URLs (url_list.txt) containing non-ASCII
# characters with "--iri -i" and check which files end up on disk.
#
# mime : charset found in Content-Type HTTP MIME header
# meta : charset found in Content-Type meta tag
#
# index.html                  mime + file = iso-8859-15
# p1_français.html            meta + file = iso-8859-1, mime = utf-8
# p2_één.html                 meta + file = utf-8, mime = iso-8859-1
#

# Raw byte sequences for the two non-ASCII letters used in the file names,
# in Latin-1 (_l1) and UTF-8 (_u8).  Spelling them as escapes keeps the test
# independent of the encoding this source file happens to be saved in.
my $ccedilla_l1 = "\xE7";
my $ccedilla_u8 = "\xC3\xA7";
my $eacute_l1   = "\xE9";
my $eacute_u8   = "\xC3\xA9";

# The URL list handed to "-i".  The non-ASCII letters are written as Latin-1
# bytes; the list is served below with charset=ISO-8859-1.
my $urllist = <<EOF;
http://localhost:{{port}}/
http://localhost:{{port}}/p1_fran${ccedilla_l1}ais.html
http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html
EOF

my $pageindex = <<EOF;
<html>
<head>
  <title>Main Page</title>
</head>
<body>
  <p>
    Main page.
  </p>
</body>
</html>
EOF

# The page declares ISO-8859-1 in its meta tag, so the c-cedilla in the title
# is emitted as the Latin-1 byte via ${ccedilla_l1} instead of a literal
# character that depends on (and can be corrupted by) the source encoding.
my $pagefrancais = <<EOF;
<html>
<head>
  <title>La seule page en fran${ccedilla_l1}ais</title>
  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
</head>
<body>
  <p>
    French page.
  </p>
</body>
</html>
EOF

my $pageeen = <<EOF;
<html>
<head>
  <title>Die enkele nederlandstalige pagina</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
  <p>
    Dutch page.
  </p>
</body>
</html>
EOF

my $page404 = <<EOF;
<html>
<head>
  <title>404</title>
</head>
<body>
  <p>
    Nop nop nop...
  </p>
</body>
</html>
EOF

# Resources served by the test HTTP server.
# Each entry: code, msg, headers, content.
# The p1/p2 pages are registered under both the UTF-8 and the Latin-1
# percent-encoding of their names, so the test can tell which form was
# requested.
my %urls = (
    '/index.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-15",
        },
        content => $pageindex,
    },
    '/robots.txt' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => "",
    },
    '/p1_fran%C3%A7ais.html' => {   # UTF-8 encoded
        code => "404",
        msg => "File not found",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $page404,
    },
    '/p1_fran%E7ais.html' => {      # Latin-1 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pagefrancais,
    },
    '/p2_%C3%A9%C3%A9n.html' => {   # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-1",
        },
        content => $pageeen,
    },
    '/p2_%E9%E9n.html' => {         # Latin-1 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-1",
        },
        content => $pageeen,
    },
    '/url_list.txt' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain; charset=ISO-8859-1",
        },
        content => $urllist,
    },
);

# Command under test: the URL list itself is fetched over HTTP via -i.
my $cmdline = $WgetTest::WGETPATH . " --iri -d -i http://localhost:{{port}}/url_list.txt";

my $expected_error_code = 0;

# Files expected on disk after the run.  Note the asymmetry: the French page
# is expected under its Latin-1 name, the Dutch page under its UTF-8 name.
my %expected_downloaded_files = (
    'url_list.txt' => {
        content => $urllist,
    },
    'index.html' => {
        content => $pageindex,
    },
    "p1_fran${ccedilla_l1}ais.html" => {
        content => $pagefrancais,
    },
    "p2_${eacute_u8}${eacute_u8}n.html" => {
        content => $pageeen,
    },
);

###############################################################################

my $the_test = HTTPTest->new (name => "Test-iri-list",
                              input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);
exit $the_test->run();

# vim: et ts=4 sw=4