From 73cfa618b4e9c91688cbf3f3f035e1774a0c2947 Mon Sep 17 00:00:00 2001
From: Simon McVittie
Date: Sun, 3 Feb 2019 11:48:43 +0000
Subject: pagetitle.t, linkpage.t, titlepage.t: Exercise Unicode more

Signed-off-by: Simon McVittie
---
Reviewer notes (this region is ignored when the patch is applied):
  * All three test files drop the fixed plan ("tests => 7") and finish with
    done_testing instead, so adding cases no longer requires updating a count.
  * The new cases cover four code points: U+00AC (Latin-1 range),
    U+04D2 (alphanumeric, inside the BMP), U+2260 (non-alphanumeric, inside
    the BMP) and U+1F4A9 (outside the BMP).
  * t/pagetitle.t is split into three sections: pagetitle(x),
    pagetitle(x, false) and pagetitle(x, true).
  * NOTE(review): in the two "XML-escaped" sections of t/pagetitle.t the
    expected strings appear here as literal characters (e.g. "foo bar!baz"),
    identical to the "unescaped" section.  Possibly numeric character
    references were decoded when this patch was rendered as HTML -- confirm
    against the repository before applying.
  * NOTE(review): the author's e-mail address is missing from the From: and
    Signed-off-by: lines in this copy -- restore it from the repository if
    the patch is to be applied with git am.

 t/linkpage.t  |  8 +++++++-
 t/pagetitle.t | 37 +++++++++++++++++++++++++++++++++----
 t/titlepage.t |  8 +++++++-
 3 files changed, 47 insertions(+), 6 deletions(-)
 (limited to 't')

diff --git a/t/linkpage.t b/t/linkpage.t
index 8085de153..db5fc7ae5 100755
--- a/t/linkpage.t
+++ b/t/linkpage.t
@@ -1,7 +1,7 @@
 #!/usr/bin/perl
 use warnings;
 use strict;
-use Test::More tests => 7;
+use Test::More;
 
 BEGIN { use_ok("IkiWiki"); }
 
@@ -11,3 +11,9 @@ is(linkpage("foo bar/baz"), "foo_bar/baz");
 is(linkpage("foo bar&baz"), "foo_bar__38__baz");
 is(linkpage("foo bar & baz"), "foo_bar___38___baz");
 is(linkpage("foo bar_baz"), "foo_bar_baz");
+is(linkpage("foo bar\xACbaz"), "foo_bar__172__baz", 'U+00AC is in Latin-1 range');
+is(linkpage("foo bar\x{04D2}baz"), "foo_bar\x{04D2}baz", 'U+04D2 is alphanumeric');
+is(linkpage("foo bar\x{2260}baz"), "foo_bar__8800__baz", 'U+2260 is nonalphanumeric');
+is(linkpage("foo bar\x{0001F4A9}baz"), "foo_bar__128169__baz", 'U+1F4A9 is outside BMP');
+
+done_testing;
diff --git a/t/pagetitle.t b/t/pagetitle.t
index d9aa62063..d00d9d297 100755
--- a/t/pagetitle.t
+++ b/t/pagetitle.t
@@ -1,13 +1,42 @@
 #!/usr/bin/perl
 use warnings;
 use strict;
-use Test::More tests => 7;
+use Test::More;
 
 BEGIN { use_ok("IkiWiki"); }
 
+# pagetitle(x) => XML-escaped form of page title
 is(pagetitle("foo_bar"), "foo bar");
 is(pagetitle("foo_bar_baz"), "foo bar baz");
 is(pagetitle("foo_bar__33__baz"), "foo bar!baz");
-is(pagetitle("foo_bar__1234__baz"), "foo barӒbaz");
-is(pagetitle("foo_bar___33___baz"), "foo bar ! baz");
-is(pagetitle("foo_bar___95___baz"), "foo bar _ baz");
+# Ӓ is U+04D2 CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+is(pagetitle("foo_bar__1234__baz"), "foo barӒbaz", 'Unicode in BMP');
+# ≠ is U+2260 NOT EQUAL TO
+is(pagetitle("foo_bar__8800__baz"), "foo bar≠baz", 'Unicode in BMP');
+is(pagetitle("foo_bar___33___baz"), "foo bar ! baz", 'Exclamation mark');
+is(pagetitle("foo_bar___95___baz"), "foo bar _ baz", 'Underscore');
+# Outside basic multilingual plane: 💩 is U+1F4A9 PILE OF POO
+is(pagetitle("foo_bar__128169__baz"), "foo bar💩baz", 'Unicode outside BMP');
+
+# pagetitle(x, false) => same
+is(pagetitle("foo_bar__33__baz", 0), "foo bar!baz");
+is(pagetitle("foo_bar__1234__baz", undef), "foo barӒbaz", 'Unicode in BMP');
+is(pagetitle("foo_bar__8800__baz", undef), "foo bar≠baz", 'Unicode in BMP');
+is(pagetitle("foo_bar___33___baz", ""), "foo bar ! baz", 'Exclamation mark');
+is(pagetitle("foo_bar___95___baz", 0), "foo bar _ baz", 'Underscore');
+is(pagetitle("foo_bar__128169__baz", 0), "foo bar💩baz", 'Unicode outside BMP');
+
+# pagetitle(x, true) => unescaped form of page title
+is(pagetitle("foo_bar", 1), "foo bar");
+is(pagetitle("foo_bar_baz", 'unescaped'), "foo bar baz");
+is(pagetitle("foo_bar__33__baz", 42), "foo bar!baz");
+is(chr(1234), "\x{04D2}");
+is(pagetitle("foo_bar__1234__baz", 1), "foo bar\x{04D2}baz", 'Unicode in BMP');
+is(chr(8800), "\x{2260}");
+is(pagetitle("foo_bar__8800__baz", 1), "foo bar\x{2260}baz", 'Unicode in BMP');
+is(pagetitle("foo_bar___33___baz", 1), "foo bar ! baz");
+is(pagetitle("foo_bar___95___baz", 1), "foo bar _ baz");
+is(chr(128169), "\x{0001F4A9}");
+is(pagetitle("foo_bar__128169__baz", 1), "foo bar\x{0001F4A9}baz", 'Unicode outside BMP');
+
+done_testing;
diff --git a/t/titlepage.t b/t/titlepage.t
index 5df33423e..d6bb1b121 100755
--- a/t/titlepage.t
+++ b/t/titlepage.t
@@ -1,7 +1,7 @@
 #!/usr/bin/perl
 use warnings;
 use strict;
-use Test::More tests => 7;
+use Test::More;
 
 BEGIN { use_ok("IkiWiki"); }
 
@@ -11,3 +11,9 @@ is(titlepage("foo bar/baz"), "foo_bar/baz");
 is(titlepage("foo bar&baz"), "foo_bar__38__baz");
 is(titlepage("foo bar & baz"), "foo_bar___38___baz");
 is(titlepage("foo bar_baz"), "foo_bar__95__baz");
+is(titlepage("foo bar\xACbaz"), "foo_bar__172__baz", 'U+00AC is in Latin-1 range');
+is(titlepage("foo bar\x{04D2}baz"), "foo_bar\x{04D2}baz", 'U+04D2 is alphanumeric');
+is(titlepage("foo bar\x{2260}baz"), "foo_bar__8800__baz", 'U+2260 is nonalphanumeric');
+is(titlepage("foo bar\x{0001F4A9}baz"), "foo_bar__128169__baz", 'U+1F4A9 is outside BMP');
+
+done_testing;
-- 
cgit v1.2.3