mirror of
https://github.com/zdharma-continuum/zinit-module.git
synced 2024-11-21 13:27:58 +01:00
587 lines
14 KiB
Text
587 lines
14 KiB
Text
%prep
|
|
|
|
# Find a UTF-8 locale.
|
|
setopt multibyte
|
|
# Don't let LC_* override our choice of locale.
|
|
unset -m LC_\*
|
|
mb_ok=
|
|
# Candidate locales: the usual English UTF-8 names first, followed by every
# UTF-8 locale the system reports.  grep -E is used rather than egrep, which
# is obsolescent and makes newer grep releases print a warning on stderr.
langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
$(locale -a 2>/dev/null | grep -E 'utf8|UTF-8'))
|
|
# Try each candidate locale by assigning it to LANG.  The loop stops at the
# first locale that passes the check below, leaving LANG set to that locale
# and mb_ok set to 1; if none passes, mb_ok stays empty.
for LANG in $langs; do
|
|
# ? matches exactly one character, so this only succeeds when é is being
# handled as a single (multibyte) character.
if [[ é = ? ]]; then
|
|
mb_ok=1
|
|
break;
|
|
fi
|
|
done
|
|
# Act on the outcome of the locale probe.
if [[ -z $mb_ok ]]; then
|
|
# No usable locale: record the reason in ZTST_unimplemented
# (presumably read by the test harness to skip this file -- confirm).
ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented"
|
|
else
|
|
# Report the chosen locale on file descriptor $ZTST_fd.
print -u $ZTST_fd Testing multibyte with locale $LANG
|
|
# Work inside a scratch directory; cd only happens if mkdir succeeded.
mkdir multibyte.tmp && cd multibyte.tmp
|
|
fi
|
|
|
|
%test
|
|
|
|
a=ténébreux
|
|
for i in {1..9}; do
|
|
print ${a[i]}
|
|
for j in {$i..9}; do
|
|
print $i $j ${a[i,j]} ${a[-j,-i]}
|
|
done
|
|
done
|
|
0:Basic indexing with multibyte characters
|
|
>t
|
|
>1 1 t x
|
|
>1 2 té ux
|
|
>1 3 tén eux
|
|
>1 4 téné reux
|
|
>1 5 ténéb breux
|
|
>1 6 ténébr ébreux
|
|
>1 7 ténébre nébreux
|
|
>1 8 ténébreu énébreux
|
|
>1 9 ténébreux ténébreux
|
|
>é
|
|
>2 2 é u
|
|
>2 3 én eu
|
|
>2 4 éné reu
|
|
>2 5 énéb breu
|
|
>2 6 énébr ébreu
|
|
>2 7 énébre nébreu
|
|
>2 8 énébreu énébreu
|
|
>2 9 énébreux ténébreu
|
|
>n
|
|
>3 3 n e
|
|
>3 4 né re
|
|
>3 5 néb bre
|
|
>3 6 nébr ébre
|
|
>3 7 nébre nébre
|
|
>3 8 nébreu énébre
|
|
>3 9 nébreux ténébre
|
|
>é
|
|
>4 4 é r
|
|
>4 5 éb br
|
|
>4 6 ébr ébr
|
|
>4 7 ébre nébr
|
|
>4 8 ébreu énébr
|
|
>4 9 ébreux ténébr
|
|
>b
|
|
>5 5 b b
|
|
>5 6 br éb
|
|
>5 7 bre néb
|
|
>5 8 breu énéb
|
|
>5 9 breux ténéb
|
|
>r
|
|
>6 6 r é
|
|
>6 7 re né
|
|
>6 8 reu éné
|
|
>6 9 reux téné
|
|
>e
|
|
>7 7 e n
|
|
>7 8 eu én
|
|
>7 9 eux tén
|
|
>u
|
|
>8 8 u é
|
|
>8 9 ux té
|
|
>x
|
|
>9 9 x t
|
|
|
|
# s holds a single multibyte character.  Per the expected output, subscripts
# -1 and 1 both select é, while -2, 0 and 2 expand to nothing.
s=é
|
|
print A${s[-2]}A B${s[-1]}B C${s[0]}C D${s[1]}D E${s[2]}E
|
|
0:Out of range subscripts with multibyte characters
|
|
>AA BéB CC DéD EE
|
|
|
|
print ${a[(i)é]} ${a[(I)é]} ${a[${a[(i)é]},${a[(I)é]}]}
|
|
0:Reverse indexing with multibyte characters
|
|
>2 4 éné
|
|
|
|
print ${a[(r)én,(r)éb]}
|
|
0:Subscript searching with multibyte characters
|
|
>énéb
|
|
|
|
print ${a[(rb:1:)é,-1]}
|
|
print ${a[(rb:2:)é,-1]}
|
|
print ${a[(rb:3:)é,-1]}
|
|
print ${a[(rb:4:)é,-1]}
|
|
print ${a[(rb:5:)é,-1]}
|
|
0:Subscript searching with initial offset
|
|
>énébreux
|
|
>énébreux
|
|
>ébreux
|
|
>ébreux
|
|
>
|
|
|
|
print ${a[(rn:1:)é,-1]}
|
|
print ${a[(rn:2:)é,-1]}
|
|
print ${a[(rn:3:)é,-1]}
|
|
0:Subscript searching with count
|
|
>énébreux
|
|
>ébreux
|
|
>
|
|
|
|
print ${a[(R)én,(R)éb]}
|
|
0:Backward subscript searching with multibyte characters
|
|
>énéb
|
|
|
|
# Starting offsets with (R) seem to be so strange as to be hardly
|
|
# worth testing.
|
|
|
|
setopt extendedglob
|
|
[[ $a = (#b)t(én)(éb)reux ]] || print "Failed to match." >&2
|
|
for i in {1..${#match}}; do
|
|
print $match[i] $mbegin[i] $mend[i] ${a[$mbegin[i],$mend[i]]}
|
|
done
|
|
0:Multibyte offsets in pattern tests
|
|
>én 2 3 én
|
|
>éb 4 5 éb
|
|
|
|
b=${(U)a}
|
|
print $b
|
|
print ${(L)b}
|
|
desdichado="Je suis le $a, le veuf, l'inconsolé"
|
|
print ${(C)desdichado}
|
|
lxiv="l'état c'est moi"
|
|
print ${(C)lxiv}
|
|
0:Case modification of multibyte strings
|
|
>TÉNÉBREUX
|
|
>ténébreux
|
|
>Je Suis Le Ténébreux, Le Veuf, L'Inconsolé
|
|
>L'État C'Est Moi
|
|
|
|
array=(ølaf ødd øpened án encyclopædia)
|
|
barray=(${(U)array})
|
|
print $barray
|
|
print ${(L)barray}
|
|
print ${(C)array}
|
|
print ${(C)barray}
|
|
0:Case modification of arrays with multibyte strings
|
|
>ØLAF ØDD ØPENED ÁN ENCYCLOPÆDIA
|
|
>ølaf ødd øpened án encyclopædia
|
|
>Ølaf Ødd Øpened Án Encyclopædia
|
|
>Ølaf Ødd Øpened Án Encyclopædia
|
|
|
|
print $(( ##¥ ))
|
|
pound=£
|
|
print $(( #pound ))
|
|
alpha=α
|
|
print $(( ##α )) $(( #alpha ))
|
|
0:Conversion to Unicode in mathematical expressions
|
|
>165
|
|
>163
|
|
>945 945
|
|
|
|
unsetopt posix_identifiers
|
|
expr='hähä=3 || exit 1; print $hähä'
|
|
eval $expr
|
|
setopt posix_identifiers
|
|
(eval $expr)
|
|
1:POSIX_IDENTIFIERS option
|
|
>3
|
|
?(eval):1: command not found: hähä=3
|
|
|
|
foo="Ølaf«Ødd«øpénëd«ån«àpple"
|
|
print -l ${(s.«.)foo}
|
|
ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
|
|
print -l ${=ioh}
|
|
print ${(w)#ioh}
|
|
0:Splitting with multibyte characters
|
|
>Ølaf
|
|
>Ødd
|
|
>øpénëd
|
|
>ån
|
|
>àpple
|
|
>Ἐν
|
|
>ἀρχῇ
|
|
>ἦν
|
|
>ὁ
|
|
>λόγος,
|
|
>καὶ
|
|
>ὁ
|
|
>λόγος
|
|
>ἦν
|
|
>πρὸς
|
|
>τὸν
|
|
>θεόν,
|
|
>καὶ
|
|
>θεὸς
|
|
>ἦν
|
|
>ὁ
|
|
>λόγος.
|
|
>17
|
|
|
|
# Each read stops at the next £ delimiter.  With the input first£second£
# the two variables are expected to hold "first" and "second".
read -d £ one
|
|
read -d £ two
|
|
print $one
|
|
print $two
|
|
0:read with multibyte delimiter
|
|
<first£second£
|
|
>first
|
|
>second
|
|
|
|
(IFS=«
|
|
read -d » -A array
|
|
print -l $array)
|
|
0:read -A with multibyte IFS
|
|
<dominus«illuminatio«mea»ignored
|
|
>dominus
|
|
>illuminatio
|
|
>mea
|
|
|
|
read -k2 -u0 twochars
|
|
print $twochars
|
|
0:read multibyte characters
|
|
<«»ignored
|
|
>«»
|
|
|
|
read -q -u0 mb
|
|
print $?
|
|
0:multibyte character makes read -q return false
|
|
<«
|
|
>1
|
|
|
|
# See if the system groks first-century Greek...
|
|
ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
|
|
for (( i = 1; i <= ${#ioh}; i++ )); do
|
|
# FC3 doesn't recognise ῇ (U+1FC7: Greek small letter eta with
|
|
# perispomeni and ypogegrammeni, of course) as a lower case character.
|
|
if [[ $ioh[i] != [[:lower:]] && $i != 7 ]]; then
|
|
for tp in upper space punct invalid; do
|
|
if [[ $tp = invalid || $ioh[i] = [[:${tp}:]] ]]; then
|
|
print "$i: $tp"
|
|
break
|
|
fi
|
|
done
|
|
fi
|
|
done
|
|
0:isw* functions on non-ASCII wide characters
|
|
>1: upper
|
|
>3: space
|
|
>8: space
|
|
>11: space
|
|
>13: space
|
|
>19: punct
|
|
>20: space
|
|
>24: space
|
|
>26: space
|
|
>32: space
|
|
>35: space
|
|
>40: space
|
|
>44: space
|
|
>49: punct
|
|
>50: space
|
|
>54: space
|
|
>59: space
|
|
>62: space
|
|
>64: space
|
|
>70: punct
|
|
|
|
ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος"
|
|
print ${ioh#[[:alpha:]]##}
|
|
print ${ioh##[[:alpha:]]##}
|
|
print ${ioh%[[:alpha:]]##}
|
|
print ${ioh%%[[:alpha:]]##}
|
|
print ${(S)ioh#λ*ς}
|
|
print ${(S)ioh##λ*ς}
|
|
print ${(S)ioh%θ*ς}
|
|
print ${(S)ioh%%θ*ς}
|
|
0:Parameter #, ##, %, %% with multibyte characters
|
|
>ν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος
|
|
> ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος
|
|
>Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγο
|
|
>Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ
|
|
>Ἐν ἀρχῇ ἦν ὁ , καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος
|
|
>Ἐν ἀρχῇ ἦν ὁ
|
|
>Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ ἦν ὁ λόγος
|
|
>Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ
|
|
|
|
a="1ë34ë6"
|
|
print ${(BEN)a#*4}
|
|
print ${(BEN)a##*ë}
|
|
print ${(BEN)a%4*}
|
|
print ${(BEN)a%%ë*}
|
|
print ${(SBEN)a#ë3}
|
|
print ${(SBEN)a%4ë}
|
|
0:Flags B, E, N and S in ${...#...} and ${...%...}
|
|
>1 5 4
|
|
>1 6 5
|
|
>4 7 3
|
|
>2 7 5
|
|
>2 4 2
|
|
>4 6 2
|
|
|
|
foo=(κατέβην χθὲς εἰς Πειραιᾶ)
|
|
print ${(l.3..¥.r.3..£.)foo}
|
|
print ${(l.4..¥.r.2..£.)foo}
|
|
print ${(l.5..¥.r.1..£.)foo}
|
|
print ${(l.4..¥..«.r.4..£..».)foo}
|
|
print ${(l.4..¥..Σωκράτης.r.4..£..Γλαύκωνος.)foo}
|
|
0:simultaneous left and right padding
|
|
>κατέβη ¥χθὲς£ ¥¥εἰς£ Πειραι
|
|
>¥κατέβ ¥¥χθὲς ¥¥¥εἰς ¥Πειρα
|
|
>¥¥κατέ ¥¥¥χθὲ ¥¥¥¥εἰ ¥¥Πειρ
|
|
>«κατέβην ¥«χθὲς»£ ¥¥«εἰς»£ «Πειραιᾶ
|
|
>ςκατέβην ηςχθὲςΓλ τηςεἰςΓλ ςΠειραιᾶ
|
|
# er... yeah, that looks right...
|
|
|
|
foo=picobarn
|
|
print ${foo:s£bar£rod£:s¥rod¥stick¥}
|
|
0:Delimiters in modifiers
|
|
>picostickn
|
|
|
|
# TODO: if we get paired multibyte bracket delimiters to work
|
|
# (as Emacs does, the smug so-and-so), the following should change.
|
|
foo=bar
|
|
print ${(r£5££X£)foo}
|
|
print ${(l«10««Y««HI«)foo}
|
|
0:Delimiters in parameter flags
|
|
>barXX
|
|
>YYYYYHIbar
|
|
|
|
printf "%4.3s\n" főobar
|
|
0:Multibyte characters in printf widths
|
|
> főo
|
|
|
|
# We ask for case-insensitive sorting here (and supply upper case
|
|
# characters) so that we exercise the logic in the shell that lowers the
|
|
# case of the string for case-insensitive sorting.
|
|
print -oi HÛH HÔH HÎH HÊH HÂH
|
|
(LC_ALL=C; print -oi HAH HUH HEH HÉH HÈH)
|
|
0:Multibyte characters in print sorting
|
|
>HÂH HÊH HÎH HÔH HÛH
|
|
>HAH HEH HUH HÈH HÉH
|
|
|
|
# These are control characters in Unicode, so don't show up.
|
|
# We just want to check they're not being treated as tokens.
|
|
for x in {128..150}; do
|
|
print ${(#)x}
|
|
done | while read line; do
|
|
print ${#line} $(( #line ))
|
|
done
|
|
0:evaluated character number with multibyte characters
|
|
>1 128
|
|
>1 129
|
|
>1 130
|
|
>1 131
|
|
>1 132
|
|
>1 133
|
|
>1 134
|
|
>1 135
|
|
>1 136
|
|
>1 137
|
|
>1 138
|
|
>1 139
|
|
>1 140
|
|
>1 141
|
|
>1 142
|
|
>1 143
|
|
>1 144
|
|
>1 145
|
|
>1 146
|
|
>1 147
|
|
>1 148
|
|
>1 149
|
|
>1 150
|
|
|
|
touch ngs1txt ngs2txt ngs10txt ngs20txt ngs100txt ngs200txt
|
|
setopt numericglobsort
|
|
print -l ngs*
|
|
0:NUMERIC_GLOB_SORT option in UTF-8 locale
|
|
>ngs1txt
|
|
>ngs2txt
|
|
>ngs10txt
|
|
>ngs20txt
|
|
>ngs100txt
|
|
>ngs200txt
|
|
|
|
# Not strictly multibyte, but gives us a well-defined locale for testing.
|
|
foo=$'X\xc0Y\x07Z\x7fT'
|
|
print -r ${(q)foo}
|
|
0:Backslash-quoting of unprintable/invalid characters uses $'...'
|
|
>X$'\300'Y$'\a'Z$'\177'T
|
|
|
|
# This also isn't strictly multibyte and is here to reduce the
|
|
# likelihood of a "cannot do character set conversion" error.
|
|
(print $'\u00e9') 2>&1 | read
|
|
# Only exercise the error path when plain Unicode conversion works at all.
if [[ $REPLY = é ]]; then
  # Print U+00E9 under LC_ALL=C four times.  Every line of combined
  # stdout/stderr must be either a "character not in range" error or a
  # lone "?"; anything else is reported as a failure.
  testfn() { (LC_ALL=C; print $'\u00e9') }
  repeat 4 testfn 2>&1 | while read line; do
    case $line in
      (*"character not in range"*|"?")
        print OK
        ;;
      (*)
        print Failed: no error message and no question mark
        ;;
    esac
  done
else
  print "warning: your system can't do simple Unicode conversion." >&$ZTST_fd
  print "Check you have a correctly installed iconv library." >&$ZTST_fd
  # Conversion is unavailable, so emit the four expected lines directly.
  repeat 4 print OK
fi
|
|
true
|
|
0:error handling in Unicode quoting
|
|
>OK
|
|
>OK
|
|
>OK
|
|
>OK
|
|
|
|
tmp1='glob/\(\)Ą/*'
|
|
[[ glob/'()Ą'/foo == $~tmp1 ]] && print "Matched against $tmp1"
|
|
tmp1='glob/\(\)Ā/*'
|
|
[[ glob/'()Ā'/bar == $~tmp1 ]] && print "Matched against $tmp1"
|
|
0:Backslashes and metafied characters in patterns
|
|
>Matched against glob/()Ą/*
|
|
>Matched against glob/()Ā/*
|
|
|
|
mkdir 梶浦由記 'Пётр Ильич Чайковский'
|
|
(cd 梶浦由記; print ${${(%):-%~}:t})
|
|
(cd 'Пётр Ильич Чайковский'; print ${${(%):-%~}:t})
|
|
0:Metafied characters in prompt expansion
|
|
>梶浦由記
|
|
>Пётр Ильич Чайковский
|
|
|
|
(
|
|
setopt nonomatch
|
|
tmp1=Ą
|
|
tmpA=(Ą 'Пётр Ильич Чайковский' 梶浦由記)
|
|
print ${tmp1} ${(%)tmp1} ${(%%)tmp1}
|
|
print ${#tmp1} ${#${(%)tmp1}} ${#${(%%)tmp1}}
|
|
print ${tmpA}
|
|
print ${(%)tmpA}
|
|
print ${(%%)tmpA}
|
|
)
|
|
0:More metafied characters in prompt expansion
|
|
>Ą Ą Ą
|
|
>1 1 1
|
|
>Ą Пётр Ильич Чайковский 梶浦由記
|
|
>Ą Пётр Ильич Чайковский 梶浦由記
|
|
>Ą Пётр Ильич Чайковский 梶浦由記
|
|
|
|
setopt cbases
|
|
print $'\xc5' | read
|
|
print $(( [#16] #REPLY ))
|
|
0:read passes through invalid multibyte characters
|
|
>0xC5
|
|
|
|
# Assigning empty to the last index removes the multibyte ま (expected: abc).
word=abcま
|
|
word[-1]=
|
|
print $word
|
|
# Assigning empty to index -2 removes c and keeps ま (expected: abま).
word=abcま
|
|
word[-2]=
|
|
print $word
|
|
# Index 4 is the multibyte character; it is replaced by d (expected: abcd).
word=abcま
|
|
word[4]=d
|
|
print $word
|
|
# Replacing index 3 with a longer string leaves ま in place
# (expected: abnot_cま).
word=abcま
|
|
word[3]=not_c
|
|
print $word
|
|
0:assignment with negative indices
|
|
>abc
|
|
>abま
|
|
>abcd
|
|
>abnot_cま
|
|
|
|
# The following doesn't necessarily need UTF-8, but this gives
|
|
# us the full effect --- if we parse this wrongly the \xe9
|
|
# in combination with the tokenized input afterwards looks like a
|
|
# valid UTF-8 character. But it isn't.
|
|
print $'$\xe9#``' >test_bad_param
|
|
(setopt nonomatch
|
|
. ./test_bad_param)
|
|
127:Invalid parameter name with following tokenized input
|
|
?./test_bad_param:1: command not found: $\M-i#
|
|
|
|
lines=$'one\tZSH\tthree\nfour\tfive\tsix'
|
|
print -X8 -r -- $lines
|
|
0:Tab expansion with extra-wide characters
|
|
>one ZSH three
|
|
>four five six
|
|
# This doesn't look aligned in my editor because actually the characters
|
|
# aren't quite double width, but the arithmetic is correct.
|
|
# It appears just to be an effect of the font.
|
|
|
|
# Anonymous function: creates a directory with a multibyte name and enters
# it twice, once by bare name and once with a ./ prefix, returning to the
# parent each time.  The test expects exit status 0.
() {
|
|
# Start from zsh default options, scoped to this function (-L).
emulate -L zsh
|
|
# Any failing command below ends the function with a non-zero status.
setopt errreturn
|
|
# Function-local cdpath containing only the current directory.
local cdpath=(.)
|
|
mkdir ホ
|
|
cd ホ
|
|
cd ..
|
|
cd ./ホ
|
|
cd ..
|
|
}
|
|
0:cd with special characters
|
|
|
|
# Each entry is a [[ ... ]] condition written with \x and \u escapes.
# The entries are single-quoted, so the escapes are still literal text here.
test_array=(
|
|
'[[ \xcc = \xcc ]]'
|
|
'[[ \xcc != \xcd ]]'
|
|
'[[ \xcc != \ucc ]]'
|
|
'[[ \ucc = \ucc ]]'
|
|
'[[ \ucc = [\ucc] ]]'
|
|
'[[ \xcc != [\ucc] ]]'
|
|
# Not clear how useful the following is...
|
|
'[[ \xcc = [\xcc] ]]'
|
|
)
|
|
# Expand the escapes with the (g::) flag, evaluate the resulting condition,
# and report on stderr any entry that does not succeed.  The test expects
# no output at all.
for test in $test_array; do
|
|
if ! eval ${(g::)test} ; then
|
|
print -rl "Test $test failed" >&2
|
|
fi
|
|
done
|
|
0:Invalid characters in pattern matching
|
|
|
|
[[ $'\xe3' == [[:INCOMPLETE:]] ]] || print fail 1
|
|
[[ $'\xe3\x83' == [[:INCOMPLETE:]][[:INVALID:]] ]] || print fail 2
|
|
[[ $'\xe3\x83\x9b' != [[:INCOMPLETE:][:INVALID:]] ]] || print fail 3
|
|
[[ $'\xe3\x83\x9b' = ? ]] || print fail 4
|
|
0:Testing incomplete and invalid multibyte character components
|
|
|
|
print -r -- ${(q+):-ホ}
|
|
foo='She said "ホ". I said "You can'\''t '\''ホ'\'' me!'
|
|
print -r -- ${(q+)foo}
|
|
0:${(q+)...} with printable multibyte characters
|
|
>ホ
|
|
>'She said "ホ". I said "You can'\''t '\''ホ'\'' me!'
|
|
|
|
# This will silently succeed if zsh/parameter isn't available
|
|
# Runs in a subshell.  Output and errors from loading the module are
# discarded, so a failed load is not reported.
(zmodload zsh/parameter >/dev/null 2>&1
|
|
# f is only defined, never called; its body contains a command
# substitution and a quoted multibyte string.
f() {
|
|
: $(:)
|
|
"↓"
|
|
}
|
|
# Expanding $functions is the operation under test; the test expects
# status 0 and no output.
: $functions)
|
|
0:Multibyte handling of functions parameter
|
|
|
|
# c1=U+0104 (Ą) and c2=U+0120 (Ġ) are chosen so that
|
|
# u1 = utf8(c1) = c4 84 < u2 = utf8(c2) = c4 a0
|
|
# metafy(u1) = c4 83 a4 > metafy(u2) = c4 83 80
|
|
# in both UTF-8 and ASCII collations (the latter is used in macOS
|
|
# and some versions of BSDs).
|
|
local -a names=( $'\u0104' $'\u0120' )
|
|
print -o $names
|
|
mkdir -p colltest
|
|
cd colltest
|
|
touch $names
|
|
print ?
|
|
0:Sorting of metafied characters
|
|
>Ą Ġ
|
|
>Ą Ġ
|
|
|
|
printf '%q%q\n' 你你
|
|
0:printf %q and quotestring and general metafy / token madness
|
|
>你你
|
|
|
|
# This test is kept last as it introduces an additional
|
|
# dependency on the system regex library.
|
|
# Run the checks only if the zsh/regex module loads; each check prints OK
# when it behaves as expected, giving three OK lines in total.
if zmodload zsh/regex 2>/dev/null; then
|
|
# U+00A0 must match as exactly one character.
[[ $'\ua0' =~ '^.$' ]] && print OK
|
|
# U+00A0 must match itself when it appears in the regex.
[[ $'\ua0' =~ $'^\ua0$' ]] && print OK
|
|
# U+00A0 followed by X must not match a pattern for X alone.
[[ $'\ua0'X =~ '^X$' ]] || print OK
|
|
else
|
|
# Module unavailable: set ZTST_skip with the reason
# (presumably makes the harness skip this test -- confirm).
ZTST_skip="regexp library not found."
|
|
fi
|
|
0:Ensure no confusion on metafied input to regex module
|
|
>OK
|
|
>OK
|
|
>OK
|
|
F:A failure here may indicate the system regex library does not
|
|
F:support character sets outside the portable 7-bit range.
|