#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <[email protected]>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <[email protected]> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [data
# aligned programmatically, which in turn guarantees exception-
# free execution], but that turned out to hamper performance when
# vcipher instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
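#
# For reference, this is the classic AltiVec splice idiom used below for
# unaligned access (a sketch only, illustrative register names, not
# emitted as-is):
#
#    lvx    vA,0,rP        # aligned quadword containing start of data
#    lvx    vB,r15,rP    # aligned quadword containing end (rP+15)
#    lvsl    vP,0,rP        # permute control from low address bits
#    vperm    vD,vA,vB,vP    # splice the two quadwords into vD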
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#        CBC en-/decrypt    CTR    XTS
# POWER8[le]    3.96/0.72    0.74    1.1
# POWER8[be]    3.75/0.65    0.66    1.0

$flavour = shift;

if ($flavour =~ /64/) {
    $SIZE_T    =8;
    $LRSAVE    =2*$SIZE_T;
    $STU    ="stdu";
    $POP    ="ld";
    $PUSH    ="std";
    $UCMP    ="cmpld";
    $SHL    ="sldi";
} elsif ($flavour =~ /32/) {
    $SIZE_T    =4;
    $LRSAVE    =$SIZE_T;
    $STU    ="stwu";
    $POP    ="lwz";
    $PUSH    ="stw";
    $UCMP    ="cmplw";
    $SHL    ="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
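
# Example invocation (flavour strings are matched by the regexes above;
# the kernel's vmx Makefile passes names of this shape):
#
#    perl aesp8-ppc.pl linux-ppc64le > aesp8-ppc.s    # 64-bit little-endian
#    perl aesp8-ppc.pl linux-ppc64   > aesp8-ppc.s    # 64-bit big-endian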

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{    # Key setup procedures                        #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
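
# Loop128 below implements the standard AES-128 key-expansion recurrence,
# producing one 128-bit round key per iteration. In scalar form (a
# sketch; w[] are 32-bit words, i a multiple of 4):
#
#    t      = SubWord(RotWord(w[i-1])) ^ rcon
#    w[i+0] = w[i-4] ^ t
#    w[i+1] = w[i-3] ^ w[i+0]
#    w[i+2] = w[i-2] ^ w[i+1]
#    w[i+3] = w[i-1] ^ w[i+2]
#
# "rotate-n-splat" + vcipherlast computes the SubWord/RotWord term in all
# four lanes at once (ShiftRows is a no-op on a column-splatted state);
# the vsldoi/vxor chains form the prefix XORs.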

$code.=<<___;
.machine    "any"

.text

.align    7
rcon:
.long    0x01000000, 0x01000000, 0x01000000, 0x01000000    ?rev
.long    0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000    ?rev
.long    0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c    ?rev
.long    0,0,0,0                        ?asis
.long    0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
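# Lconsts: return the address of rcon above in $ptr; "bcl 20,31" is the
# branch-always-with-link form, used here only to read the current PC.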
Lconsts:
    mflr    r0
    bcl    20,31,\$+4
    mflr    $ptr            # $ptr = address of this mflr (set by bcl)
    addi    $ptr,$ptr,-0x58        # 0x58 = distance from here back to rcon
    mtlr    r0
    blr
    .long    0
    .byte    0,12,0x14,0,0,0,0,0
.asciz    "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl    .${prefix}_set_encrypt_key
Lset_encrypt_key:
    mflr        r11
    $PUSH        r11,$LRSAVE($sp)

    li        $ptr,-1
    ${UCMP}i    $inp,0
    beq-        Lenc_key_abort        # if ($inp==0) return -1;
    ${UCMP}i    $out,0
    beq-        Lenc_key_abort        # if ($out==0) return -1;
    li        $ptr,-2
    cmpwi        $bits,128
    blt-        Lenc_key_abort        # if ($bits<128) return -2;
    cmpwi        $bits,256
    bgt-        Lenc_key_abort        # if ($bits>256) return -2;
    andi.        r0,$bits,0x3f
    bne-        Lenc_key_abort        # if ($bits%64) return -2;

    lis        r0,0xfff0
    mfspr        $vrsave,256
    mtspr        256,r0

    bl        Lconsts
    mtlr        r11

    neg        r9,$inp
    lvx        $in0,0,$inp
    addi        $inp,$inp,15        # 15 is not typo
    lvsr        $key,0,r9        # borrow $key
    li        r8,0x20
    cmpwi        $bits,192
    lvx        $in1,0,$inp
    le?vspltisb    $mask,0x0f        # borrow $mask
    lvx        $rcon,0,$ptr
    le?vxor        $key,$key,$mask        # adjust for byte swap
    lvx        $mask,r8,$ptr
    addi        $ptr,$ptr,0x10
    vperm        $in0,$in0,$in1,$key    # align [and byte swap in LE]
    li        $cnt,8
    vxor        $zero,$zero,$zero
    mtctr        $cnt

    ?lvsr        $outperm,0,$out
    vspltisb    $outmask,-1
    lvx        $outhead,0,$out
    ?vperm        $outmask,$zero,$outmask,$outperm

    blt        Loop128
    addi        $inp,$inp,8
    beq        L192
    addi        $inp,$inp,8
    b        L256

.align    4
Loop128:
    vperm        $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi        $tmp,$zero,$in0,12    # >>32
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast    $key,$key,$rcon
     stvx        $stage,0,$out
     addi        $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key
    bdnz        Loop128

    lvx        $rcon,0,$ptr        # last two round keys

    vperm        $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi        $tmp,$zero,$in0,12    # >>32
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast    $key,$key,$rcon
     stvx        $stage,0,$out
     addi        $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key

    vperm        $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi        $tmp,$zero,$in0,12    # >>32
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast    $key,$key,$rcon
     stvx        $stage,0,$out
     addi        $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
    vxor        $in0,$in0,$key
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx        $stage,0,$out

    addi        $inp,$out,15        # 15 is not typo
    addi        $out,$out,0x50

    li        $rounds,10
    b        Ldone

.align    4
L192:
    lvx        $tmp,0,$inp
    li        $cnt,4
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx        $stage,0,$out
     addi        $out,$out,16
    vperm        $in1,$in1,$tmp,$key    # align [and byte swap in LE]
    vspltisb    $key,8            # borrow $key
    mtctr        $cnt
    vsububm        $mask,$mask,$key    # adjust the mask

Loop192:
    vperm        $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi        $tmp,$zero,$in0,12    # >>32
    vcipherlast    $key,$key,$rcon

    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp

     vsldoi        $stage,$zero,$in1,8
    vspltw        $tmp,$in0,3
    vxor        $tmp,$tmp,$in1
    vsldoi        $in1,$zero,$in1,12    # >>32
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in1,$in1,$tmp
    vxor        $in0,$in0,$key
    vxor        $in1,$in1,$key
     vsldoi        $stage,$stage,$in0,8

    vperm        $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi        $tmp,$zero,$in0,12    # >>32
     vperm        $outtail,$stage,$stage,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast    $key,$key,$rcon
     stvx        $stage,0,$out
     addi        $out,$out,16

     vsldoi        $stage,$in0,$in1,8
    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
     vperm        $outtail,$stage,$stage,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
     stvx        $stage,0,$out
     addi        $out,$out,16

    vspltw        $tmp,$in0,3
    vxor        $tmp,$tmp,$in1
    vsldoi        $in1,$zero,$in1,12    # >>32
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in1,$in1,$tmp
    vxor        $in0,$in0,$key
    vxor        $in1,$in1,$key
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx        $stage,0,$out
     addi        $inp,$out,15        # 15 is not typo
     addi        $out,$out,16
    bdnz        Loop192

    li        $rounds,12
    addi        $out,$out,0x20
    b        Ldone

.align    4
L256:
    lvx        $tmp,0,$inp
    li        $cnt,7
    li        $rounds,14
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx        $stage,0,$out
     addi        $out,$out,16
    vperm        $in1,$in1,$tmp,$key    # align [and byte swap in LE]
    mtctr        $cnt

Loop256:
    vperm        $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi        $tmp,$zero,$in0,12    # >>32
     vperm        $outtail,$in1,$in1,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast    $key,$key,$rcon
     stvx        $stage,0,$out
     addi        $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key
     vperm        $outtail,$in0,$in0,$outperm    # rotate
     vsel        $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx        $stage,0,$out
     addi        $inp,$out,15        # 15 is not typo
     addi        $out,$out,16
    bdz        Ldone

    vspltw        $key,$in0,3        # just splat
    vsldoi        $tmp,$zero,$in1,12    # >>32
    vsbox        $key,$key

    vxor        $in1,$in1,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in1,$in1,$tmp
    vsldoi        $tmp,$zero,$tmp,12    # >>32
    vxor        $in1,$in1,$tmp

    vxor        $in1,$in1,$key
    b        Loop256

.align    4
Ldone:
    lvx        $in1,0,$inp        # redundant in aligned case
    vsel        $in1,$outhead,$in1,$outmask
    stvx        $in1,0,$inp
    li        $ptr,0
    mtspr        256,$vrsave
    stw        $rounds,0($out)

Lenc_key_abort:
    mr        r3,$ptr
    blr
    .long        0
    .byte        0,12,0x14,1,0,0,3,0
    .long        0
.size    .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl    .${prefix}_set_decrypt_key
    $STU        $sp,-$FRAME($sp)
    mflr        r10
    $PUSH        r10,$FRAME+$LRSAVE($sp)
    bl        Lset_encrypt_key
    mtlr        r10

    cmpwi        r3,0
    bne-        Ldec_key_abort

    slwi        $cnt,$rounds,4
    subi        $inp,$out,240        # first round key
    srwi        $rounds,$rounds,1
    add        $out,$inp,$cnt        # last round key
    mtctr        $rounds
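
# Convert the encryption schedule in place for decryption: 16-byte round
# keys are swapped pairwise from both ends so the schedule can be read
# in reverse order.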

Ldeckey:
    lwz        r0, 0($inp)
    lwz        r6, 4($inp)
    lwz        r7, 8($inp)
    lwz        r8, 12($inp)
    addi        $inp,$inp,16
    lwz        r9, 0($out)
    lwz        r10,4($out)
    lwz        r11,8($out)
    lwz        r12,12($out)
    stw        r0, 0($out)
    stw        r6, 4($out)
    stw        r7, 8($out)
    stw        r8, 12($out)
    subi        $out,$out,16
    stw        r9, -16($inp)
    stw        r10,-12($inp)
    stw        r11,-8($inp)
    stw        r12,-4($inp)
    bdnz        Ldeckey

    xor        r3,r3,r3        # return value
Ldec_key_abort:
    addi        $sp,$sp,$FRAME
    blr
    .long        0
    .byte        0,12,4,1,0x80,0,3,0
    .long        0
.size    .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{    # Single block en- and decrypt procedures            #
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
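
# One template, two routines: "en" emits .${prefix}_encrypt using the
# vcipher/vcipherlast forms and "de" emits .${prefix}_decrypt using
# vncipher/vncipherlast ($n supplies the "n"); each handles a single
# 16-byte block.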

$code.=<<___;
.globl    .${prefix}_${dir}crypt
    lwz        $rounds,240($key)
    lis        r0,0xfc00
    mfspr        $vrsave,256
    li        $idx,15            # 15 is not typo
    mtspr        256,r0

    lvx        v0,0,$inp
    neg        r11,$out
    lvx        v1,$idx,$inp
    lvsl        v2,0,$inp        # inpperm
    le?vspltisb    v4,0x0f
    ?lvsl        v3,0,r11        # outperm
    le?vxor        v2,v2,v4
    li        $idx,16
    vperm        v0,v0,v1,v2        # align [and byte swap in LE]
    lvx        v1,0,$key
    ?lvsl        v5,0,$key        # keyperm
    srwi        $rounds,$rounds,1
    lvx        v2,$idx,$key
    addi        $idx,$idx,16
    subi        $rounds,$rounds,1
    ?vperm        v1,v1,v2,v5        # align round key

    vxor        v0,v0,v1
    lvx        v1,$idx,$key
    addi        $idx,$idx,16
    mtctr        $rounds

Loop_${dir}c:
    ?vperm        v2,v2,v1,v5
    v${n}cipher    v0,v0,v2
    lvx        v2,$idx,$key
    addi        $idx,$idx,16
    ?vperm        v1,v1,v2,v5
    v${n}cipher    v0,v0,v1
    lvx        v1,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_${dir}c

    ?vperm        v2,v2,v1,v5
    v${n}cipher    v0,v0,v2
    lvx        v2,$idx,$key
    ?vperm        v1,v1,v2,v5
    v${n}cipherlast    v0,v0,v1

    vspltisb    v2,-1
    vxor        v1,v1,v1
    li        $idx,15            # 15 is not typo
    ?vperm        v2,v1,v2,v3        # outmask
    le?vxor        v3,v3,v4
    lvx        v1,0,$out        # outhead
    vperm        v0,v0,v0,v3        # rotate [and byte swap in LE]
    vsel        v1,v1,v0,v2
    lvx        v4,$idx,$out
    stvx        v1,0,$out
    vsel        v0,v0,v4,v2
    stvx        v0,$idx,$out

    mtspr        256,$vrsave
    blr
    .long        0
    .byte        0,12,0x14,0,0,0,3,0
    .long        0
.size    .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{    # CBC en- and decrypt procedures                #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=        map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
                        map("v$_",(4..10));
$code.=<<___;
.globl    .${prefix}_cbc_encrypt
    ${UCMP}i    $len,16
    bltlr-

    cmpwi        $enc,0            # test direction
    lis        r0,0xffe0
    mfspr        $vrsave,256
    mtspr        256,r0

    li        $idx,15
    vxor        $rndkey0,$rndkey0,$rndkey0
    le?vspltisb    $tmp,0x0f

    lvx        $ivec,0,$ivp        # load [unaligned] iv
    lvsl        $inpperm,0,$ivp
    lvx        $inptail,$idx,$ivp
    le?vxor        $inpperm,$inpperm,$tmp
    vperm        $ivec,$ivec,$inptail,$inpperm

    neg        r11,$inp
    ?lvsl        $keyperm,0,$key        # prepare for unaligned key
    lwz        $rounds,240($key)

    lvsr        $inpperm,0,r11        # prepare for unaligned load
    lvx        $inptail,0,$inp
    addi        $inp,$inp,15        # 15 is not typo
    le?vxor        $inpperm,$inpperm,$tmp

    ?lvsr        $outperm,0,$out        # prepare for unaligned store
    vspltisb    $outmask,-1
    lvx        $outhead,0,$out
    ?vperm        $outmask,$rndkey0,$outmask,$outperm
    le?vxor        $outperm,$outperm,$tmp

    srwi        $rounds,$rounds,1
    li        $idx,16
    subi        $rounds,$rounds,1
    beq        Lcbc_dec
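
# CBC encryption is inherently serial: each block is xored with the
# previous ciphertext before encryption, so blocks go one at a time.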

Lcbc_enc:
    vmr        $inout,$inptail
    lvx        $inptail,0,$inp
    addi        $inp,$inp,16
    mtctr        $rounds
    subi        $len,$len,16        # len-=16

    lvx        $rndkey0,0,$key
     vperm        $inout,$inout,$inptail,$inpperm
    lvx        $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key
    addi        $idx,$idx,16
    vxor        $inout,$inout,$ivec

Loop_cbc_enc:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipher        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_cbc_enc

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key
    li        $idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipherlast    $ivec,$inout,$rndkey0
    ${UCMP}i    $len,16

    vperm        $tmp,$ivec,$ivec,$outperm
    vsel        $inout,$outhead,$tmp,$outmask
    vmr        $outhead,$tmp
    stvx        $inout,0,$out
    addi        $out,$out,16
    bge        Lcbc_enc

    b        Lcbc_done

.align    4
Lcbc_dec:
    ${UCMP}i    $len,128
    bge        _aesp8_cbc_decrypt8x
    vmr        $tmp,$inptail
    lvx        $inptail,0,$inp
    addi        $inp,$inp,16
    mtctr        $rounds
    subi        $len,$len,16        # len-=16

    lvx        $rndkey0,0,$key
     vperm        $tmp,$tmp,$inptail,$inpperm
    lvx        $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$tmp,$rndkey0
    lvx        $rndkey0,$idx,$key
    addi        $idx,$idx,16

Loop_cbc_dec:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vncipher    $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_cbc_dec

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key
    li        $idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vncipherlast    $inout,$inout,$rndkey0
    ${UCMP}i    $len,16

    vxor        $inout,$inout,$ivec
    vmr        $ivec,$tmp
    vperm        $tmp,$inout,$inout,$outperm
    vsel        $inout,$outhead,$tmp,$outmask
    vmr        $outhead,$tmp
    stvx        $inout,0,$out
    addi        $out,$out,16
    bge        Lcbc_dec

Lcbc_done:
    addi        $out,$out,-1
    lvx        $inout,0,$out        # redundant in aligned case
    vsel        $inout,$outhead,$inout,$outmask
    stvx        $inout,0,$out

    neg        $enc,$ivp        # write [unaligned] iv
    li        $idx,15            # 15 is not typo
    vxor        $rndkey0,$rndkey0,$rndkey0
    vspltisb    $outmask,-1
    le?vspltisb    $tmp,0x0f
    ?lvsl        $outperm,0,$enc
    ?vperm        $outmask,$rndkey0,$outmask,$outperm
    le?vxor        $outperm,$outperm,$tmp
    lvx        $outhead,0,$ivp
    vperm        $ivec,$ivec,$ivec,$outperm
    vsel        $inout,$outhead,$ivec,$outmask
    lvx        $inptail,$idx,$ivp
    stvx        $inout,0,$ivp
    vsel        $inout,$ivec,$inptail,$outmask
    stvx        $inout,$idx,$ivp

    mtspr        256,$vrsave
    blr
    .long        0
    .byte        0,12,0x14,0,0,0,6,0
    .long        0
___
#########################################################################
{{    # Optimized CBC decrypt procedure                #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";    # v24-v25 rotating buffer for the first few round keys
            # v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);    # aliases with "caller", redundant assignment
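
# CBC decryption, unlike encryption, is parallelizable: all ciphertext
# blocks are known up front, so eight blocks are decrypted per iteration
# to keep the vncipher pipeline full.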

$code.=<<___;
.align    5
_aesp8_cbc_decrypt8x:
    $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
    li        r10,`$FRAME+8*16+15`
    li        r11,`$FRAME+8*16+31`
    stvx        v20,r10,$sp        # ABI says so
    addi        r10,r10,32
    stvx        v21,r11,$sp
    addi        r11,r11,32
    stvx        v22,r10,$sp
    addi        r10,r10,32
    stvx        v23,r11,$sp
    addi        r11,r11,32
    stvx        v24,r10,$sp
    addi        r10,r10,32
    stvx        v25,r11,$sp
    addi        r11,r11,32
    stvx        v26,r10,$sp
    addi        r10,r10,32
    stvx        v27,r11,$sp
    addi        r11,r11,32
    stvx        v28,r10,$sp
    addi        r10,r10,32
    stvx        v29,r11,$sp
    addi        r11,r11,32
    stvx        v30,r10,$sp
    stvx        v31,r11,$sp
    li        r0,-1
    stw        $vrsave,`$FRAME+21*16-4`($sp)    # save vrsave
    li        $x10,0x10
    $PUSH        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    li        $x20,0x20
    $PUSH        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    li        $x30,0x30
    $PUSH        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    li        $x40,0x40
    $PUSH        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    li        $x50,0x50
    $PUSH        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    li        $x60,0x60
    $PUSH        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    li        $x70,0x70
    mtspr        256,r0

    subi        $rounds,$rounds,3    # -4 in total
    subi        $len,$len,128        # bias

    lvx        $rndkey0,$x00,$key    # load key schedule
    lvx        v30,$x10,$key
    addi        $key,$key,0x20
    lvx        v31,$x00,$key
    ?vperm        $rndkey0,$rndkey0,v30,$keyperm
    addi        $key_,$sp,$FRAME+15
    mtctr        $rounds

Load_cbc_dec_key:
    ?vperm        v24,v30,v31,$keyperm
    lvx        v30,$x10,$key
    addi        $key,$key,0x20
    stvx        v24,$x00,$key_        # off-load round[1]
    ?vperm        v25,v31,v30,$keyperm
    lvx        v31,$x00,$key
    stvx        v25,$x10,$key_        # off-load round[2]
    addi        $key_,$key_,0x20
    bdnz        Load_cbc_dec_key

    lvx        v26,$x10,$key
    ?vperm        v24,v30,v31,$keyperm
    lvx        v27,$x20,$key
    stvx        v24,$x00,$key_        # off-load round[3]
    ?vperm        v25,v31,v26,$keyperm
    lvx        v28,$x30,$key
    stvx        v25,$x10,$key_        # off-load round[4]
    addi        $key_,$sp,$FRAME+15    # rewind $key_
    ?vperm        v26,v26,v27,$keyperm
    lvx        v29,$x40,$key
    ?vperm        v27,v27,v28,$keyperm
    lvx        v30,$x50,$key
    ?vperm        v28,v28,v29,$keyperm
    lvx        v31,$x60,$key
    ?vperm        v29,v29,v30,$keyperm
    lvx        $out0,$x70,$key        # borrow $out0
    ?vperm        v30,v30,v31,$keyperm
    lvx        v24,$x00,$key_        # pre-load round[1]
    ?vperm        v31,v31,$out0,$keyperm
    lvx        v25,$x10,$key_        # pre-load round[2]

    #lvx        $inptail,0,$inp        # "caller" already did this
    #addi        $inp,$inp,15        # 15 is not typo
    subi        $inp,$inp,15        # undo "caller"

     le?li        $idx,8
    lvx_u        $in0,$x00,$inp        # load first 8 "words"
     le?lvsl    $inpperm,0,$idx
     le?vspltisb    $tmp,0x0f
    lvx_u        $in1,$x10,$inp
     le?vxor    $inpperm,$inpperm,$tmp    # transform for lvx_u/stvx_u
    lvx_u        $in2,$x20,$inp
     le?vperm    $in0,$in0,$in0,$inpperm
    lvx_u        $in3,$x30,$inp
     le?vperm    $in1,$in1,$in1,$inpperm
    lvx_u        $in4,$x40,$inp
     le?vperm    $in2,$in2,$in2,$inpperm
    vxor        $out0,$in0,$rndkey0
    lvx_u        $in5,$x50,$inp
     le?vperm    $in3,$in3,$in3,$inpperm
    vxor        $out1,$in1,$rndkey0
    lvx_u        $in6,$x60,$inp
     le?vperm    $in4,$in4,$in4,$inpperm
    vxor        $out2,$in2,$rndkey0
    lvx_u        $in7,$x70,$inp
    addi        $inp,$inp,0x80
     le?vperm    $in5,$in5,$in5,$inpperm
    vxor        $out3,$in3,$rndkey0
     le?vperm    $in6,$in6,$in6,$inpperm
    vxor        $out4,$in4,$rndkey0
     le?vperm    $in7,$in7,$in7,$inpperm
    vxor        $out5,$in5,$rndkey0
    vxor        $out6,$in6,$rndkey0
    vxor        $out7,$in7,$rndkey0

    mtctr        $rounds
    b        Loop_cbc_dec8x
.align    5
Loop_cbc_dec8x:
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Loop_cbc_dec8x

    subic        $len,$len,128        # $len-=128
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24

    subfe.        r0,r0,r0        # borrow?-1:0
    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25

    and        r0,r0,$len
    vncipher    $out0,$out0,v26
    vncipher    $out1,$out1,v26
    vncipher    $out2,$out2,v26
    vncipher    $out3,$out3,v26
    vncipher    $out4,$out4,v26
    vncipher    $out5,$out5,v26
    vncipher    $out6,$out6,v26
    vncipher    $out7,$out7,v26

    add        $inp,$inp,r0        # $inp is adjusted in such
                        # way that at exit from the
                        # loop inX-in7 are loaded
                        # with last "words"
    vncipher    $out0,$out0,v27
    vncipher    $out1,$out1,v27
    vncipher    $out2,$out2,v27
    vncipher    $out3,$out3,v27
    vncipher    $out4,$out4,v27
    vncipher    $out5,$out5,v27
    vncipher    $out6,$out6,v27
    vncipher    $out7,$out7,v27

    addi        $key_,$sp,$FRAME+15    # rewind $key_
    vncipher    $out0,$out0,v28
    vncipher    $out1,$out1,v28
    vncipher    $out2,$out2,v28
    vncipher    $out3,$out3,v28
    vncipher    $out4,$out4,v28
    vncipher    $out5,$out5,v28
    vncipher    $out6,$out6,v28
    vncipher    $out7,$out7,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]

    vncipher    $out0,$out0,v29
    vncipher    $out1,$out1,v29
    vncipher    $out2,$out2,v29
    vncipher    $out3,$out3,v29
    vncipher    $out4,$out4,v29
    vncipher    $out5,$out5,v29
    vncipher    $out6,$out6,v29
    vncipher    $out7,$out7,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]

    vncipher    $out0,$out0,v30
     vxor        $ivec,$ivec,v31        # xor with last round key
    vncipher    $out1,$out1,v30
     vxor        $in0,$in0,v31
    vncipher    $out2,$out2,v30
     vxor        $in1,$in1,v31
    vncipher    $out3,$out3,v30
     vxor        $in2,$in2,v31
    vncipher    $out4,$out4,v30
     vxor        $in3,$in3,v31
    vncipher    $out5,$out5,v30
     vxor        $in4,$in4,v31
    vncipher    $out6,$out6,v30
     vxor        $in5,$in5,v31
    vncipher    $out7,$out7,v30
     vxor        $in6,$in6,v31

    vncipherlast    $out0,$out0,$ivec
    vncipherlast    $out1,$out1,$in0
     lvx_u        $in0,$x00,$inp        # load next input block
    vncipherlast    $out2,$out2,$in1
     lvx_u        $in1,$x10,$inp
    vncipherlast    $out3,$out3,$in2
     le?vperm    $in0,$in0,$in0,$inpperm
     lvx_u        $in2,$x20,$inp
    vncipherlast    $out4,$out4,$in3
     le?vperm    $in1,$in1,$in1,$inpperm
     lvx_u        $in3,$x30,$inp
    vncipherlast    $out5,$out5,$in4
     le?vperm    $in2,$in2,$in2,$inpperm
     lvx_u        $in4,$x40,$inp
    vncipherlast    $out6,$out6,$in5
     le?vperm    $in3,$in3,$in3,$inpperm
     lvx_u        $in5,$x50,$inp
    vncipherlast    $out7,$out7,$in6
     le?vperm    $in4,$in4,$in4,$inpperm
     lvx_u        $in6,$x60,$inp
    vmr        $ivec,$in7
     le?vperm    $in5,$in5,$in5,$inpperm
     lvx_u        $in7,$x70,$inp
     addi        $inp,$inp,0x80

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
     le?vperm    $in6,$in6,$in6,$inpperm
     vxor        $out0,$in0,$rndkey0
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x10,$out
     le?vperm    $in7,$in7,$in7,$inpperm
     vxor        $out1,$in1,$rndkey0
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x20,$out
     vxor        $out2,$in2,$rndkey0
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x30,$out
     vxor        $out3,$in3,$rndkey0
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x40,$out
     vxor        $out4,$in4,$rndkey0
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x50,$out
     vxor        $out5,$in5,$rndkey0
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x60,$out
     vxor        $out6,$in6,$rndkey0
    stvx_u        $out7,$x70,$out
    addi        $out,$out,0x80
     vxor        $out7,$in7,$rndkey0

    mtctr        $rounds
    beq        Loop_cbc_dec8x        # did $len-=128 borrow?

    addic.        $len,$len,128
    beq        Lcbc_dec8x_done
    nop
    nop

Loop_cbc_dec8x_tail:                # up to 7 "words" tail...
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Loop_cbc_dec8x_tail

    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24

    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25

    vncipher    $out1,$out1,v26
    vncipher    $out2,$out2,v26
    vncipher    $out3,$out3,v26
    vncipher    $out4,$out4,v26
    vncipher    $out5,$out5,v26
    vncipher    $out6,$out6,v26
    vncipher    $out7,$out7,v26

    vncipher    $out1,$out1,v27
    vncipher    $out2,$out2,v27
    vncipher    $out3,$out3,v27
    vncipher    $out4,$out4,v27
    vncipher    $out5,$out5,v27
    vncipher    $out6,$out6,v27
    vncipher    $out7,$out7,v27

    vncipher    $out1,$out1,v28
    vncipher    $out2,$out2,v28
    vncipher    $out3,$out3,v28
    vncipher    $out4,$out4,v28
    vncipher    $out5,$out5,v28
    vncipher    $out6,$out6,v28
    vncipher    $out7,$out7,v28

    vncipher    $out1,$out1,v29
    vncipher    $out2,$out2,v29
    vncipher    $out3,$out3,v29
    vncipher    $out4,$out4,v29
    vncipher    $out5,$out5,v29
    vncipher    $out6,$out6,v29
    vncipher    $out7,$out7,v29

    vncipher    $out1,$out1,v30
     vxor        $ivec,$ivec,v31        # last round key
    vncipher    $out2,$out2,v30
     vxor        $in1,$in1,v31
    vncipher    $out3,$out3,v30
     vxor        $in2,$in2,v31
    vncipher    $out4,$out4,v30
     vxor        $in3,$in3,v31
    vncipher    $out5,$out5,v30
     vxor        $in4,$in4,v31
    vncipher    $out6,$out6,v30
     vxor        $in5,$in5,v31
    vncipher    $out7,$out7,v30
     vxor        $in6,$in6,v31

    cmplwi        $len,32            # switch($len)
    blt        Lcbc_dec8x_one
    nop
    beq        Lcbc_dec8x_two
    cmplwi        $len,64
    blt        Lcbc_dec8x_three
    nop
    beq        Lcbc_dec8x_four
    cmplwi        $len,96
    blt        Lcbc_dec8x_five
    nop
    beq        Lcbc_dec8x_six

Lcbc_dec8x_seven:
    vncipherlast    $out1,$out1,$ivec
    vncipherlast    $out2,$out2,$in1
    vncipherlast    $out3,$out3,$in2
    vncipherlast    $out4,$out4,$in3
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr        $ivec,$in7

    le?vperm    $out1,$out1,$out1,$inpperm
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x00,$out
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x10,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x20,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x30,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x40,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x50,$out
    stvx_u        $out7,$x60,$out
    addi        $out,$out,0x70
    b        Lcbc_dec8x_done

.align    5
Lcbc_dec8x_six:
    vncipherlast    $out2,$out2,$ivec
    vncipherlast    $out3,$out3,$in2
    vncipherlast    $out4,$out4,$in3
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr        $ivec,$in7

    le?vperm    $out2,$out2,$out2,$inpperm
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x00,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x10,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x20,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x30,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x40,$out
    stvx_u        $out7,$x50,$out
    addi        $out,$out,0x60
    b        Lcbc_dec8x_done

.align    5
Lcbc_dec8x_five:
    vncipherlast    $out3,$out3,$ivec
    vncipherlast    $out4,$out4,$in3
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr        $ivec,$in7

    le?vperm    $out3,$out3,$out3,$inpperm
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x00,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x10,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x20,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x30,$out
    stvx_u        $out7,$x40,$out
    addi        $out,$out,0x50
    b        Lcbc_dec8x_done

.align    5
Lcbc_dec8x_four:
    vncipherlast    $out4,$out4,$ivec
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr        $ivec,$in7

    le?vperm    $out4,$out4,$out4,$inpperm
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x00,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x10,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x20,$out
    stvx_u        $out7,$x30,$out
    addi        $out,$out,0x40
    b        Lcbc_dec8x_done

.align    5
Lcbc_dec8x_three:
    vncipherlast    $out5,$out5,$ivec
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr        $ivec,$in7

    le?vperm    $out5,$out5,$out5,$inpperm
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x00,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x10,$out
    stvx_u        $out7,$x20,$out
    addi        $out,$out,0x30
    b        Lcbc_dec8x_done

.align    5
Lcbc_dec8x_two:
    vncipherlast    $out6,$out6,$ivec
    vncipherlast    $out7,$out7,$in6
    vmr        $ivec,$in7

    le?vperm    $out6,$out6,$out6,$inpperm
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x00,$out
    stvx_u        $out7,$x10,$out
    addi        $out,$out,0x20
    b        Lcbc_dec8x_done

.align    5
Lcbc_dec8x_one:
    vncipherlast    $out7,$out7,$ivec
    vmr        $ivec,$in7

    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out7,0,$out
    addi        $out,$out,0x10

Lcbc_dec8x_done:
    le?vperm    $ivec,$ivec,$ivec,$inpperm
    stvx_u        $ivec,0,$ivp        # write [unaligned] iv

    li        r10,`$FRAME+15`
    li        r11,`$FRAME+31`
    stvx        $inpperm,r10,$sp    # wipe copies of round keys
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32

    mtspr        256,$vrsave
    lvx        v20,r10,$sp        # ABI says so
    addi        r10,r10,32
    lvx        v21,r11,$sp
    addi        r11,r11,32
    lvx        v22,r10,$sp
    addi        r10,r10,32
    lvx        v23,r11,$sp
    addi        r11,r11,32
    lvx        v24,r10,$sp
    addi        r10,r10,32
    lvx        v25,r11,$sp
    addi        r11,r11,32
    lvx        v26,r10,$sp
    addi        r10,r10,32
    lvx        v27,r11,$sp
    addi        r11,r11,32
    lvx        v28,r10,$sp
    addi        r10,r10,32
    lvx        v29,r11,$sp
    addi        r11,r11,32
    lvx        v30,r10,$sp
    lvx        v31,r11,$sp
    $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
    blr
    .long        0
    .byte        0,12,0x14,0,0x80,6,6,0
    .long        0
.size    .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}    }}}

#########################################################################
{{{    # CTR procedure[s]                        #

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
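# Illustration (a sketch, not emitted code): upstream 'ctr32' would bump
# only the low 32-bit lane of the IV,
#
#    vadduwm    $ivec,$ivec,$one    # 32-bit lane add, wraps at 2^32
#
# while this code adds across the full 128 bits:
#
#    vadduqm    $ivec,$ivec,$one    # quadword add, carries propagate
#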
#########################################################################
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=        map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
                        map("v$_",(4..11));
my $dat=$tmp;
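
# Argument layout in r3..r7 ($len counts 16-byte blocks, not bytes),
# matching the usual glue-code prototype (a sketch; parameter names are
# assumptions):
#
#    void aes_p8_ctr32_encrypt_blocks(const u8 *inp, u8 *out, size_t len,
#                                     const void *key, const u8 *ivp);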

$code.=<<___;
.globl    .${prefix}_ctr32_encrypt_blocks
    ${UCMP}i    $len,1
    bltlr-

    lis        r0,0xfff0
    mfspr        $vrsave,256
    mtspr        256,r0

    li        $idx,15
    vxor        $rndkey0,$rndkey0,$rndkey0
    le?vspltisb    $tmp,0x0f

    lvx        $ivec,0,$ivp        # load [unaligned] iv
    lvsl        $inpperm,0,$ivp
    lvx        $inptail,$idx,$ivp
     vspltisb    $one,1
    le?vxor        $inpperm,$inpperm,$tmp
    vperm        $ivec,$ivec,$inptail,$inpperm
     vsldoi        $one,$rndkey0,$one,1

    neg        r11,$inp
    ?lvsl        $keyperm,0,$key        # prepare for unaligned key
    lwz        $rounds,240($key)

    lvsr        $inpperm,0,r11        # prepare for unaligned load
    lvx        $inptail,0,$inp
    addi        $inp,$inp,15        # 15 is not typo
    le?vxor        $inpperm,$inpperm,$tmp

    srwi        $rounds,$rounds,1
    li        $idx,16
    subi        $rounds,$rounds,1

    ${UCMP}i    $len,8
    bge        _aesp8_ctr32_encrypt8x

    ?lvsr        $outperm,0,$out        # prepare for unaligned store
    vspltisb    $outmask,-1
    lvx        $outhead,0,$out
    ?vperm        $outmask,$rndkey0,$outmask,$outperm
    le?vxor        $outperm,$outperm,$tmp

    lvx        $rndkey0,0,$key
    mtctr        $rounds
    lvx        $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$ivec,$rndkey0
    lvx        $rndkey0,$idx,$key
    addi        $idx,$idx,16
    b        Loop_ctr32_enc

.align    5
Loop_ctr32_enc:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipher        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_ctr32_enc

    vadduqm        $ivec,$ivec,$one    # Kernel change for 128-bit
     vmr        $dat,$inptail
     lvx        $inptail,0,$inp
     addi        $inp,$inp,16
     subic.        $len,$len,1        # blocks--

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key
     vperm        $dat,$dat,$inptail,$inpperm
     li        $idx,16
    ?vperm        $rndkey1,$rndkey0,$rndkey1,$keyperm
     lvx        $rndkey0,0,$key
    vxor        $dat,$dat,$rndkey1    # last round key
    vcipherlast    $inout,$inout,$dat

     lvx        $rndkey1,$idx,$key
     addi        $idx,$idx,16
    vperm        $inout,$inout,$inout,$outperm
    vsel        $dat,$outhead,$inout,$outmask
     mtctr        $rounds
     ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vmr        $outhead,$inout
     vxor        $inout,$ivec,$rndkey0
     lvx        $rndkey0,$idx,$key
     addi        $idx,$idx,16
    stvx        $dat,0,$out
    addi        $out,$out,16
    bne        Loop_ctr32_enc

    addi        $out,$out,-1
    lvx        $inout,0,$out        # redundant in aligned case
    vsel        $inout,$outhead,$inout,$outmask
    stvx        $inout,0,$out

    mtspr        256,$vrsave
    blr
    .long        0
    .byte        0,12,0x14,0,0,0,6,0
    .long        0
___
#########################################################################
{{    # Optimized CTR procedure                    #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";    # v24-v25 rotating buffer for the first few round keys
            # v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);    # aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);
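
# CTR is fully parallelizable: eight counter values are encrypted per
# iteration and the resulting keystream is xored with the input blocks.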

$code.=<<___;
.align    5
_aesp8_ctr32_encrypt8x:
    $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
    li        r10,`$FRAME+8*16+15`
    li        r11,`$FRAME+8*16+31`
    stvx        v20,r10,$sp        # ABI says so
    addi        r10,r10,32
    stvx        v21,r11,$sp
    addi        r11,r11,32
    stvx        v22,r10,$sp
    addi        r10,r10,32
    stvx        v23,r11,$sp
    addi        r11,r11,32
    stvx        v24,r10,$sp
    addi        r10,r10,32
    stvx        v25,r11,$sp
    addi        r11,r11,32
    stvx        v26,r10,$sp
    addi        r10,r10,32
    stvx        v27,r11,$sp
    addi        r11,r11,32
    stvx        v28,r10,$sp
    addi        r10,r10,32
    stvx        v29,r11,$sp
    addi        r11,r11,32
    stvx        v30,r10,$sp
    stvx        v31,r11,$sp
    li        r0,-1
    stw        $vrsave,`$FRAME+21*16-4`($sp)    # save vrsave
    li        $x10,0x10
    $PUSH        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    li        $x20,0x20
    $PUSH        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    li        $x30,0x30
    $PUSH        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    li        $x40,0x40
    $PUSH        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    li        $x50,0x50
    $PUSH        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    li        $x60,0x60
    $PUSH        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    li        $x70,0x70
    mtspr        256,r0

    subi        $rounds,$rounds,3    # -4 in total

    lvx        $rndkey0,$x00,$key    # load key schedule
    lvx        v30,$x10,$key
    addi        $key,$key,0x20
    lvx        v31,$x00,$key
    ?vperm        $rndkey0,$rndkey0,v30,$keyperm
    addi        $key_,$sp,$FRAME+15
    mtctr        $rounds

Load_ctr32_enc_key:
    ?vperm        v24,v30,v31,$keyperm
    lvx        v30,$x10,$key
    addi        $key,$key,0x20
    stvx        v24,$x00,$key_        # off-load round[1]
    ?vperm        v25,v31,v30,$keyperm
    lvx        v31,$x00,$key
    stvx        v25,$x10,$key_        # off-load round[2]
    addi        $key_,$key_,0x20
    bdnz        Load_ctr32_enc_key

    lvx        v26,$x10,$key
    ?vperm        v24,v30,v31,$keyperm
    lvx        v27,$x20,$key
    stvx        v24,$x00,$key_        # off-load round[3]
    ?vperm        v25,v31,v26,$keyperm
    lvx        v28,$x30,$key
    stvx        v25,$x10,$key_        # off-load round[4]
    addi        $key_,$sp,$FRAME+15    # rewind $key_
    ?vperm        v26,v26,v27,$keyperm
    lvx        v29,$x40,$key
    ?vperm        v27,v27,v28,$keyperm
    lvx        v30,$x50,$key
    ?vperm        v28,v28,v29,$keyperm
    lvx        v31,$x60,$key
    ?vperm        v29,v29,v30,$keyperm
    lvx        $out0,$x70,$key        # borrow $out0
    ?vperm        v30,v30,v31,$keyperm
    lvx        v24,$x00,$key_        # pre-load round[1]
    ?vperm        v31,v31,$out0,$keyperm
    lvx        v25,$x10,$key_        # pre-load round[2]

    vadduqm        $two,$one,$one
    subi        $inp,$inp,15        # undo "caller"
    $SHL        $len,$len,4

    vadduqm        $out1,$ivec,$one    # counter values ...
    vadduqm        $out2,$ivec,$two    # (do all ctr adds as 128-bit)
    vxor        $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
     le?li        $idx,8
    vadduqm        $out3,$out1,$two
    vxor        $out1,$out1,$rndkey0
     le?lvsl    $inpperm,0,$idx
    vadduqm        $out4,$out2,$two
    vxor        $out2,$out2,$rndkey0
     le?vspltisb    $tmp,0x0f
    vadduqm        $out5,$out3,$two
    vxor        $out3,$out3,$rndkey0
     le?vxor    $inpperm,$inpperm,$tmp    # transform for lvx_u/stvx_u
    vadduqm        $out6,$out4,$two
    vxor        $out4,$out4,$rndkey0
    vadduqm        $out7,$out5,$two
    vxor        $out5,$out5,$rndkey0
    vadduqm        $ivec,$out6,$two    # next counter value
    vxor        $out6,$out6,$rndkey0
    vxor        $out7,$out7,$rndkey0

    mtctr        $rounds
    b        Loop_ctr32_enc8x
.align    5
Loop_ctr32_enc8x:
    vcipher     $out0,$out0,v24
    vcipher     $out1,$out1,v24
    vcipher     $out2,$out2,v24
    vcipher     $out3,$out3,v24
    vcipher     $out4,$out4,v24
    vcipher     $out5,$out5,v24
    vcipher     $out6,$out6,v24
    vcipher     $out7,$out7,v24
Loop_ctr32_enc8x_middle:
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vcipher     $out0,$out0,v25
    vcipher     $out1,$out1,v25
    vcipher     $out2,$out2,v25
    vcipher     $out3,$out3,v25
    vcipher     $out4,$out4,v25
    vcipher     $out5,$out5,v25
    vcipher     $out6,$out6,v25
    vcipher     $out7,$out7,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Loop_ctr32_enc8x

    subic        r11,$len,256        # $len-256, borrow $key_
    vcipher     $out0,$out0,v24
    vcipher     $out1,$out1,v24
    vcipher     $out2,$out2,v24
    vcipher     $out3,$out3,v24
    vcipher     $out4,$out4,v24
    vcipher     $out5,$out5,v24
    vcipher     $out6,$out6,v24
    vcipher     $out7,$out7,v24

    subfe        r0,r0,r0        # borrow?-1:0
    vcipher     $out0,$out0,v25
    vcipher     $out1,$out1,v25
    vcipher     $out2,$out2,v25
    vcipher     $out3,$out3,v25
    vcipher     $out4,$out4,v25
    vcipher        $out5,$out5,v25
    vcipher        $out6,$out6,v25
    vcipher        $out7,$out7,v25

    and        r0,r0,r11
    addi        $key_,$sp,$FRAME+15    # rewind $key_
    vcipher        $out0,$out0,v26
    vcipher        $out1,$out1,v26
    vcipher        $out2,$out2,v26
    vcipher        $out3,$out3,v26
    vcipher        $out4,$out4,v26
    vcipher        $out5,$out5,v26
    vcipher        $out6,$out6,v26
    vcipher        $out7,$out7,v26
    lvx        v24,$x00,$key_        # re-pre-load round[1]

    subic        $len,$len,129        # $len-=129
    vcipher        $out0,$out0,v27
    addi        $len,$len,1        # $len-=128 really
    vcipher        $out1,$out1,v27
    vcipher        $out2,$out2,v27
    vcipher        $out3,$out3,v27
    vcipher        $out4,$out4,v27
    vcipher        $out5,$out5,v27
    vcipher        $out6,$out6,v27
    vcipher        $out7,$out7,v27
    lvx        v25,$x10,$key_        # re-pre-load round[2]

    vcipher        $out0,$out0,v28
     lvx_u        $in0,$x00,$inp        # load input
    vcipher        $out1,$out1,v28
     lvx_u        $in1,$x10,$inp
    vcipher        $out2,$out2,v28
     lvx_u        $in2,$x20,$inp
    vcipher        $out3,$out3,v28
     lvx_u        $in3,$x30,$inp
    vcipher        $out4,$out4,v28
     lvx_u        $in4,$x40,$inp
    vcipher        $out5,$out5,v28
     lvx_u        $in5,$x50,$inp
    vcipher        $out6,$out6,v28
     lvx_u        $in6,$x60,$inp
    vcipher        $out7,$out7,v28
     lvx_u        $in7,$x70,$inp
     addi        $inp,$inp,0x80

    vcipher        $out0,$out0,v29
     le?vperm    $in0,$in0,$in0,$inpperm
    vcipher        $out1,$out1,v29
     le?vperm    $in1,$in1,$in1,$inpperm
    vcipher        $out2,$out2,v29
     le?vperm    $in2,$in2,$in2,$inpperm
    vcipher        $out3,$out3,v29
     le?vperm    $in3,$in3,$in3,$inpperm
    vcipher        $out4,$out4,v29
     le?vperm    $in4,$in4,$in4,$inpperm
    vcipher        $out5,$out5,v29
     le?vperm    $in5,$in5,$in5,$inpperm
    vcipher        $out6,$out6,v29
     le?vperm    $in6,$in6,$in6,$inpperm
    vcipher        $out7,$out7,v29
     le?vperm    $in7,$in7,$in7,$inpperm

    add        $inp,$inp,r0        # $inp is adjusted in such
                        # way that at exit from the
                        # loop inX-in7 are loaded
                        # with last "words"
    subfe.        r0,r0,r0        # borrow?-1:0
    vcipher        $out0,$out0,v30
     vxor        $in0,$in0,v31        # xor with last round key
    vcipher        $out1,$out1,v30
     vxor        $in1,$in1,v31
    vcipher        $out2,$out2,v30
     vxor        $in2,$in2,v31
    vcipher        $out3,$out3,v30
     vxor        $in3,$in3,v31
    vcipher        $out4,$out4,v30
     vxor        $in4,$in4,v31
    vcipher        $out5,$out5,v30
     vxor        $in5,$in5,v31
    vcipher        $out6,$out6,v30
     vxor        $in6,$in6,v31
    vcipher        $out7,$out7,v30
     vxor        $in7,$in7,v31

    bne        Lctr32_enc8x_break    # did $len-129 borrow?

    vcipherlast    $in0,$out0,$in0
    vcipherlast    $in1,$out1,$in1
     vadduqm    $out1,$ivec,$one    # counter values ...
    vcipherlast    $in2,$out2,$in2
     vadduqm    $out2,$ivec,$two
     vxor        $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
    vcipherlast    $in3,$out3,$in3
     vadduqm    $out3,$out1,$two
     vxor        $out1,$out1,$rndkey0
    vcipherlast    $in4,$out4,$in4
     vadduqm    $out4,$out2,$two
     vxor        $out2,$out2,$rndkey0
    vcipherlast    $in5,$out5,$in5
     vadduqm    $out5,$out3,$two
     vxor        $out3,$out3,$rndkey0
    vcipherlast    $in6,$out6,$in6
     vadduqm    $out6,$out4,$two
     vxor        $out4,$out4,$rndkey0
    vcipherlast    $in7,$out7,$in7
     vadduqm    $out7,$out5,$two
     vxor        $out5,$out5,$rndkey0
    le?vperm    $in0,$in0,$in0,$inpperm
     vadduqm    $ivec,$out6,$two    # next counter value
     vxor        $out6,$out6,$rndkey0
    le?vperm    $in1,$in1,$in1,$inpperm
     vxor        $out7,$out7,$rndkey0
    mtctr        $rounds

     vcipher    $out0,$out0,v24
    stvx_u        $in0,$x00,$out
    le?vperm    $in2,$in2,$in2,$inpperm
     vcipher    $out1,$out1,v24
    stvx_u        $in1,$x10,$out
    le?vperm    $in3,$in3,$in3,$inpperm
     vcipher    $out2,$out2,v24
    stvx_u        $in2,$x20,$out
    le?vperm    $in4,$in4,$in4,$inpperm
     vcipher    $out3,$out3,v24
    stvx_u        $in3,$x30,$out
    le?vperm    $in5,$in5,$in5,$inpperm
     vcipher    $out4,$out4,v24
    stvx_u        $in4,$x40,$out
    le?vperm    $in6,$in6,$in6,$inpperm
     vcipher    $out5,$out5,v24
    stvx_u        $in5,$x50,$out
    le?vperm    $in7,$in7,$in7,$inpperm
     vcipher    $out6,$out6,v24
    stvx_u        $in6,$x60,$out
     vcipher    $out7,$out7,v24
    stvx_u        $in7,$x70,$out
    addi        $out,$out,0x80

    b        Loop_ctr32_enc8x_middle

.align    5
Lctr32_enc8x_break:
    cmpwi        $len,-0x60
    blt        Lctr32_enc8x_one
    nop
    beq        Lctr32_enc8x_two
    cmpwi        $len,-0x40
    blt        Lctr32_enc8x_three
    nop
    beq        Lctr32_enc8x_four
    cmpwi        $len,-0x20
    blt        Lctr32_enc8x_five
    nop
    beq        Lctr32_enc8x_six
    cmpwi        $len,0x00
    blt        Lctr32_enc8x_seven

Lctr32_enc8x_eight:
    vcipherlast    $out0,$out0,$in0
    vcipherlast    $out1,$out1,$in1
    vcipherlast    $out2,$out2,$in2
    vcipherlast    $out3,$out3,$in3
    vcipherlast    $out4,$out4,$in4
    vcipherlast    $out5,$out5,$in5
    vcipherlast    $out6,$out6,$in6
    vcipherlast    $out7,$out7,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x20,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x30,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x40,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x50,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u        $out6,$x60,$out
    stvx_u        $out7,$x70,$out
    addi        $out,$out,0x80
    b        Lctr32_enc8x_done

.align    5
Lctr32_enc8x_seven:
    vcipherlast    $out0,$out0,$in1
    vcipherlast    $out1,$out1,$in2
    vcipherlast    $out2,$out2,$in3
    vcipherlast    $out3,$out3,$in4
    vcipherlast    $out4,$out4,$in5
    vcipherlast    $out5,$out5,$in6
    vcipherlast    $out6,$out6,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x20,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x30,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x40,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u        $out5,$x50,$out
    stvx_u        $out6,$x60,$out
    addi        $out,$out,0x70
    b        Lctr32_enc8x_done

.align    5
Lctr32_enc8x_six:
    vcipherlast    $out0,$out0,$in2
    vcipherlast    $out1,$out1,$in3
    vcipherlast    $out2,$out2,$in4
    vcipherlast    $out3,$out3,$in5
    vcipherlast    $out4,$out4,$in6
    vcipherlast    $out5,$out5,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x20,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x30,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u        $out4,$x40,$out
    stvx_u        $out5,$x50,$out
    addi        $out,$out,0x60
    b        Lctr32_enc8x_done

.align    5
Lctr32_enc8x_five:
    vcipherlast    $out0,$out0,$in3
    vcipherlast    $out1,$out1,$in4
    vcipherlast    $out2,$out2,$in5
    vcipherlast    $out3,$out3,$in6
    vcipherlast    $out4,$out4,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x20,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u        $out3,$x30,$out
    stvx_u        $out4,$x40,$out
    addi        $out,$out,0x50
    b        Lctr32_enc8x_done

.align    5
Lctr32_enc8x_four:
    vcipherlast    $out0,$out0,$in4
    vcipherlast    $out1,$out1,$in5
    vcipherlast    $out2,$out2,$in6
    vcipherlast    $out3,$out3,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u        $out2,$x20,$out
    stvx_u        $out3,$x30,$out
    addi        $out,$out,0x40
    b        Lctr32_enc8x_done

.align    5
Lctr32_enc8x_three:
    vcipherlast    $out0,$out0,$in5
    vcipherlast    $out1,$out1,$in6
    vcipherlast    $out2,$out2,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u        $out1,$x10,$out
    stvx_u        $out2,$x20,$out
    addi        $out,$out,0x30
    b        Lctr32_enc8x_done

.align    5
Lctr32_enc8x_two:
    vcipherlast    $out0,$out0,$in6
    vcipherlast    $out1,$out1,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u        $out0,$x00,$out
    stvx_u        $out1,$x10,$out
    addi        $out,$out,0x20
    b        Lctr32_enc8x_done

.align    5
Lctr32_enc8x_one:
    vcipherlast    $out0,$out0,$in7

    le?vperm    $out0,$out0,$out0,$inpperm
    stvx_u        $out0,0,$out
    addi        $out,$out,0x10

Lctr32_enc8x_done:
    li        r10,`$FRAME+15`
    li        r11,`$FRAME+31`
    stvx        $inpperm,r10,$sp    # wipe copies of round keys
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
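    # ($inpperm is just a public permute mask; storing it repeatedly is
    #  only a cheap way to overwrite the stacked round-key copies)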

    mtspr        256,$vrsave
    lvx        v20,r10,$sp        # ABI says so
    addi        r10,r10,32
    lvx        v21,r11,$sp
    addi        r11,r11,32
    lvx        v22,r10,$sp
    addi        r10,r10,32
    lvx        v23,r11,$sp
    addi        r11,r11,32
    lvx        v24,r10,$sp
    addi        r10,r10,32
    lvx        v25,r11,$sp
    addi        r11,r11,32
    lvx        v26,r10,$sp
    addi        r10,r10,32
    lvx        v27,r11,$sp
    addi        r11,r11,32
    lvx        v28,r10,$sp
    addi        r10,r10,32
    lvx        v29,r11,$sp
    addi        r11,r11,32
    lvx        v30,r10,$sp
    lvx        v31,r11,$sp
    $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
    blr
    .long        0
    .byte        0,12,0x14,0,0x80,6,6,0
    .long        0
.size    .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}    }}}

#########################################################################
{{{    # XTS procedures                        #
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,    #
#                             const AES_KEY *key1, const AES_KEY *key2,    #
#                             [const] unsigned char iv[16]);        #
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which    #
# the input tweak value is assumed to be encrypted already, and the    #
# last tweak value, suitable for a consecutive call on the same chunk    #
# of data, is written back to the original buffer. In addition, in    #
# "tweak chaining" mode only complete input blocks are processed.    #
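#                                    #
# An illustrative C-side view of the contract above (a sketch, not    #
# code from this file; AES_KEY as in the prototype):            #
#                                    #
#    unsigned char iv[16];    /* plaintext tweak, or, when key2 is    #
#                   NULL, an already-encrypted tweak    #
#                   that is updated in place */        #
#    aes_p8_xts_encrypt(inp, out, len, key1, key2, iv);        #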

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =    map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =                map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =    map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =        map("v$_",(8..12));
my $taillen = $key2;

   ($inp,$idx) = ($idx,$inp);                # reassign

$code.=<<___;
.globl    .${prefix}_xts_encrypt
    mr        $inp,r3                # reassign
    li        r3,-1
    ${UCMP}i    $len,16
    bltlr-

    lis        r0,0xfff0
    mfspr        r12,256                # save vrsave
    li        r11,0
    mtspr        256,r0

    vspltisb    $seven,0x07            # 0x070707..07
    le?lvsl        $leperm,r11,r11
    le?vspltisb    $tmp,0x0f
    le?vxor        $leperm,$leperm,$seven
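    # (on little-endian, $leperm works out to 7,6,...,0,15,14,...,8:
    #  lvsl yields bytes 0..15 and xor-ing each with 0x07 reverses the
    #  bytes within each doubleword, matching the element order of the
    #  lxvd2x-backed lvx_u/stvx_u loads and stores)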

    li        $idx,15
    lvx        $tweak,0,$ivp            # load [unaligned] iv
    lvsl        $inpperm,0,$ivp
    lvx        $inptail,$idx,$ivp
    le?vxor        $inpperm,$inpperm,$tmp
    vperm        $tweak,$tweak,$inptail,$inpperm

    neg        r11,$inp
    lvsr        $inpperm,0,r11            # prepare for unaligned load
    lvx        $inout,0,$inp
    addi        $inp,$inp,15            # 15 is not a typo
    le?vxor        $inpperm,$inpperm,$tmp

    ${UCMP}i    $key2,0                # key2==NULL?
    beq        Lxts_enc_no_key2

    ?lvsl        $keyperm,0,$key2        # prepare for unaligned key
    lwz        $rounds,240($key2)
    srwi        $rounds,$rounds,1
    subi        $rounds,$rounds,1
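    # (240 is the byte offset of the rounds field in an OpenSSL-style
    #  AES_KEY; the tweak loop below does two vcipher rounds per
    #  iteration, hence rounds/2-1 trips, with the round-0 xor and the
    #  final two rounds handled outside the loop)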
    li        $idx,16

    lvx        $rndkey0,0,$key2
    lvx        $rndkey1,$idx,$key2
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $tweak,$tweak,$rndkey0
    lvx        $rndkey0,$idx,$key2
    addi        $idx,$idx,16
    mtctr        $rounds

Ltweak_xts_enc:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $tweak,$tweak,$rndkey1
    lvx        $rndkey1,$idx,$key2
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipher        $tweak,$tweak,$rndkey0
    lvx        $rndkey0,$idx,$key2
    addi        $idx,$idx,16
    bdnz        Ltweak_xts_enc

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $tweak,$tweak,$rndkey1
    lvx        $rndkey1,$idx,$key2
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipherlast    $tweak,$tweak,$rndkey0
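    # Ltweak_xts_enc amounts to a plain AES encryption of the IV under
    # $key2, i.e. tweak = AES-encrypt(key2, iv), as XTS specifies.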

    li        $ivp,0                # don't chain the tweak
    b        Lxts_enc

Lxts_enc_no_key2:
    li        $idx,-16
    and        $len,$len,$idx            # in "tweak chaining"
                            # mode only complete
                            # blocks are processed
Lxts_enc:
    lvx        $inptail,0,$inp
    addi        $inp,$inp,16

    ?lvsl        $keyperm,0,$key1        # prepare for unaligned key
    lwz        $rounds,240($key1)
    srwi        $rounds,$rounds,1
    subi        $rounds,$rounds,1
    li        $idx,16

    vslb        $eighty7,$seven,$seven        # 0x808080..80
    vor        $eighty7,$eighty7,$seven    # 0x878787..87
    vspltisb    $tmp,1                # 0x010101..01
    vsldoi        $eighty7,$eighty7,$tmp,15    # 0x870101..01
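    # (0x87 is the reduction byte of the XTS polynomial
    #  x^128 + x^7 + x^2 + x + 1; laying it out as 0x870101..01 lets a
    #  single masked xor apply both the 0x87 wrap-around and the
    #  per-byte carries when the tweak is multiplied by x below)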

    ${UCMP}i    $len,96
    bge        _aesp8_xts_encrypt6x

    andi.        $taillen,$len,15
    subic        r0,$len,32
    subi        $taillen,$taillen,16
    subfe        r0,r0,r0
    and        r0,r0,$taillen
    add        $inp,$inp,r0
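    # branchless bias, roughly: if (len < 32) inp += (len & 15) - 16;
    # the subic/subfe pair turns the borrow into a 0/-1 mask, so the
    # final (possibly partial) block is loaded from inside the buffer
    # rather than past its end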

    lvx        $rndkey0,0,$key1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16
    vperm        $inout,$inout,$inptail,$inpperm
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$inout,$tweak
    vxor        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16
    mtctr        $rounds
    b        Loop_xts_enc

.align    5
Loop_xts_enc:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipher        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16
    bdnz        Loop_xts_enc

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key1
    li        $idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $rndkey0,$rndkey0,$tweak
    vcipherlast    $output,$inout,$rndkey0

    le?vperm    $tmp,$output,$output,$leperm
    be?nop
    le?stvx_u    $tmp,0,$out
    be?stvx_u    $output,0,$out
    addi        $out,$out,16

    subic.        $len,$len,16
    beq        Lxts_enc_done

    vmr        $inout,$inptail
    lvx        $inptail,0,$inp
    addi        $inp,$inp,16
    lvx        $rndkey0,0,$key1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16

    subic        r0,$len,32
    subfe        r0,r0,r0
    and        r0,r0,$taillen
    add        $inp,$inp,r0

    vsrab        $tmp,$tweak,$seven        # next tweak value
    vaddubm        $tweak,$tweak,$tweak
    vsldoi        $tmp,$tmp,$tmp,15
    vand        $tmp,$tmp,$eighty7
    vxor        $tweak,$tweak,$tmp
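    # tweak *= x in GF(2^128); a rough byte-wise C model of the five
    # instructions above (illustrative only, byte order glossed over):
    #
    #    carry = 0;
    #    for (i = 0; i < 16; i++) {
    #        next = tweak[i] >> 7;
    #        tweak[i] = (unsigned char)((tweak[i] << 1) | carry);
    #        carry = next;
    #    }
    #    if (carry) tweak[0] ^= 0x87;
    #
    # vsrab turns each byte's top bit into a 0x00/0xff mask, vaddubm
    # doubles every byte, and the rotated, masked xor re-injects the
    # inter-byte carries plus the 0x87 wrap.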

    vperm        $inout,$inout,$inptail,$inpperm
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$inout,$tweak
    vxor        $output,$output,$rndkey0    # just in case $len<16
    vxor        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16

    mtctr        $rounds
    ${UCMP}i    $len,16
    bge        Loop_xts_enc

    vxor        $output,$output,$tweak
    lvsr        $inpperm,0,$len            # $inpperm is no longer needed
    vxor        $inptail,$inptail,$inptail    # $inptail is no longer needed
    vspltisb    $tmp,-1
    vperm        $inptail,$inptail,$tmp,$inpperm
    vsel        $inout,$inout,$output,$inptail

    subi        r11,$out,17
    subi        $out,$out,16
    mtctr        $len
    li        $len,16
Loop_xts_enc_steal:
    lbzu        r0,1(r11)
    stb        r0,16(r11)
    bdnz        Loop_xts_enc_steal
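    # ciphertext stealing: with $out rewound to the last full block,
    # the lbzu/stb loop is roughly memcpy(out + 16, out, taillen); the
    # head of that ciphertext block becomes the short final output,
    # and the merged block is then encrypted once more via Loop_xts_enc.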

    mtctr        $rounds
    b        Loop_xts_enc            # one more time...

Lxts_enc_done:
    ${UCMP}i    $ivp,0
    beq        Lxts_enc_ret

    vsrab        $tmp,$tweak,$seven        # next tweak value
    vaddubm        $tweak,$tweak,$tweak
    vsldoi        $tmp,$tmp,$tmp,15
    vand        $tmp,$tmp,$eighty7
    vxor        $tweak,$tweak,$tmp

    le?vperm    $tweak,$tweak,$tweak,$leperm
    stvx_u        $tweak,0,$ivp

Lxts_enc_ret:
    mtspr        256,r12                # restore vrsave
    li        r3,0
    blr
    .long        0
    .byte        0,12,0x04,0,0x80,6,6,0
    .long        0
.size    .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl    .${prefix}_xts_decrypt
    mr        $inp,r3                # reassign
    li        r3,-1
    ${UCMP}i    $len,16
    bltlr-

    lis        r0,0xfff8
    mfspr        r12,256                # save vrsave
    li        r11,0
    mtspr        256,r0

    andi.        r0,$len,15
    neg        r0,r0
    andi.        r0,r0,16
    sub        $len,$len,r0

    vspltisb    $seven,0x07            # 0x070707..07
    le?lvsl        $leperm,r11,r11
    le?vspltisb    $tmp,0x0f
    le?vxor        $leperm,$leperm,$seven

    li        $idx,15
    lvx        $tweak,0,$ivp            # load [unaligned] iv
    lvsl        $inpperm,0,$ivp
    lvx        $inptail,$idx,$ivp
    le?vxor        $inpperm,$inpperm,$tmp
    vperm        $tweak,$tweak,$inptail,$inpperm

    neg        r11,$inp
    lvsr        $inpperm,0,r11            # prepare for unaligned load
    lvx        $inout,0,$inp
    addi        $inp,$inp,15            # 15 is not a typo
    le?vxor        $inpperm,$inpperm,$tmp

    ${UCMP}i    $key2,0                # key2==NULL?
    beq        Lxts_dec_no_key2

    ?lvsl        $keyperm,0,$key2        # prepare for unaligned key
    lwz        $rounds,240($key2)
    srwi        $rounds,$rounds,1
    subi        $rounds,$rounds,1
    li        $idx,16

    lvx        $rndkey0,0,$key2
    lvx        $rndkey1,$idx,$key2
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $tweak,$tweak,$rndkey0
    lvx        $rndkey0,$idx,$key2
    addi        $idx,$idx,16
    mtctr        $rounds

Ltweak_xts_dec:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $tweak,$tweak,$rndkey1
    lvx        $rndkey1,$idx,$key2
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipher        $tweak,$tweak,$rndkey0
    lvx        $rndkey0,$idx,$key2
    addi        $idx,$idx,16
    bdnz        Ltweak_xts_dec

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher        $tweak,$tweak,$rndkey1
    lvx        $rndkey1,$idx,$key2
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipherlast    $tweak,$tweak,$rndkey0

    li        $ivp,0                # don't chain the tweak
    b        Lxts_dec

Lxts_dec_no_key2:
    neg        $idx,$len
    andi.        $idx,$idx,15
    add        $len,$len,$idx            # in "tweak chaining"
                            # mode only complete
                            # blocks are processed
Lxts_dec:
    lvx        $inptail,0,$inp
    addi        $inp,$inp,16

    ?lvsl        $keyperm,0,$key1        # prepare for unaligned key
    lwz        $rounds,240($key1)
    srwi        $rounds,$rounds,1
    subi        $rounds,$rounds,1
    li        $idx,16

    vslb        $eighty7,$seven,$seven        # 0x808080..80
    vor        $eighty7,$eighty7,$seven    # 0x878787..87
    vspltisb    $tmp,1                # 0x010101..01
    vsldoi        $eighty7,$eighty7,$tmp,15    # 0x870101..01

    ${UCMP}i    $len,96
    bge        _aesp8_xts_decrypt6x

    lvx        $rndkey0,0,$key1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16
    vperm        $inout,$inout,$inptail,$inpperm
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$inout,$tweak
    vxor        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16
    mtctr        $rounds

    ${UCMP}i    $len,16
    blt        Ltail_xts_dec
    be?b        Loop_xts_dec

.align    5
Loop_xts_dec:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vncipher    $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16
    bdnz        Loop_xts_dec

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key1
    li        $idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $rndkey0,$rndkey0,$tweak
    vncipherlast    $output,$inout,$rndkey0

    le?vperm    $tmp,$output,$output,$leperm
    be?nop
    le?stvx_u    $tmp,0,$out
    be?stvx_u    $output,0,$out
    addi        $out,$out,16

    subic.        $len,$len,16
    beq        Lxts_dec_done

    vmr        $inout,$inptail
    lvx        $inptail,0,$inp
    addi        $inp,$inp,16
    lvx        $rndkey0,0,$key1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16

    vsrab        $tmp,$tweak,$seven        # next tweak value
    vaddubm        $tweak,$tweak,$tweak
    vsldoi        $tmp,$tmp,$tmp,15
    vand        $tmp,$tmp,$eighty7
    vxor        $tweak,$tweak,$tmp

    vperm        $inout,$inout,$inptail,$inpperm
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$inout,$tweak
    vxor        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16

    mtctr        $rounds
    ${UCMP}i    $len,16
    bge        Loop_xts_dec

Ltail_xts_dec:
    vsrab        $tmp,$tweak,$seven        # next tweak value
    vaddubm        $tweak1,$tweak,$tweak
    vsldoi        $tmp,$tmp,$tmp,15
    vand        $tmp,$tmp,$eighty7
    vxor        $tweak1,$tweak1,$tmp

    subi        $inp,$inp,16
    add        $inp,$inp,$len

    vxor        $inout,$inout,$tweak        # :-(
    vxor        $inout,$inout,$tweak1        # :-)
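    # (XTS stealing on decrypt uses the tweaks out of order: the last
    #  full block is processed with the *next* tweak, $tweak1, while
    #  the short final block keeps the current $tweak, hence the
    #  un-xor/re-xor pair above)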

Loop_xts_dec_short:
    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vncipher    $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16
    bdnz        Loop_xts_dec_short

    ?vperm        $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx        $rndkey1,$idx,$key1
    li        $idx,16
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $rndkey0,$rndkey0,$tweak1
    vncipherlast    $output,$inout,$rndkey0

    le?vperm    $tmp,$output,$output,$leperm
    be?nop
    le?stvx_u    $tmp,0,$out
    be?stvx_u    $output,0,$out

    vmr        $inout,$inptail
    lvx        $inptail,0,$inp
    #addi        $inp,$inp,16
    lvx        $rndkey0,0,$key1
    lvx        $rndkey1,$idx,$key1
    addi        $idx,$idx,16
    vperm        $inout,$inout,$inptail,$inpperm
    ?vperm        $rndkey0,$rndkey0,$rndkey1,$keyperm

    lvsr        $inpperm,0,$len            # $inpperm is no longer needed
    vxor        $inptail,$inptail,$inptail    # $inptail is no longer needed
    vspltisb    $tmp,-1
    vperm        $inptail,$inptail,$tmp,$inpperm
    vsel        $inout,$inout,$output,$inptail
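    # the lvsr/vperm pair builds a byte mask from $len, and vsel merges
    # the blocks for stealing: the first $len bytes come from the short
    # input block, the rest from the previously decrypted $output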

    vxor        $rndkey0,$rndkey0,$tweak
    vxor        $inout,$inout,$rndkey0
    lvx        $rndkey0,$idx,$key1
    addi        $idx,$idx,16

    subi        r11,$out,1
    mtctr        $len
    li        $len,16
Loop_xts_dec_steal:
    lbzu        r0,1(r11)
    stb        r0,16(r11)
    bdnz        Loop_xts_dec_steal

    mtctr        $rounds
    b        Loop_xts_dec            # one more time...

Lxts_dec_done:
    ${UCMP}i    $ivp,0
    beq        Lxts_dec_ret

    vsrab        $tmp,$tweak,$seven        # next tweak value
    vaddubm        $tweak,$tweak,$tweak
    vsldoi        $tmp,$tmp,$tmp,15
    vand        $tmp,$tmp,$eighty7
    vxor        $tweak,$tweak,$tmp

    le?vperm    $tweak,$tweak,$tweak,$leperm
    stvx_u        $tweak,0,$ivp

Lxts_dec_ret:
    mtspr        256,r12                # restore vrsave
    li        r3,0
    blr
    .long        0
    .byte        0,12,0x04,0,0x80,6,6,0
    .long        0
.size    .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{    # Optimized XTS procedures                    #
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";    # v24-v25 rotating buffer for first round keys
            # v26-v31 last 6 round keys
my ($keyperm)=($out0);    # aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align    5
_aesp8_xts_encrypt6x:
    $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
    mflr        r11
    li        r7,`$FRAME+8*16+15`
    li        r3,`$FRAME+8*16+31`
    $PUSH        r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
    stvx        v20,r7,$sp        # ABI says so
    addi        r7,r7,32
    stvx        v21,r3,$sp
    addi        r3,r3,32
    stvx        v22,r7,$sp
    addi        r7,r7,32
    stvx        v23,r3,$sp
    addi        r3,r3,32
    stvx        v24,r7,$sp
    addi        r7,r7,32
    stvx        v25,r3,$sp
    addi        r3,r3,32
    stvx        v26,r7,$sp
    addi        r7,r7,32
    stvx        v27,r3,$sp
    addi        r3,r3,32
    stvx        v28,r7,$sp
    addi        r7,r7,32
    stvx        v29,r3,$sp
    addi        r3,r3,32
    stvx        v30,r7,$sp
    stvx        v31,r3,$sp
    li        r0,-1
    stw        $vrsave,`$FRAME+21*16-4`($sp)    # save vrsave
    li        $x10,0x10
    $PUSH        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    li        $x20,0x20
    $PUSH        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    li        $x30,0x30
    $PUSH        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    li        $x40,0x40
    $PUSH        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    li        $x50,0x50
    $PUSH        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    li        $x60,0x60
    $PUSH        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    li        $x70,0x70
    mtspr        256,r0

    xxlor        2, 32+$eighty7, 32+$eighty7
    vsldoi        $eighty7,$tmp,$eighty7,1        # 0x010101..87
    xxlor        1, 32+$eighty7, 32+$eighty7

    # Load the XOR constants from Lconsts.
    mr        $x70, r6
    bl        Lconsts
    lxvw4x        0, $x40, r6        # load XOR constants
    mr        r6, $x70
    li        $x70,0x70

    subi        $rounds,$rounds,3    # -4 in total

    lvx        $rndkey0,$x00,$key1    # load key schedule
    lvx        v30,$x10,$key1
    addi        $key1,$key1,0x20
    lvx        v31,$x00,$key1
    ?vperm        $rndkey0,$rndkey0,v30,$keyperm
    addi        $key_,$sp,$FRAME+15
    mtctr        $rounds

Load_xts_enc_key:
    ?vperm        v24,v30,v31,$keyperm
    lvx        v30,$x10,$key1
    addi        $key1,$key1,0x20
    stvx        v24,$x00,$key_        # off-load round[1]
    ?vperm        v25,v31,v30,$keyperm
    lvx        v31,$x00,$key1
    stvx        v25,$x10,$key_        # off-load round[2]
    addi        $key_,$key_,0x20
    bdnz        Load_xts_enc_key

    lvx        v26,$x10,$key1
    ?vperm        v24,v30,v31,$keyperm
    lvx        v27,$x20,$key1
    stvx        v24,$x00,$key_        # off-load round[3]
    ?vperm        v25,v31,v26,$keyperm
    lvx        v28,$x30,$key1
    stvx        v25,$x10,$key_        # off-load round[4]
    addi        $key_,$sp,$FRAME+15    # rewind $key_
    ?vperm        v26,v26,v27,$keyperm
    lvx        v29,$x40,$key1
    ?vperm        v27,v27,v28,$keyperm
    lvx        v30,$x50,$key1
    ?vperm        v28,v28,v29,$keyperm
    lvx        v31,$x60,$key1
    ?vperm        v29,v29,v30,$keyperm
    lvx        $twk5,$x70,$key1    # borrow $twk5
    ?vperm        v30,v30,v31,$keyperm
    lvx        v24,$x00,$key_        # pre-load round[1]
    ?vperm        v31,v31,$twk5,$keyperm
    lvx        v25,$x10,$key_        # pre-load round[2]

    # Switch to the following code sequence, which uses 0x010101..87
    # to generate the tweak.
    #     eighty7 = 0x010101..87
    # vsrab         tmp, tweak, seven       # next tweak value, right shift 7 bits
    # vand          tmp, tmp, eighty7       # last byte with carry
    # vaddubm       tweak, tweak, tweak     # left shift 1 bit (x2)
    # xxlor         vsx, 0, 0
    # vpermxor      tweak, tweak, tmp, vsx
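    # vpermxor folds the old vsldoi rotate and final vxor into a single
    # instruction: the permute constant loaded from Lconsts routes each
    # carry byte to the lane where it lands, while the vand with
    # 0x010101..87 is kept as-is.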

     vperm        $in0,$inout,$inptail,$inpperm
     subi        $inp,$inp,31        # undo "caller"
    vxor        $twk0,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
    vand        $tmp,$tmp,$eighty7
     vxor        $out0,$in0,$twk0
    xxlor        32+$in1, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in1

     lvx_u        $in1,$x10,$inp
    vxor        $twk1,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in1,$in1,$in1,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out1,$in1,$twk1
    xxlor        32+$in2, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in2

     lvx_u        $in2,$x20,$inp
     andi.        $taillen,$len,15
    vxor        $twk2,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in2,$in2,$in2,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out2,$in2,$twk2
    xxlor        32+$in3, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in3

     lvx_u        $in3,$x30,$inp
     sub        $len,$len,$taillen
    vxor        $twk3,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in3,$in3,$in3,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out3,$in3,$twk3
    xxlor        32+$in4, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in4

     lvx_u        $in4,$x40,$inp
     subi        $len,$len,0x60
    vxor        $twk4,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in4,$in4,$in4,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out4,$in4,$twk4
    xxlor        32+$in5, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in5

     lvx_u        $in5,$x50,$inp
     addi        $inp,$inp,0x60
    vxor        $twk5,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in5,$in5,$in5,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out5,$in5,$twk5
    xxlor        32+$in0, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in0

    vxor        v31,v31,$rndkey0
    mtctr        $rounds
    b        Loop_xts_enc6x

.align    5
Loop_xts_enc6x:
    vcipher        $out0,$out0,v24
    vcipher        $out1,$out1,v24
    vcipher        $out2,$out2,v24
    vcipher        $out3,$out3,v24
    vcipher        $out4,$out4,v24
    vcipher        $out5,$out5,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vcipher        $out0,$out0,v25
    vcipher        $out1,$out1,v25
    vcipher        $out2,$out2,v25
    vcipher        $out3,$out3,v25
    vcipher        $out4,$out4,v25
    vcipher        $out5,$out5,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Loop_xts_enc6x

    xxlor        32+$eighty7, 1, 1    # 0x010101..87

    subic        $len,$len,96        # $len-=96
     vxor        $in0,$twk0,v31        # xor with last round key
    vcipher        $out0,$out0,v24
    vcipher        $out1,$out1,v24
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk0,$tweak,$rndkey0
     vaddubm    $tweak,$tweak,$tweak
    vcipher        $out2,$out2,v24
    vcipher        $out3,$out3,v24
    vcipher        $out4,$out4,v24
    vcipher        $out5,$out5,v24

    subfe.        r0,r0,r0        # borrow?-1:0
     vand        $tmp,$tmp,$eighty7
    vcipher        $out0,$out0,v25
    vcipher        $out1,$out1,v25
     xxlor        32+$in1, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in1
    vcipher        $out2,$out2,v25
    vcipher        $out3,$out3,v25
     vxor        $in1,$twk1,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk1,$tweak,$rndkey0
    vcipher        $out4,$out4,v25
    vcipher        $out5,$out5,v25

    and        r0,r0,$len
     vaddubm    $tweak,$tweak,$tweak
    vcipher        $out0,$out0,v26
    vcipher        $out1,$out1,v26
     vand        $tmp,$tmp,$eighty7
    vcipher        $out2,$out2,v26
    vcipher        $out3,$out3,v26
     xxlor        32+$in2, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in2
    vcipher        $out4,$out4,v26
    vcipher        $out5,$out5,v26

    add        $inp,$inp,r0        # $inp is adjusted in such a
                        # way that at exit from the
                        # loop inX-in5 are loaded
                        # with the last "words"
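    # branchless pointer fix-up, roughly:
    #    len -= 96;
    #    if (len < 0) inp += len;    /* rewind by 96 - remaining */
    # (the subic/subfe./and chain above computes the conditional
    #  without a branch, so the 6x loop can keep issuing full 6-block
    #  loads while fewer than six blocks actually remain)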
     vxor        $in2,$twk2,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk2,$tweak,$rndkey0
     vaddubm    $tweak,$tweak,$tweak
    vcipher        $out0,$out0,v27
    vcipher        $out1,$out1,v27
    vcipher        $out2,$out2,v27
    vcipher        $out3,$out3,v27
     vand        $tmp,$tmp,$eighty7
    vcipher        $out4,$out4,v27
    vcipher        $out5,$out5,v27

    addi        $key_,$sp,$FRAME+15    # rewind $key_
     xxlor        32+$in3, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in3
    vcipher        $out0,$out0,v28
    vcipher        $out1,$out1,v28
     vxor        $in3,$twk3,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk3,$tweak,$rndkey0
    vcipher        $out2,$out2,v28
    vcipher        $out3,$out3,v28
     vaddubm    $tweak,$tweak,$tweak
    vcipher        $out4,$out4,v28
    vcipher        $out5,$out5,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]
     vand        $tmp,$tmp,$eighty7

    vcipher        $out0,$out0,v29
    vcipher        $out1,$out1,v29
     xxlor        32+$in4, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in4
    vcipher        $out2,$out2,v29
    vcipher        $out3,$out3,v29
     vxor        $in4,$twk4,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk4,$tweak,$rndkey0
    vcipher        $out4,$out4,v29
    vcipher        $out5,$out5,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]
     vaddubm    $tweak,$tweak,$tweak

    vcipher        $out0,$out0,v30
    vcipher        $out1,$out1,v30
     vand        $tmp,$tmp,$eighty7
    vcipher        $out2,$out2,v30
    vcipher        $out3,$out3,v30
     xxlor        32+$in5, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in5
    vcipher        $out4,$out4,v30
    vcipher        $out5,$out5,v30
     vxor        $in5,$twk5,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk5,$tweak,$rndkey0

    vcipherlast    $out0,$out0,$in0
     lvx_u        $in0,$x00,$inp        # load next input block
     vaddubm    $tweak,$tweak,$tweak
    vcipherlast    $out1,$out1,$in1
     lvx_u        $in1,$x10,$inp
    vcipherlast    $out2,$out2,$in2
     le?vperm    $in0,$in0,$in0,$leperm
     lvx_u        $in2,$x20,$inp
     vand        $tmp,$tmp,$eighty7
    vcipherlast    $out3,$out3,$in3
     le?vperm    $in1,$in1,$in1,$leperm
     lvx_u        $in3,$x30,$inp
    vcipherlast    $out4,$out4,$in4
     le?vperm    $in2,$in2,$in2,$leperm
     lvx_u        $in4,$x40,$inp
     xxlor        10, 32+$in0, 32+$in0
     xxlor        32+$in0, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in0
     xxlor        32+$in0, 10, 10
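     # (vs10 serves as scratch here: $in0 is parked in it while its
     #  vector register briefly carries the vpermxor constant, then
     #  restored)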
    vcipherlast    $tmp,$out5,$in5        # last block might be needed
                        # in stealing mode
     le?vperm    $in3,$in3,$in3,$leperm
     lvx_u        $in5,$x50,$inp
     addi        $inp,$inp,0x60
     le?vperm    $in4,$in4,$in4,$leperm
     le?vperm    $in5,$in5,$in5,$leperm

    le?vperm    $out0,$out0,$out0,$leperm
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
     vxor        $out0,$in0,$twk0
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
     vxor        $out1,$in1,$twk1
    le?vperm    $out3,$out3,$out3,$leperm
    stvx_u        $out2,$x20,$out
     vxor        $out2,$in2,$twk2
    le?vperm    $out4,$out4,$out4,$leperm
    stvx_u        $out3,$x30,$out
     vxor        $out3,$in3,$twk3
    le?vperm    $out5,$tmp,$tmp,$leperm
    stvx_u        $out4,$x40,$out
     vxor        $out4,$in4,$twk4
    le?stvx_u    $out5,$x50,$out
    be?stvx_u    $tmp, $x50,$out
     vxor        $out5,$in5,$twk5
    addi        $out,$out,0x60

    mtctr        $rounds
    beq        Loop_xts_enc6x        # did $len-=96 borrow?

    xxlor        32+$eighty7, 2, 2    # 0x010101..87

    addic.        $len,$len,0x60
    beq        Lxts_enc6x_zero
    cmpwi        $len,0x20
    blt        Lxts_enc6x_one
    nop
    beq        Lxts_enc6x_two
    cmpwi        $len,0x40
    blt        Lxts_enc6x_three
    nop
    beq        Lxts_enc6x_four

Lxts_enc6x_five:
    vxor        $out0,$in1,$twk0
    vxor        $out1,$in2,$twk1
    vxor        $out2,$in3,$twk2
    vxor        $out3,$in4,$twk3
    vxor        $out4,$in5,$twk4

    bl        _aesp8_xts_enc5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk5        # unused tweak
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$leperm
    stvx_u        $out2,$x20,$out
    vxor        $tmp,$out4,$twk5    # last block prep for stealing
    le?vperm    $out4,$out4,$out4,$leperm
    stvx_u        $out3,$x30,$out
    stvx_u        $out4,$x40,$out
    addi        $out,$out,0x50
    bne        Lxts_enc6x_steal
    b        Lxts_enc6x_done

.align    4
Lxts_enc6x_four:
    vxor        $out0,$in2,$twk0
    vxor        $out1,$in3,$twk1
    vxor        $out2,$in4,$twk2
    vxor        $out3,$in5,$twk3
    vxor        $out4,$out4,$out4

    bl        _aesp8_xts_enc5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk4        # unused tweak
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
    vxor        $tmp,$out3,$twk4    # last block prep for stealing
    le?vperm    $out3,$out3,$out3,$leperm
    stvx_u        $out2,$x20,$out
    stvx_u        $out3,$x30,$out
    addi        $out,$out,0x40
    bne        Lxts_enc6x_steal
    b        Lxts_enc6x_done

.align    4
Lxts_enc6x_three:
    vxor        $out0,$in3,$twk0
    vxor        $out1,$in4,$twk1
    vxor        $out2,$in5,$twk2
    vxor        $out3,$out3,$out3
    vxor        $out4,$out4,$out4

    bl        _aesp8_xts_enc5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk3        # unused tweak
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    vxor        $tmp,$out2,$twk3    # last block prep for stealing
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
    stvx_u        $out2,$x20,$out
    addi        $out,$out,0x30
    bne        Lxts_enc6x_steal
    b        Lxts_enc6x_done

.align    4
Lxts_enc6x_two:
    vxor        $out0,$in4,$twk0
    vxor        $out1,$in5,$twk1
    vxor        $out2,$out2,$out2
    vxor        $out3,$out3,$out3
    vxor        $out4,$out4,$out4

    bl        _aesp8_xts_enc5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk2        # unused tweak
    vxor        $tmp,$out1,$twk2    # last block prep for stealing
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    stvx_u        $out1,$x10,$out
    addi        $out,$out,0x20
    bne        Lxts_enc6x_steal
    b        Lxts_enc6x_done

.align    4
Lxts_enc6x_one:
    vxor        $out0,$in5,$twk0
    nop
Loop_xts_enc1x:
    vcipher        $out0,$out0,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vcipher        $out0,$out0,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Loop_xts_enc1x

    add        $inp,$inp,$taillen
    cmpwi        $taillen,0
    vcipher        $out0,$out0,v24

    subi        $inp,$inp,16
    vcipher        $out0,$out0,v25

    lvsr        $inpperm,0,$taillen
    vcipher        $out0,$out0,v26

    lvx_u        $in0,0,$inp
    vcipher        $out0,$out0,v27

    addi        $key_,$sp,$FRAME+15    # rewind $key_
    vcipher        $out0,$out0,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]

    vcipher        $out0,$out0,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]
     vxor        $twk0,$twk0,v31

    le?vperm    $in0,$in0,$in0,$leperm
    vcipher        $out0,$out0,v30

    vperm        $in0,$in0,$in0,$inpperm
    vcipherlast    $out0,$out0,$twk0

    vmr        $twk0,$twk1        # unused tweak
    vxor        $tmp,$out0,$twk1    # last block prep for stealing
    le?vperm    $out0,$out0,$out0,$leperm
    stvx_u        $out0,$x00,$out        # store output
    addi        $out,$out,0x10
    bne        Lxts_enc6x_steal
    b        Lxts_enc6x_done

.align    4
Lxts_enc6x_zero:
    cmpwi        $taillen,0
    beq        Lxts_enc6x_done

    add        $inp,$inp,$taillen
    subi        $inp,$inp,16
    lvx_u        $in0,0,$inp
    lvsr        $inpperm,0,$taillen    # $in5 is no longer needed
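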
    le?vperm    $in0,$in0,$in0,$leperm
    vperm        $in0,$in0,$in0,$inpperm
    vxor        $tmp,$tmp,$twk0
Lxts_enc6x_steal:
    vxor        $in0,$in0,$twk0
    vxor        $out0,$out0,$out0
    vspltisb    $out1,-1
    vperm        $out0,$out0,$out1,$inpperm
    vsel        $out0,$in0,$tmp,$out0    # $tmp is last block, remember?

    subi        r30,$out,17
    subi        $out,$out,16
    mtctr        $taillen
Loop_xts_enc6x_steal:
    lbzu        r0,1(r30)
    stb        r0,16(r30)
    bdnz        Loop_xts_enc6x_steal

    li        $taillen,0
    mtctr        $rounds
    b        Loop_xts_enc1x        # one more time...

.align    4
Lxts_enc6x_done:
    ${UCMP}i    $ivp,0
    beq        Lxts_enc6x_ret

    vxor        $tweak,$twk0,$rndkey0
    le?vperm    $tweak,$tweak,$tweak,$leperm
    stvx_u        $tweak,0,$ivp

Lxts_enc6x_ret:
    mtlr        r11
    li        r10,`$FRAME+15`
    li        r11,`$FRAME+31`
    stvx        $seven,r10,$sp        # wipe copies of round keys
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32
    stvx        $seven,r10,$sp
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32
    stvx        $seven,r10,$sp
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32
    stvx        $seven,r10,$sp
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32

    mtspr        256,$vrsave
    lvx        v20,r10,$sp        # ABI says so
    addi        r10,r10,32
    lvx        v21,r11,$sp
    addi        r11,r11,32
    lvx        v22,r10,$sp
    addi        r10,r10,32
    lvx        v23,r11,$sp
    addi        r11,r11,32
    lvx        v24,r10,$sp
    addi        r10,r10,32
    lvx        v25,r11,$sp
    addi        r11,r11,32
    lvx        v26,r10,$sp
    addi        r10,r10,32
    lvx        v27,r11,$sp
    addi        r11,r11,32
    lvx        v28,r10,$sp
    addi        r10,r10,32
    lvx        v29,r11,$sp
    addi        r11,r11,32
    lvx        v30,r10,$sp
    lvx        v31,r11,$sp
    $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
    blr
    .long        0
    .byte        0,12,0x04,1,0x80,6,6,0
    .long        0

.align    5
_aesp8_xts_enc5x:
    vcipher        $out0,$out0,v24
    vcipher        $out1,$out1,v24
    vcipher        $out2,$out2,v24
    vcipher        $out3,$out3,v24
    vcipher        $out4,$out4,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vcipher        $out0,$out0,v25
    vcipher        $out1,$out1,v25
    vcipher        $out2,$out2,v25
    vcipher        $out3,$out3,v25
    vcipher        $out4,$out4,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        _aesp8_xts_enc5x

    add        $inp,$inp,$taillen
    cmpwi        $taillen,0
    vcipher        $out0,$out0,v24
    vcipher        $out1,$out1,v24
    vcipher        $out2,$out2,v24
    vcipher        $out3,$out3,v24
    vcipher        $out4,$out4,v24

    subi        $inp,$inp,16
    vcipher        $out0,$out0,v25
    vcipher        $out1,$out1,v25
    vcipher        $out2,$out2,v25
    vcipher        $out3,$out3,v25
    vcipher        $out4,$out4,v25
     vxor        $twk0,$twk0,v31

    vcipher        $out0,$out0,v26
    lvsr        $inpperm,r0,$taillen    # $in5 is no longer needed
    vcipher        $out1,$out1,v26
    vcipher        $out2,$out2,v26
    vcipher        $out3,$out3,v26
    vcipher        $out4,$out4,v26
     vxor        $in1,$twk1,v31

    vcipher        $out0,$out0,v27
    lvx_u        $in0,0,$inp
    vcipher        $out1,$out1,v27
    vcipher        $out2,$out2,v27
    vcipher        $out3,$out3,v27
    vcipher        $out4,$out4,v27
     vxor        $in2,$twk2,v31

    addi        $key_,$sp,$FRAME+15    # rewind $key_
    vcipher        $out0,$out0,v28
    vcipher        $out1,$out1,v28
    vcipher        $out2,$out2,v28
    vcipher        $out3,$out3,v28
    vcipher        $out4,$out4,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]
     vxor        $in3,$twk3,v31

    vcipher        $out0,$out0,v29
    le?vperm    $in0,$in0,$in0,$leperm
    vcipher        $out1,$out1,v29
    vcipher        $out2,$out2,v29
    vcipher        $out3,$out3,v29
    vcipher        $out4,$out4,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]
     vxor        $in4,$twk4,v31

    vcipher        $out0,$out0,v30
    vperm        $in0,$in0,$in0,$inpperm
    vcipher        $out1,$out1,v30
    vcipher        $out2,$out2,v30
    vcipher        $out3,$out3,v30
    vcipher        $out4,$out4,v30

    vcipherlast    $out0,$out0,$twk0
    vcipherlast    $out1,$out1,$in1
    vcipherlast    $out2,$out2,$in2
    vcipherlast    $out3,$out3,$in3
    vcipherlast    $out4,$out4,$in4
    blr
    .long        0
    .byte        0,12,0x14,0,0,0,0,0

.align    5
_aesp8_xts_decrypt6x:
    $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
    mflr        r11
    li        r7,`$FRAME+8*16+15`
    li        r3,`$FRAME+8*16+31`
    $PUSH        r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
    stvx        v20,r7,$sp        # ABI says so
    addi        r7,r7,32
    stvx        v21,r3,$sp
    addi        r3,r3,32
    stvx        v22,r7,$sp
    addi        r7,r7,32
    stvx        v23,r3,$sp
    addi        r3,r3,32
    stvx        v24,r7,$sp
    addi        r7,r7,32
    stvx        v25,r3,$sp
    addi        r3,r3,32
    stvx        v26,r7,$sp
    addi        r7,r7,32
    stvx        v27,r3,$sp
    addi        r3,r3,32
    stvx        v28,r7,$sp
    addi        r7,r7,32
    stvx        v29,r3,$sp
    addi        r3,r3,32
    stvx        v30,r7,$sp
    stvx        v31,r3,$sp
    li        r0,-1
    stw        $vrsave,`$FRAME+21*16-4`($sp)    # save vrsave
    li        $x10,0x10
    $PUSH        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    li        $x20,0x20
    $PUSH        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    li        $x30,0x30
    $PUSH        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    li        $x40,0x40
    $PUSH        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    li        $x50,0x50
    $PUSH        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    li        $x60,0x60
    $PUSH        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    li        $x70,0x70
    mtspr        256,r0

    xxlor        2, 32+$eighty7, 32+$eighty7
    vsldoi        $eighty7,$tmp,$eighty7,1        # 0x010101..87
    xxlor        1, 32+$eighty7, 32+$eighty7

    # Load the XOR constants from Lconsts.
    mr        $x70, r6
    bl        Lconsts
    lxvw4x        0, $x40, r6        # load XOR constants
    mr        r6, $x70
    li        $x70,0x70

    subi        $rounds,$rounds,3    # -4 in total

    lvx        $rndkey0,$x00,$key1    # load key schedule
    lvx        v30,$x10,$key1
    addi        $key1,$key1,0x20
    lvx        v31,$x00,$key1
    ?vperm        $rndkey0,$rndkey0,v30,$keyperm
    addi        $key_,$sp,$FRAME+15
    mtctr        $rounds

Load_xts_dec_key:
    ?vperm        v24,v30,v31,$keyperm
    lvx        v30,$x10,$key1
    addi        $key1,$key1,0x20
    stvx        v24,$x00,$key_        # off-load round[1]
    ?vperm        v25,v31,v30,$keyperm
    lvx        v31,$x00,$key1
    stvx        v25,$x10,$key_        # off-load round[2]
    addi        $key_,$key_,0x20
    bdnz        Load_xts_dec_key

    lvx        v26,$x10,$key1
    ?vperm        v24,v30,v31,$keyperm
    lvx        v27,$x20,$key1
    stvx        v24,$x00,$key_        # off-load round[3]
    ?vperm        v25,v31,v26,$keyperm
    lvx        v28,$x30,$key1
    stvx        v25,$x10,$key_        # off-load round[4]
    addi        $key_,$sp,$FRAME+15    # rewind $key_
    ?vperm        v26,v26,v27,$keyperm
    lvx        v29,$x40,$key1
    ?vperm        v27,v27,v28,$keyperm
    lvx        v30,$x50,$key1
    ?vperm        v28,v28,v29,$keyperm
    lvx        v31,$x60,$key1
    ?vperm        v29,v29,v30,$keyperm
    lvx        $twk5,$x70,$key1    # borrow $twk5
    ?vperm        v30,v30,v31,$keyperm
    lvx        v24,$x00,$key_        # pre-load round[1]
    ?vperm        v31,v31,$twk5,$keyperm
    lvx        v25,$x10,$key_        # pre-load round[2]

     vperm        $in0,$inout,$inptail,$inpperm
     subi        $inp,$inp,31        # undo "caller"
    vxor        $twk0,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
    vand        $tmp,$tmp,$eighty7
     vxor        $out0,$in0,$twk0
    xxlor        32+$in1, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in1

     lvx_u        $in1,$x10,$inp
    vxor        $twk1,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in1,$in1,$in1,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out1,$in1,$twk1
    xxlor        32+$in2, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in2

     lvx_u        $in2,$x20,$inp
     andi.        $taillen,$len,15
    vxor        $twk2,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in2,$in2,$in2,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out2,$in2,$twk2
    xxlor        32+$in3, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in3

     lvx_u        $in3,$x30,$inp
     sub        $len,$len,$taillen
    vxor        $twk3,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in3,$in3,$in3,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out3,$in3,$twk3
    xxlor        32+$in4, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in4

     lvx_u        $in4,$x40,$inp
     subi        $len,$len,0x60
    vxor        $twk4,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in4,$in4,$in4,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out4,$in4,$twk4
    xxlor        32+$in5, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in5

     lvx_u        $in5,$x50,$inp
     addi        $inp,$inp,0x60
    vxor        $twk5,$tweak,$rndkey0
    vsrab        $tmp,$tweak,$seven    # next tweak value
    vaddubm        $tweak,$tweak,$tweak
     le?vperm    $in5,$in5,$in5,$leperm
    vand        $tmp,$tmp,$eighty7
     vxor        $out5,$in5,$twk5
    xxlor        32+$in0, 0, 0
    vpermxor    $tweak, $tweak, $tmp, $in0

    vxor        v31,v31,$rndkey0
    mtctr        $rounds
    b        Loop_xts_dec6x

.align    5
Loop_xts_dec6x:
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Loop_xts_dec6x

    xxlor        32+$eighty7, 1, 1    # 0x010101..87

    subic        $len,$len,96        # $len-=96
     vxor        $in0,$twk0,v31        # xor with last round key
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk0,$tweak,$rndkey0
     vaddubm    $tweak,$tweak,$tweak
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24

    subfe.        r0,r0,r0        # borrow?-1:0
     vand        $tmp,$tmp,$eighty7
    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
     xxlor        32+$in1, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in1
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
     vxor        $in1,$twk1,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk1,$tweak,$rndkey0
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25

    and        r0,r0,$len
     vaddubm    $tweak,$tweak,$tweak
    vncipher    $out0,$out0,v26
    vncipher    $out1,$out1,v26
     vand        $tmp,$tmp,$eighty7
    vncipher    $out2,$out2,v26
    vncipher    $out3,$out3,v26
     xxlor        32+$in2, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in2
    vncipher    $out4,$out4,v26
    vncipher    $out5,$out5,v26

    add        $inp,$inp,r0        # $inp is adjusted in such a
                        # way that at exit from the
                        # loop inX-in5 are loaded
                        # with the last "words"
     vxor        $in2,$twk2,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk2,$tweak,$rndkey0
     vaddubm    $tweak,$tweak,$tweak
    vncipher    $out0,$out0,v27
    vncipher    $out1,$out1,v27
    vncipher    $out2,$out2,v27
    vncipher    $out3,$out3,v27
     vand        $tmp,$tmp,$eighty7
    vncipher    $out4,$out4,v27
    vncipher    $out5,$out5,v27

    addi        $key_,$sp,$FRAME+15    # rewind $key_
     xxlor        32+$in3, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in3
    vncipher    $out0,$out0,v28
    vncipher    $out1,$out1,v28
     vxor        $in3,$twk3,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk3,$tweak,$rndkey0
    vncipher    $out2,$out2,v28
    vncipher    $out3,$out3,v28
     vaddubm    $tweak,$tweak,$tweak
    vncipher    $out4,$out4,v28
    vncipher    $out5,$out5,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]
     vand        $tmp,$tmp,$eighty7

    vncipher    $out0,$out0,v29
    vncipher    $out1,$out1,v29
     xxlor        32+$in4, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in4
    vncipher    $out2,$out2,v29
    vncipher    $out3,$out3,v29
     vxor        $in4,$twk4,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk4,$tweak,$rndkey0
    vncipher    $out4,$out4,v29
    vncipher    $out5,$out5,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]
     vaddubm    $tweak,$tweak,$tweak

    vncipher    $out0,$out0,v30
    vncipher    $out1,$out1,v30
     vand        $tmp,$tmp,$eighty7
    vncipher    $out2,$out2,v30
    vncipher    $out3,$out3,v30
     xxlor        32+$in5, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in5
    vncipher    $out4,$out4,v30
    vncipher    $out5,$out5,v30
     vxor        $in5,$twk5,v31
     vsrab        $tmp,$tweak,$seven    # next tweak value
     vxor        $twk5,$tweak,$rndkey0

    vncipherlast    $out0,$out0,$in0
     lvx_u        $in0,$x00,$inp        # load next input block
     vaddubm    $tweak,$tweak,$tweak
    vncipherlast    $out1,$out1,$in1
     lvx_u        $in1,$x10,$inp
    vncipherlast    $out2,$out2,$in2
     le?vperm    $in0,$in0,$in0,$leperm
     lvx_u        $in2,$x20,$inp
     vand        $tmp,$tmp,$eighty7
    vncipherlast    $out3,$out3,$in3
     le?vperm    $in1,$in1,$in1,$leperm
     lvx_u        $in3,$x30,$inp
    vncipherlast    $out4,$out4,$in4
     le?vperm    $in2,$in2,$in2,$leperm
     lvx_u        $in4,$x40,$inp
     xxlor        10, 32+$in0, 32+$in0
     xxlor        32+$in0, 0, 0
     vpermxor    $tweak, $tweak, $tmp, $in0
     xxlor        32+$in0, 10, 10
    vncipherlast    $out5,$out5,$in5
     le?vperm    $in3,$in3,$in3,$leperm
     lvx_u        $in5,$x50,$inp
     addi        $inp,$inp,0x60
     le?vperm    $in4,$in4,$in4,$leperm
     le?vperm    $in5,$in5,$in5,$leperm

    le?vperm    $out0,$out0,$out0,$leperm
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
     vxor        $out0,$in0,$twk0
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
     vxor        $out1,$in1,$twk1
    le?vperm    $out3,$out3,$out3,$leperm
    stvx_u        $out2,$x20,$out
     vxor        $out2,$in2,$twk2
    le?vperm    $out4,$out4,$out4,$leperm
    stvx_u        $out3,$x30,$out
     vxor        $out3,$in3,$twk3
    le?vperm    $out5,$out5,$out5,$leperm
    stvx_u        $out4,$x40,$out
     vxor        $out4,$in4,$twk4
    stvx_u        $out5,$x50,$out
     vxor        $out5,$in5,$twk5
    addi        $out,$out,0x60

    mtctr        $rounds
    beq        Loop_xts_dec6x        # did $len-=96 borrow?

    xxlor        32+$eighty7, 2, 2    # 0x010101..87

    addic.        $len,$len,0x60
    beq        Lxts_dec6x_zero
    cmpwi        $len,0x20
    blt        Lxts_dec6x_one
    nop
    beq        Lxts_dec6x_two
    cmpwi        $len,0x40
    blt        Lxts_dec6x_three
    nop
    beq        Lxts_dec6x_four

Lxts_dec6x_five:
    vxor        $out0,$in1,$twk0
    vxor        $out1,$in2,$twk1
    vxor        $out2,$in3,$twk2
    vxor        $out3,$in4,$twk3
    vxor        $out4,$in5,$twk4

    bl        _aesp8_xts_dec5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk5        # unused tweak
    vxor        $twk1,$tweak,$rndkey0
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    vxor        $out0,$in0,$twk1
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$leperm
    stvx_u        $out2,$x20,$out
    le?vperm    $out4,$out4,$out4,$leperm
    stvx_u        $out3,$x30,$out
    stvx_u        $out4,$x40,$out
    addi        $out,$out,0x50
    bne        Lxts_dec6x_steal
    b        Lxts_dec6x_done

.align    4
Lxts_dec6x_four:
    vxor        $out0,$in2,$twk0
    vxor        $out1,$in3,$twk1
    vxor        $out2,$in4,$twk2
    vxor        $out3,$in5,$twk3
    vxor        $out4,$out4,$out4

    bl        _aesp8_xts_dec5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk4        # unused tweak
    vmr        $twk1,$twk5
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    vxor        $out0,$in0,$twk5
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
    le?vperm    $out3,$out3,$out3,$leperm
    stvx_u        $out2,$x20,$out
    stvx_u        $out3,$x30,$out
    addi        $out,$out,0x40
    bne        Lxts_dec6x_steal
    b        Lxts_dec6x_done

.align    4
Lxts_dec6x_three:
    vxor        $out0,$in3,$twk0
    vxor        $out1,$in4,$twk1
    vxor        $out2,$in5,$twk2
    vxor        $out3,$out3,$out3
    vxor        $out4,$out4,$out4

    bl        _aesp8_xts_dec5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk3        # unused tweak
    vmr        $twk1,$twk4
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    vxor        $out0,$in0,$twk4
    le?vperm    $out2,$out2,$out2,$leperm
    stvx_u        $out1,$x10,$out
    stvx_u        $out2,$x20,$out
    addi        $out,$out,0x30
    bne        Lxts_dec6x_steal
    b        Lxts_dec6x_done

.align    4
Lxts_dec6x_two:
    vxor        $out0,$in4,$twk0
    vxor        $out1,$in5,$twk1
    vxor        $out2,$out2,$out2
    vxor        $out3,$out3,$out3
    vxor        $out4,$out4,$out4

    bl        _aesp8_xts_dec5x

    le?vperm    $out0,$out0,$out0,$leperm
    vmr        $twk0,$twk2        # unused tweak
    vmr        $twk1,$twk3
    le?vperm    $out1,$out1,$out1,$leperm
    stvx_u        $out0,$x00,$out        # store output
    vxor        $out0,$in0,$twk3
    stvx_u        $out1,$x10,$out
    addi        $out,$out,0x20
    bne        Lxts_dec6x_steal
    b        Lxts_dec6x_done

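    # single remaining block, decrypted two rounds per iteration with
    # round keys streamed from the stack copy (ctr still holds $rounds)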
.align    4
Lxts_dec6x_one:
    vxor        $out0,$in5,$twk0
    nop
Loop_xts_dec1x:
    vncipher    $out0,$out0,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out0,$out0,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Loop_xts_dec1x

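    # when $taillen==0, r0 becomes 16 and $inp steps back one block so
    # the speculative tail load below stays inside the buffer; the
    # loaded block is only used if a tail exists (bne further down)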
    subi        r0,$taillen,1
    vncipher    $out0,$out0,v24

    andi.        r0,r0,16
    cmpwi        $taillen,0
    vncipher    $out0,$out0,v25

    sub        $inp,$inp,r0
    vncipher    $out0,$out0,v26

    lvx_u        $in0,0,$inp
    vncipher    $out0,$out0,v27

    addi        $key_,$sp,$FRAME+15    # rewind $key_
    vncipher    $out0,$out0,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]

    vncipher    $out0,$out0,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]
     vxor        $twk0,$twk0,v31

    le?vperm    $in0,$in0,$in0,$leperm
    vncipher    $out0,$out0,v30

    mtctr        $rounds
    vncipherlast    $out0,$out0,$twk0

    vmr        $twk0,$twk1        # unused tweak
    vmr        $twk1,$twk2
    le?vperm    $out0,$out0,$out0,$leperm
    stvx_u        $out0,$x00,$out        # store output
    addi        $out,$out,0x10
    vxor        $out0,$in0,$twk2
    bne        Lxts_dec6x_steal
    b        Lxts_dec6x_done

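    # no whole blocks left: done unless a partial tail remains, in
    # which case the last full block is keyed with the later tweak
    # ($twk1) and falls through into the stealing code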
.align    4
Lxts_dec6x_zero:
    cmpwi        $taillen,0
    beq        Lxts_dec6x_done

    lvx_u        $in0,0,$inp
    le?vperm    $in0,$in0,$in0,$leperm
    vxor        $out0,$in0,$twk1
Lxts_dec6x_steal:
    vncipher    $out0,$out0,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out0,$out0,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        Lxts_dec6x_steal

    add        $inp,$inp,$taillen
    vncipher    $out0,$out0,v24

    cmpwi        $taillen,0
    vncipher    $out0,$out0,v25

    lvx_u        $in0,0,$inp
    vncipher    $out0,$out0,v26

    lvsr        $inpperm,0,$taillen    # $in5 is no more
    vncipher    $out0,$out0,v27

    addi        $key_,$sp,$FRAME+15    # rewind $key_
    vncipher    $out0,$out0,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]

    vncipher    $out0,$out0,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]
     vxor        $twk1,$twk1,v31

    le?vperm    $in0,$in0,$in0,$leperm
    vncipher    $out0,$out0,v30

    vperm        $in0,$in0,$in0,$inpperm
    vncipherlast    $tmp,$out0,$twk1

    le?vperm    $out0,$tmp,$tmp,$leperm
    le?stvx_u    $out0,0,$out
    be?stvx_u    $tmp,0,$out

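    # ciphertext stealing: build a select mask from $inpperm (zero for
    # the first $taillen bytes, ones beyond), take the stolen tail
    # ciphertext bytes from $in0 and the rest from the block just
    # decrypted in $tmp, then key the spliced block with $twk0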
    vxor        $out0,$out0,$out0
    vspltisb    $out1,-1
    vperm        $out0,$out0,$out1,$inpperm
    vsel        $out0,$in0,$tmp,$out0
    vxor        $out0,$out0,$twk0

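    # copy the $taillen plaintext bytes of the block just stored at
    # $out up by 16, into their final place past the last full block;
    # the spliced block is then decrypted over $out one more time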
    subi        r30,$out,1
    mtctr        $taillen
Loop_xts_dec6x_steal:
    lbzu        r0,1(r30)
    stb        r0,16(r30)
    bdnz        Loop_xts_dec6x_steal

    li        $taillen,0
    mtctr        $rounds
    b        Loop_xts_dec1x        # one more time...

.align    4
Lxts_dec6x_done:
    ${UCMP}i    $ivp,0
    beq        Lxts_dec6x_ret

    vxor        $tweak,$twk0,$rndkey0
    le?vperm    $tweak,$tweak,$tweak,$leperm
    stvx_u        $tweak,0,$ivp

Lxts_dec6x_ret:
    mtlr        r11
    li        r10,`$FRAME+15`
    li        r11,`$FRAME+31`
    stvx        $seven,r10,$sp        # wipe copies of round keys
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32
    stvx        $seven,r10,$sp
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32
    stvx        $seven,r10,$sp
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32
    stvx        $seven,r10,$sp
    addi        r10,r10,32
    stvx        $seven,r11,$sp
    addi        r11,r11,32

    mtspr        256,$vrsave
    lvx        v20,r10,$sp        # ABI says so
    addi        r10,r10,32
    lvx        v21,r11,$sp
    addi        r11,r11,32
    lvx        v22,r10,$sp
    addi        r10,r10,32
    lvx        v23,r11,$sp
    addi        r11,r11,32
    lvx        v24,r10,$sp
    addi        r10,r10,32
    lvx        v25,r11,$sp
    addi        r11,r11,32
    lvx        v26,r10,$sp
    addi        r10,r10,32
    lvx        v27,r11,$sp
    addi        r11,r11,32
    lvx        v28,r10,$sp
    addi        r10,r10,32
    lvx        v29,r11,$sp
    addi        r11,r11,32
    lvx        v30,r10,$sp
    lvx        v31,r11,$sp
    $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
    blr
    .long        0
    .byte        0,12,0x04,1,0x80,6,6,0
    .long        0

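# Tail helper: finishes the AES rounds for up to five in-flight blocks
# in $out0-$out4, streaming round keys v24/v25 from the stack copy and
# pre-mixing each tweak with v31 for the closing vncipherlast; it also
# sets up $inp/$in0 for a possible stolen-tail block on return.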
.align    5
_aesp8_xts_dec5x:
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    lvx        v24,$x20,$key_        # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    lvx        v25,$x10,$key_        # round[4]
    bdnz        _aesp8_xts_dec5x

    subi        r0,$taillen,1
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24

    andi.        r0,r0,16
    cmpwi        $taillen,0
    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
     vxor        $twk0,$twk0,v31

    sub        $inp,$inp,r0
    vncipher    $out0,$out0,v26
    vncipher    $out1,$out1,v26
    vncipher    $out2,$out2,v26
    vncipher    $out3,$out3,v26
    vncipher    $out4,$out4,v26
     vxor        $in1,$twk1,v31

    vncipher    $out0,$out0,v27
    lvx_u        $in0,0,$inp
    vncipher    $out1,$out1,v27
    vncipher    $out2,$out2,v27
    vncipher    $out3,$out3,v27
    vncipher    $out4,$out4,v27
     vxor        $in2,$twk2,v31

    addi        $key_,$sp,$FRAME+15    # rewind $key_
    vncipher    $out0,$out0,v28
    vncipher    $out1,$out1,v28
    vncipher    $out2,$out2,v28
    vncipher    $out3,$out3,v28
    vncipher    $out4,$out4,v28
    lvx        v24,$x00,$key_        # re-pre-load round[1]
     vxor        $in3,$twk3,v31

    vncipher    $out0,$out0,v29
    le?vperm    $in0,$in0,$in0,$leperm
    vncipher    $out1,$out1,v29
    vncipher    $out2,$out2,v29
    vncipher    $out3,$out3,v29
    vncipher    $out4,$out4,v29
    lvx        v25,$x10,$key_        # re-pre-load round[2]
     vxor        $in4,$twk4,v31

    vncipher    $out0,$out0,v30
    vncipher    $out1,$out1,v30
    vncipher    $out2,$out2,v30
    vncipher    $out3,$out3,v30
    vncipher    $out4,$out4,v30

    vncipherlast    $out0,$out0,$twk0
    vncipherlast    $out1,$out1,$in1
    vncipherlast    $out2,$out2,$in2
    vncipherlast    $out3,$out3,$in3
    vncipherlast    $out4,$out4,$in4
    mtctr        $rounds
    blr
    .long        0
    .byte        0,12,0x14,0,0,0,0,0
___
}}    }}}

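# Post-process $code before printing: evaluate the `...` arithmetic,
# emit the constants table in the flavour's byte order, and rewrite the
# '?'-prefixed endian-sensitive constructs.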
my $consts=1;
foreach(split("\n",$code)) {
    s/\`([^\`]*)\`/eval($1)/geo;

    # constants table endian-specific conversion
    if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
        my $conv=$3;
        my @bytes=();

        # convert to endian-agnostic format
        if ($1 eq "long") {
            foreach (split(/,\s*/,$2)) {
                my $l = /^0/?oct:int;
                push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
            }
        } else {
            @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
        }

        # little-endian conversion
        if ($flavour =~ /le$/o) {
        SWITCH: for($conv)  {
            /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
            /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
        }
        }

        #emit
        print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
        next;
    }
    $consts=0 if (m/Lconsts:/o);    # end of table

    # instructions prefixed with '?' are endian-specific and need
    # to be adjusted accordingly...
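    # (e.g. on little-endian, '?lvsr' assembles as lvsl and the two
    # source vector operands of a '?vperm' are swapped)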
    if ($flavour =~ /le$/o) {    # little-endian
        s/le\?//o        or
        s/be\?/#be#/o    or
        s/\?lvsr/lvsl/o    or
        s/\?lvsl/lvsr/o    or
        s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
        s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
        s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
    } else {            # big-endian
        s/le\?/#le#/o    or
        s/be\?//o        or
        s/\?([a-z]+)/$1/o;
    }

    print $_,"\n";
}

close STDOUT;
