# This code is taken from the OpenSSL project but the author (Andy Polyakov) # has relicensed it under the GPLv2. Therefore this program is free software; # you can redistribute it and/or modify it under the terms of the GNU General # Public License version 2 as published by the Free Software Foundation. # # The original headers, including the original license headers, are # included below for completeness.
# ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see https://www.openssl.org/~appro/cryptogams/. # ==================================================================== # # GHASH for PowerISA v2.07. # # July 2014 # # Accurate performance measurements are problematic, because it's # always virtualized setup with possibly throttled processor. # Relative comparison is therefore more informative. This initial # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x # faster than "4-bit" integer-only compiler-generated 64-bit code. # "Initial version" means that there is room for futher improvement.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my $vrsave="r12";
$code=<<___;
.machine "any"
.text
.globl .gcm_init_p8
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
li r9,0x20
mtspr 256,r0
li r10,0x30
lvx_u $H,0,r4 # load H
le?xor r7,r7,r7
le?addi r7,r7,0x8 # need a vperm start with 08
le?lvsr 5,0,r7
le?vspltisb 6,0x0f
le?vxor 5,5,6 # set a b-endian mask
le?vperm $H,$H,$H,5
vspltisb $xC2,-16 # 0xf0
vspltisb $t0,1 # one
vaddubm $xC2,$xC2,$xC2 # 0xe0
vxor $zero,$zero,$zero
vor $xC2,$xC2,$t0 # 0xe1
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
vsldoi $t1,$zero,$t0,1 # ...1
vaddubm $xC2,$xC2,$xC2 # 0xc2...
vspltisb $t2,7
vor $xC2,$xC2,$t1 # 0xc2....01
vspltb $t1,$H,0 # most significant byte
vsl $H,$H,$t0 # H<<=1
vsrab $t1,$t1,$t2 # broadcast carry bit
vand $t1,$t1,$xC2
vxor $H,$H,$t1 # twisted H
vsldoi $H,$H,$H,8 # twist even more ...
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
vsldoi $Hl,$zero,$H,8 # ... and split
vsldoi $Hh,$H,$zero,8
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.