/*
 * Written by Solar Designer <solar at openwall.com> in 1998-2010.
 * No copyright is claimed, and the software is hereby placed in the public
 * domain.  In case this attempt to disclaim copyright and place the software
 * in the public domain is deemed null and void, then the software is
 * Copyright (c) 1998-2010 Solar Designer and it is hereby released to the
 * general public under the following terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * There's ABSOLUTELY NO WARRANTY, express or implied.
 *
 * See crypt_blowfish.c for more information.
 */
#ifdef __i386__

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 * This file must be run through the C preprocessor before assembly.
 * Target: 32-bit x86; the single argument is read from the stack.
 */

/*
 * Per-target quirks:
 *   UNDERSCORES - the exported symbol gets one more leading underscore.
 *   ALIGN_LOG   - .align is given the exponent instead of the byte count.
 */
#if defined(__OpenBSD__) && !defined(__ELF__)
#define UNDERSCORES
#define ALIGN_LOG
#endif

#if defined(__CYGWIN32__) || defined(__MINGW32__)
#define UNDERSCORES
#endif

#ifdef __DJGPP__
#define UNDERSCORES
#define ALIGN_LOG
#endif

#ifdef UNDERSCORES
#define _BF_body_r			__BF_body_r
#endif

/*
 * DO_ALIGN(log) requests 2^log byte alignment in whichever form the
 * assembler accepts.  DUMBAS selects spellings with fewer parentheses
 * and no multiplication (presumably for assemblers with a limited
 * expression parser - it is not defined anywhere in this file).
 */
#ifdef ALIGN_LOG
#define DO_ALIGN(log)			.align (log)
#elif defined(DUMBAS)
#define DO_ALIGN(log)			.align 1 << log
#else
#define DO_ALIGN(log)			.align (1 << (log))
#endif

/*
 * While the function body runs, %esp (ctx) is parked BF_FRAME bytes below
 * the structure passed by the caller, so BF_FRAME(ctx) is the address of
 * that structure.  The two words at the very bottom of the frame are:
 *   0(ctx) - BF_ptr, the current store position
 *   4(ctx) - the stack pointer to restore before returning
 */
#define BF_FRAME			0x200
#define ctx				%esp

#define BF_ptr				(ctx)

/*
 * S(N, r): 32-bit word number r of the table that starts N bytes into the
 *          structure (four tables, at 0, 0x400, 0x800 and 0xC00).
 * P(N):    32-bit word number N of the array that starts 0x1000 bytes into
 *          the structure.
 */
#define S(N, r)				N+BF_FRAME(ctx,r,4)
#ifdef DUMBAS
#define P(N)				0x1000+N+N+N+N+BF_FRAME(ctx)
#else
#define P(N)				0x1000+4*N+BF_FRAME(ctx)
#endif

/*
 * This version of the assembly code is optimized primarily for the original
 * Intel Pentium but is also careful to avoid partial register stalls on the
 * Pentium Pro family of processors (tested up to Pentium III Coppermine).
 *
 * It is possible to do 15% faster on the Pentium Pro family and probably on
 * many non-Intel x86 processors, but, unfortunately, that would make things
 * twice slower for the original Pentium.
 *
 * An additional 2% speedup may be achieved with non-reentrant code.
 */

/*
 * Register roles.  L and R hold the two halves of the block being
 * encrypted; tmp2 carries the next P word from one round into the next;
 * tmp3 is only ever written through its low byte, so its upper 24 bits
 * have to be zero before the first round.
 */
#define L				%esi
#define R				%edi
#define tmp1				%eax
#define tmp1_lo				%al
#define tmp2				%ecx
#define tmp2_hi				%ch
#define tmp3				%edx
#define tmp3_lo				%dl
#define tmp4				%ebx
#define tmp4_hi				%bh
#define tmp5				%ebp

.text

/*
 * BF_ROUND(L, R, N): one round.  The macro parameters L and R shadow the
 * register defines above so that the caller can swap the halves.
 *
 * In:   tmp2 = P[N]; tmp3 has bits 8..31 clear.
 * Does: L ^= P[N]
 *       R ^= ((S0[L >> 24] + S1[(L >> 16) & 0xFF]) ^ S2[(L >> 8) & 0xFF])
 *            + S3[L & 0xFF]
 * Out:  tmp2 = P[N + 1], ready for the next round.
 * Clobbers tmp1, tmp4, tmp5, the low byte of tmp3, and the flags.
 */
#define BF_ROUND(L, R, N) \
	xorl L,tmp2; \
	xorl tmp1,tmp1; \
	movl tmp2,L; \
	shrl $16,tmp2; \
	movl L,tmp4; \
	movb tmp2_hi,tmp1_lo; \
	andl $0xFF,tmp2; \
	movb tmp4_hi,tmp3_lo; \
	andl $0xFF,tmp4; \
	movl S(0,tmp1),tmp1; \
	movl S(0x400,tmp2),tmp5; \
	addl tmp5,tmp1; \
	movl S(0x800,tmp3),tmp5; \
	xorl tmp5,tmp1; \
	movl S(0xC00,tmp4),tmp5; \
	addl tmp1,tmp5; \
	movl 4+P(N),tmp2; \
	xorl tmp5,R

/*
 * BF_ENCRYPT_START: sixteen rounds with the halves alternating, then the
 * beginning of the final swap.  On exit tmp5 = BF_ptr, tmp2 = L ^ P[16]
 * and L = P[17].
 *
 * BF_ENCRYPT_END: completes the swap, leaving L = P[17] ^ old R and
 * R = old L ^ P[16].  The pair is split in two so that callers can
 * schedule an unrelated instruction between the halves.
 */
#define BF_ENCRYPT_START \
	BF_ROUND(L, R, 0); \
	BF_ROUND(R, L, 1); \
	BF_ROUND(L, R, 2); \
	BF_ROUND(R, L, 3); \
	BF_ROUND(L, R, 4); \
	BF_ROUND(R, L, 5); \
	BF_ROUND(L, R, 6); \
	BF_ROUND(R, L, 7); \
	BF_ROUND(L, R, 8); \
	BF_ROUND(R, L, 9); \
	BF_ROUND(L, R, 10); \
	BF_ROUND(R, L, 11); \
	BF_ROUND(L, R, 12); \
	BF_ROUND(R, L, 13); \
	BF_ROUND(L, R, 14); \
	BF_ROUND(R, L, 15); \
	movl BF_ptr,tmp5; \
	xorl L,tmp2; \
	movl P(17),L

#define BF_ENCRYPT_END \
	xorl R,L; \
	movl tmp2,R

/*
 * _BF_body_r
 *
 * In:    4(%esp) = pointer to a structure laid out as
 *                    +0x0000  four tables of 256 32-bit words (see S())
 *                    +0x1000  eighteen 32-bit words (see P())
 * Does:  starting from L = R = 0, encrypts the running (L, R) pair over
 *        and over, storing each result first over successive pairs of P
 *        words and then over successive pairs of S words, so every word
 *        of both areas is replaced.
 * Saves: %ebp, %ebx, %esi, %edi (pushed on entry, popped before ret).
 * Clobb: %eax, %ecx, %edx, flags.  No return value is set up.
 * Stack: %esp is temporarily moved to BF_FRAME bytes below the structure
 *        so that it can serve as the base register; the 8 bytes just
 *        above the moved %esp are written.  Control goes to BF_die when
 *        the move would raise %esp instead of lowering it.
 */
DO_ALIGN(5)
.globl _BF_body_r
_BF_body_r:
	movl 4(%esp),%eax		/* eax = pointer argument */
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	subl $BF_FRAME-8,%eax		/* new %esp, before the 2 pushes below */
	xorl L,L
	cmpl %esp,%eax			/* unsigned: new %esp above current one? */
	ja BF_die
	xchgl %eax,%esp			/* switch; eax = %esp to restore later */
	xorl R,R
	pushl %eax			/* ends up at 4(ctx) */
/* ctx is still 4 bytes above its final value here, hence the "-4". */
	leal 0x1000+BF_FRAME-4(ctx),%eax	/* eax = &P[0] */
	movl 0x1000+BF_FRAME-4(ctx),tmp2	/* tmp2 = P[0] */
	pushl %eax			/* BF_ptr = &P[0]; ctx is now final */
	xorl tmp3,tmp3			/* BF_ROUND needs tmp3 bits 8..31 clear */

/* One encryption per pass; each pass overwrites two P words. */
BF_loop_P:
	BF_ENCRYPT_START
	addl $8,tmp5			/* tmp5 was loaded from BF_ptr */
	BF_ENCRYPT_END
	leal 0x1000+18*4+BF_FRAME(ctx),tmp1	/* one past the last P word */
	movl tmp5,BF_ptr
	cmpl tmp5,tmp1			/* the movl's below leave flags alone */
	movl L,-8(tmp5)
	movl R,-4(tmp5)
	movl P(0),tmp2			/* reload P[0] only after the stores */
	ja BF_loop_P			/* while end > tmp5 (unsigned) */

	leal BF_FRAME(ctx),tmp5		/* tmp5 = first S word */
	xorl tmp3,tmp3
	movl tmp5,BF_ptr

/* Four encryptions per pass; each pass overwrites eight S words. */
BF_loop_S:
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,(tmp5)
	movl R,4(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,8(tmp5)
	movl R,12(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,16(tmp5)
	movl R,20(tmp5)
	BF_ENCRYPT_START
	addl $32,tmp5
	BF_ENCRYPT_END
	leal 0x1000+BF_FRAME(ctx),tmp1	/* one past the last S word */
	movl tmp5,BF_ptr
	cmpl tmp5,tmp1
	movl P(0),tmp2
	movl L,-8(tmp5)
	movl R,-4(tmp5)
	ja BF_loop_S			/* while end > tmp5 (unsigned) */

	movl 4(%esp),%esp		/* back to the stack saved on entry */
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * Reached with the four callee-saved registers still pushed and %esp not
 * yet switched.  hlt is a privileged instruction, so outside ring 0 this
 * normally raises a fault; the jmp covers the case where execution
 * somehow continues.
 */
BF_die:
/* Oops, need to re-compile with a larger BF_FRAME. */
	hlt
	jmp BF_die

#endif
#if defined(__ELF__) && defined(__linux__)
/*
 * Empty, flag-less .note.GNU-stack section: the GNU toolchain convention
 * for declaring that this object does not need an executable stack.
 * The flags string must use plain ASCII double quotes.
 */
.section .note.GNU-stack,"",%progbits
#endif