Replace the load of the tweak vector constant (.Lxts_mul_x) from memory
with a short movi/uzp1 instruction sequence that composes the vector
directly in a register.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
        eor             \out\().16b, \out\().16b, \tmp\().16b
        .endm
 
-       .align          4
-.Lxts_mul_x:
-CPU_LE(        .quad           1, 0x87         )
-CPU_BE(        .quad           0x87, 1         )
-
        /*
         * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                   int blocks, u8 iv[])
        mov             x23, x4
        mov             x24, x5
 
-0:     ldr             q30, .Lxts_mul_x
+0:     movi            v30.2s, #0x1
+       movi            v25.2s, #0x87
+       uzp1            v30.4s, v30.4s, v25.4s
        ld1             {v25.16b}, [x24]
 
 99:    adr             x7, \do8