Merge branch 'rust-team'

Change-Id: Iee31c5de29c357c822f60df4fa8ce758779eb349
author: Efraim Flashner <efraim@flashner.co.il> 2024-02-28 12:18:45 +0200
committer: Efraim Flashner <efraim@flashner.co.il> 2024-02-28 12:18:45 +0200
commit: f29f80c194d0c534a92354b2bc19022a9b70ecf8 (patch)
tree: adc3c4114f59ef88ed3e097a8ec8517979f71562 /gnu/packages/patches
parent: c034088e37b51018d5bfeb88d822c559b38d51db (diff)
parent: 7947d47c9b891d2461ca9e7c53048d0e44294b5d (diff)
download: guix-f29f80c194d0c534a92354b2bc19022a9b70ecf8.tar
guix-f29f80c194d0c534a92354b2bc19022a9b70ecf8.tar.gz
9 files changed, 899 insertions, 2402 deletions
diff --git a/gnu/packages/patches/i3status-rust-enable-unstable-features.patch b/gnu/packages/patches/i3status-rust-enable-unstable-features.patch
deleted file mode 100644
index 535b45d865..0000000000
--- a/gnu/packages/patches/i3status-rust-enable-unstable-features.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-From 3006390c0058f06c255d9850327163aebcdc3b40 Mon Sep 17 00:00:00 2001
-From: Petr Hodina <phodina@protonmail.com>
-Date: Mon, 11 Oct 2021 22:35:39 +0200
-Subject: [PATCH] Enable unstable features.
-
-
-diff --git a/src/main.rs b/src/main.rs
-index d9e964b..820e22e 100644
---- a/src/main.rs
-+++ b/src/main.rs
-@@ -1,3 +1,6 @@
-+#![feature(bool_to_option)]
-+#![feature(clamp)]
-+
- #[macro_use]
- mod de;
- #[macro_use]
---
-2.33.0
-
diff --git a/gnu/packages/patches/maturin-no-cross-compile.patch b/gnu/packages/patches/maturin-no-cross-compile.patch
index 7394d0854e..98af33d3c7 100644
--- a/gnu/packages/patches/maturin-no-cross-compile.patch
+++ b/gnu/packages/patches/maturin-no-cross-compile.patch
@@ -2,27 +2,27 @@ Remove dependencies on xwin and zig.  We're not offering cross-compilation
 options using these crates.
 
 diff --git a/Cargo.toml b/Cargo.toml
-index 6cbdca3..22ea5ef 100644
+index 6704e46..ff126a9 100644
 --- a/Cargo.toml
 +++ b/Cargo.toml
-@@ -76,16 +76,6 @@ version = "0.1.4"
+@@ -83,16 +83,6 @@ version = "0.1.16"
  [dependencies.cargo-options]
- version = "0.6.0"
+ version = "0.7.2"
  
 -[dependencies.cargo-xwin]
--version = "0.14.3"
+-version = "0.16.2"
 -optional = true
 -default-features = false
 -
 -[dependencies.cargo-zigbuild]
--version = "0.16.10"
+-version = "0.18.0"
 -optional = true
 -default-features = false
 -
  [dependencies.cargo_metadata]
- version = "0.15.3"
+ version = "0.18.0"
  
-@@ -310,8 +300,6 @@ version = "4.3.0"
+@@ -321,8 +311,6 @@ version = "5.0.0"
  [features]
  cli-completion = ["dep:clap_complete_command"]
  cross-compile = [
@@ -31,7 +31,7 @@ index 6cbdca3..22ea5ef 100644
  ]
  default = [
      "full",
-@@ -330,7 +318,6 @@ log = ["tracing-subscriber"]
+@@ -341,7 +329,6 @@ log = ["tracing-subscriber"]
  native-tls = [
      "dep:native-tls",
      "ureq?/native-tls",
@@ -39,7 +39,7 @@ index 6cbdca3..22ea5ef 100644
      "dep:rustls-pemfile",
  ]
  password-storage = [
-@@ -340,7 +327,6 @@ password-storage = [
+@@ -351,7 +338,6 @@ password-storage = [
  rustls = [
      "dep:rustls",
      "ureq?/tls",
@@ -47,7 +47,7 @@ index 6cbdca3..22ea5ef 100644
      "dep:rustls-pemfile",
  ]
  scaffolding = [
-@@ -358,5 +344,3 @@ upload = [
+@@ -369,5 +355,3 @@ upload = [
      "wild",
      "dep:dirs",
  ]
diff --git a/gnu/packages/patches/rust-cargo-edit-remove-ureq.patch b/gnu/packages/patches/rust-cargo-edit-remove-ureq.patch
new file mode 100644
index 0000000000..99c58103c7
--- /dev/null
+++ b/gnu/packages/patches/rust-cargo-edit-remove-ureq.patch
@@ -0,0 +1,33 @@
+This is modeled after the upstream commit which removes ureq as a dependency.
+
+diff --git a/Cargo.toml b/Cargo.toml
+index 5a787e1..27171c7 100644
+--- a/Cargo.toml
++++ b/Cargo.toml
+@@ -201,26 +201,3 @@ test-external-apis = []
+ upgrade = ["cli"]
+ vendored-libgit2 = ["git2/vendored-libgit2"]
+ vendored-openssl = ["git2/vendored-openssl"]
+-
+-[target."cfg(any(target_arch = \"x86_64\", target_arch = \"arm\", target_arch = \"x86\", target_arch = \"aarch64\"))".dependencies.ureq]
+-version = "2.7.1"
+-features = [
+-    "tls",
+-    "json",
+-    "socks",
+-    "socks-proxy",
+-]
+-default-features = false
+-
+-[target."cfg(not(any(target_arch = \"x86_64\", target_arch = \"arm\", target_arch = \"x86\", target_arch = \"aarch64\")))".dependencies.native-tls]
+-version = "^0.2"
+-
+-[target."cfg(not(any(target_arch = \"x86_64\", target_arch = \"arm\", target_arch = \"x86\", target_arch = \"aarch64\")))".dependencies.ureq]
+-version = "2.7.1"
+-features = [
+-    "native-tls",
+-    "json",
+-    "socks",
+-    "socks-proxy",
+-]
+-default-features = false
diff --git a/gnu/packages/patches/rust-ring-0.16-missing-files.patch b/gnu/packages/patches/rust-ring-0.16-missing-files.patch
deleted file mode 100644
index fa2f94a801..0000000000
--- a/gnu/packages/patches/rust-ring-0.16-missing-files.patch
+++ /dev/null
@@ -1,2293 +0,0 @@
-These 4 files exist in the git repository for rust-ring, at the same
-commit that release 0.16.20 was made from. They were not added to the
-include list in Cargo.toml, so they were left out of the tarball.
-
----
- crypto/curve25519/make_curve25519_tables.py   | 222 +++++
- crypto/fipsmodule/aes/asm/vpaes-armv7.pl      | 896 ++++++++++++++++++
- crypto/fipsmodule/aes/asm/vpaes-armv8.pl      | 837 ++++++++++++++++
- .../fipsmodule/modes/asm/ghash-neon-armv8.pl  | 294 ++++++
- 4 files changed, 2249 insertions(+)
- create mode 100755 crypto/curve25519/make_curve25519_tables.py
- create mode 100644 crypto/fipsmodule/aes/asm/vpaes-armv7.pl
- create mode 100755 crypto/fipsmodule/aes/asm/vpaes-armv8.pl
- create mode 100644 crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
-
-diff --git a/crypto/curve25519/make_curve25519_tables.py b/crypto/curve25519/make_curve25519_tables.py
-new file mode 100755
-index 0000000..50dee2a
---- /dev/null
-+++ b/crypto/curve25519/make_curve25519_tables.py
-@@ -0,0 +1,222 @@
-+#!/usr/bin/env python
-+# coding=utf-8
-+# Copyright (c) 2020, Google Inc.
-+#
-+# Permission to use, copy, modify, and/or distribute this software for any
-+# purpose with or without fee is hereby granted, provided that the above
-+# copyright notice and this permission notice appear in all copies.
-+#
-+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-+
-+import StringIO
-+import subprocess
-+
-+# Base field Z_p
-+p = 2**255 - 19
-+
-+def modp_inv(x):
-+    return pow(x, p-2, p)
-+
-+# Square root of -1
-+modp_sqrt_m1 = pow(2, (p-1) // 4, p)
-+
-+# Compute corresponding x-coordinate, with low bit corresponding to
-+# sign, or return None on failure
-+def recover_x(y, sign):
-+    if y >= p:
-+        return None
-+    x2 = (y*y-1) * modp_inv(d*y*y+1)
-+    if x2 == 0:
-+        if sign:
-+            return None
-+        else:
-+            return 0
-+
-+    # Compute square root of x2
-+    x = pow(x2, (p+3) // 8, p)
-+    if (x*x - x2) % p != 0:
-+        x = x * modp_sqrt_m1 % p
-+    if (x*x - x2) % p != 0:
-+        return None
-+
-+    if (x & 1) != sign:
-+        x = p - x
-+    return x
-+
-+# Curve constant
-+d = -121665 * modp_inv(121666) % p
-+
-+# Base point
-+g_y = 4 * modp_inv(5) % p
-+g_x = recover_x(g_y, 0)
-+
-+# Points are represented as affine tuples (x, y).
-+
-+def point_add(P, Q):
-+    x1, y1 = P
-+    x2, y2 = Q
-+    x3 = ((x1*y2 + y1*x2) * modp_inv(1 + d*x1*x2*y1*y2)) % p
-+    y3 = ((y1*y2 + x1*x2) * modp_inv(1 - d*x1*x2*y1*y2)) % p
-+    return (x3, y3)
-+
-+# Computes Q = s * P
-+def point_mul(s, P):
-+    Q = (0, 1)  # Neutral element
-+    while s > 0:
-+        if s & 1:
-+            Q = point_add(Q, P)
-+        P = point_add(P, P)
-+        s >>= 1
-+    return Q
-+
-+def to_bytes(x):
-+    ret = bytearray(32)
-+    for i in range(len(ret)):
-+        ret[i] = x % 256
-+        x >>= 8
-+    assert x == 0
-+    return ret
-+
-+def to_ge_precomp(P):
-+    # typedef struct {
-+    #   fe_loose yplusx;
-+    #   fe_loose yminusx;
-+    #   fe_loose xy2d;
-+    # } ge_precomp;
-+    x, y = P
-+    return ((y + x) % p, (y - x) % p, (x * y * 2 * d) % p)
-+
-+def to_base_25_5(x):
-+    limbs = (26, 25, 26, 25, 26, 25, 26, 25, 26, 25)
-+    ret = []
-+    for l in limbs:
-+        ret.append(x & ((1<<l) - 1))
-+        x >>= l
-+    assert x == 0
-+    return ret
-+
-+def to_base_51(x):
-+    ret = []
-+    for _ in range(5):
-+        ret.append(x & ((1<<51) - 1))
-+        x >>= 51
-+    assert x == 0
-+    return ret
-+
-+def to_literal(x):
-+    ret = "{{\n#if defined(BORINGSSL_CURVE25519_64BIT)\n"
-+    ret += ", ".join(map(str, to_base_51(x)))
-+    ret += "\n#else\n"
-+    ret += ", ".join(map(str, to_base_25_5(x)))
-+    ret += "\n#endif\n}}"
-+    return ret
-+
-+def main():
-+    d2 = (2 * d) % p
-+
-+    small_precomp = bytearray()
-+    for i in range(1, 16):
-+        s = (i&1) | ((i&2) << (64-1)) | ((i&4) << (128-2)) | ((i&8) << (192-3))
-+        P = point_mul(s, (g_x, g_y))
-+        small_precomp += to_bytes(P[0])
-+        small_precomp += to_bytes(P[1])
-+
-+    large_precomp = []
-+    for i in range(32):
-+        large_precomp.append([])
-+        for j in range(8):
-+            P = point_mul((j + 1) << (i * 8), (g_x, g_y))
-+            large_precomp[-1].append(to_ge_precomp(P))
-+
-+    bi_precomp = []
-+    for i in range(8):
-+        P = point_mul(2*i + 1, (g_x, g_y))
-+        bi_precomp.append(to_ge_precomp(P))
-+
-+
-+    buf = StringIO.StringIO()
-+    buf.write("""/* Copyright (c) 2020, Google Inc.
-+ *
-+ * Permission to use, copy, modify, and/or distribute this software for any
-+ * purpose with or without fee is hereby granted, provided that the above
-+ * copyright notice and this permission notice appear in all copies.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-+
-+// This file is generated from
-+//    ./make_curve25519_tables.py > curve25519_tables.h
-+
-+
-+static const fe d = """)
-+    buf.write(to_literal(d))
-+    buf.write(""";
-+
-+static const fe sqrtm1 = """)
-+    buf.write(to_literal(modp_sqrt_m1))
-+    buf.write(""";
-+
-+static const fe d2 = """)
-+    buf.write(to_literal(d2))
-+    buf.write(""";
-+
-+#if defined(OPENSSL_SMALL)
-+
-+// This block of code replaces the standard base-point table with a much smaller
-+// one. The standard table is 30,720 bytes while this one is just 960.
-+//
-+// This table contains 15 pairs of group elements, (x, y), where each field
-+// element is serialised with |fe_tobytes|. If |i| is the index of the group
-+// element then consider i+1 as a four-bit number: (i₀, i₁, i₂, i₃) (where i₀
-+// is the most significant bit). The value of the group element is then:
-+// (i₀×2^192 + i₁×2^128 + i₂×2^64 + i₃)G, where G is the generator.
-+static const uint8_t k25519SmallPrecomp[15 * 2 * 32] = {""")
-+    for i, b in enumerate(small_precomp):
-+        buf.write("0x%02x, " % b)
-+    buf.write("""
-+};
-+
-+#else
-+
-+// k25519Precomp[i][j] = (j+1)*256^i*B
-+static const ge_precomp k25519Precomp[32][8] = {
-+""")
-+    for child in large_precomp:
-+        buf.write("{\n")
-+        for val in child:
-+            buf.write("{\n")
-+            for term in val:
-+                buf.write(to_literal(term) + ",\n")
-+            buf.write("},\n")
-+        buf.write("},\n")
-+    buf.write("""};
-+
-+#endif  // OPENSSL_SMALL
-+
-+// Bi[i] = (2*i+1)*B
-+static const ge_precomp Bi[8] = {
-+""")
-+    for val in bi_precomp:
-+        buf.write("{\n")
-+        for term in val:
-+                buf.write(to_literal(term) + ",\n")
-+        buf.write("},\n")
-+    buf.write("""};
-+""")
-+
-+    proc = subprocess.Popen(["clang-format"], stdin=subprocess.PIPE)
-+    proc.communicate(buf.getvalue())
-+
-+if __name__ == "__main__":
-+    main()
-diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv7.pl b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl
-new file mode 100644
-index 0000000..d36a97a
---- /dev/null
-+++ b/crypto/fipsmodule/aes/asm/vpaes-armv7.pl
-@@ -0,0 +1,896 @@
-+#! /usr/bin/env perl
-+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
-+#
-+# Licensed under the OpenSSL license (the "License").  You may not use
-+# this file except in compliance with the License.  You can obtain a copy
-+# in the file LICENSE in the source distribution or at
-+# https://www.openssl.org/source/license.html
-+
-+
-+######################################################################
-+## Constant-time SSSE3 AES core implementation.
-+## version 0.1
-+##
-+## By Mike Hamburg (Stanford University), 2009
-+## Public domain.
-+##
-+## For details see http://shiftleft.org/papers/vector_aes/ and
-+## http://crypto.stanford.edu/vpaes/.
-+##
-+######################################################################
-+# Adapted from the original x86_64 version and <appro@openssl.org>'s ARMv8
-+# version.
-+#
-+# armv7, aarch64, and x86_64 differ in several ways:
-+#
-+# * x86_64 SSSE3 instructions are two-address (destination operand is also a
-+#   source), while NEON is three-address (destination operand is separate from
-+#   two sources).
-+#
-+# * aarch64 has 32 SIMD registers available, while x86_64 and armv7 have 16.
-+#
-+# * x86_64 instructions can take memory references, while ARM is a load/store
-+#   architecture. This means we sometimes need a spare register.
-+#
-+# * aarch64 and x86_64 have 128-bit byte shuffle instructions (tbl and pshufb),
-+#   while armv7 only has a 64-bit byte shuffle (vtbl).
-+#
-+# This means this armv7 version must be a mix of both aarch64 and x86_64
-+# implementations. armv7 and aarch64 have analogous SIMD instructions, so we
-+# base the instructions on aarch64. However, we cannot use aarch64's register
-+# allocation. x86_64's register count matches, but x86_64 is two-address.
-+# vpaes-armv8.pl already accounts for this in the comments, which use
-+# three-address AVX instructions instead of the original SSSE3 ones. We base
-+# register usage on these comments, which are preserved in this file.
-+#
-+# This means we do not use separate input and output registers as in aarch64 and
-+# cannot pin as many constants in the preheat functions. However, the load/store
-+# architecture means we must still deviate from x86_64 in places.
-+#
-+# Next, we account for the byte shuffle instructions. vtbl takes 64-bit source
-+# and destination and 128-bit table. Fortunately, armv7 also allows addressing
-+# upper and lower halves of each 128-bit register. The lower half of q{N} is
-+# d{2*N}. The upper half is d{2*N+1}. Instead of the following non-existent
-+# instruction,
-+#
-+#     vtbl.8 q0, q1, q2   @ Index each of q2's 16 bytes into q1. Store in q0.
-+#
-+# we write:
-+#
-+#     vtbl.8 d0, q1, d4   @ Index each of d4's 8 bytes into q1. Store in d0.
-+#     vtbl.8 d1, q1, d5   @ Index each of d5's 8 bytes into q1. Store in d1.
-+#
-+# For readability, we write d0 and d1 as q0#lo and q0#hi, respectively and
-+# post-process before outputting. (This is adapted from ghash-armv4.pl.) Note,
-+# however, that destination (q0) and table (q1) registers may no longer match.
-+# We adjust the register usage from x86_64 to avoid this. (Unfortunately, the
-+# two-address pshufb always matched these operands, so this is common.)
-+#
-+# This file also runs against the limit of ARMv7's ADR pseudo-instruction. ADR
-+# expands to an ADD or SUB of the pc register to find an address. That immediate
-+# must fit in ARM's encoding scheme: 8 bits of constant and 4 bits of rotation.
-+# This means larger values must be more aligned.
-+#
-+# ARM additionally has two encodings, ARM and Thumb mode. Our assembly files may
-+# use either encoding (do we actually need to support this?). In ARM mode, the
-+# distances get large enough to require 16-byte alignment. Moving constants
-+# closer to their use resolves most of this, but common constants in
-+# _vpaes_consts are used by the whole file. Affected ADR instructions must be
-+# placed at 8 mod 16 (the pc register is 8 ahead). Instructions with this
-+# constraint have been commented.
-+#
-+# For details on ARM's immediate value encoding scheme, see
-+# https://alisdair.mcdiarmid.org/arm-immediate-value-encoding/
-+#
-+# Finally, a summary of armv7 and aarch64 SIMD syntax differences:
-+#
-+# * armv7 prefixes SIMD instructions with 'v', while aarch64 does not.
-+#
-+# * armv7 SIMD registers are named like q0 (and d0 for the half-width ones).
-+#   aarch64 names registers like v0, and denotes half-width operations in an
-+#   instruction suffix (see below).
-+#
-+# * aarch64 embeds size and lane information in register suffixes. v0.16b is
-+#   16 bytes, v0.8h is eight u16s, v0.4s is four u32s, and v0.2d is two u64s.
-+#   armv7 embeds the total size in the register name (see above) and the size of
-+#   each element in an instruction suffix, which may look like vmov.i8,
-+#   vshr.u8, or vtbl.8, depending on instruction.
-+
-+use strict;
-+
-+my $flavour = shift;
-+my $output;
-+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
-+
-+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
-+my $dir=$1;
-+my $xlate;
-+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-+( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
-+die "can't locate arm-xlate.pl";
-+
-+open OUT,"| \"$^X\" $xlate $flavour $output";
-+*STDOUT=*OUT;
-+
-+my $code = "";
-+
-+$code.=<<___;
-+.syntax	unified
-+
-+.arch	armv7-a
-+.fpu	neon
-+
-+#if defined(__thumb2__)
-+.thumb
-+#else
-+.code	32
-+#endif
-+
-+.text
-+
-+.type	_vpaes_consts,%object
-+.align	7	@ totally strategic alignment
-+_vpaes_consts:
-+.Lk_mc_forward:	@ mc_forward
-+	.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
-+	.quad	0x080B0A0904070605, 0x000302010C0F0E0D
-+	.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
-+	.quad	0x000302010C0F0E0D, 0x080B0A0904070605
-+.Lk_mc_backward:@ mc_backward
-+	.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
-+	.quad	0x020100030E0D0C0F, 0x0A09080B06050407
-+	.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
-+	.quad	0x0A09080B06050407, 0x020100030E0D0C0F
-+.Lk_sr:		@ sr
-+	.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
-+	.quad	0x030E09040F0A0500, 0x0B06010C07020D08
-+	.quad	0x0F060D040B020900, 0x070E050C030A0108
-+	.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
-+
-+@
-+@ "Hot" constants
-+@
-+.Lk_inv:	@ inv, inva
-+	.quad	0x0E05060F0D080180, 0x040703090A0B0C02
-+	.quad	0x01040A060F0B0780, 0x030D0E0C02050809
-+.Lk_ipt:	@ input transform (lo, hi)
-+	.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
-+	.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
-+.Lk_sbo:	@ sbou, sbot
-+	.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
-+	.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
-+.Lk_sb1:	@ sb1u, sb1t
-+	.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
-+	.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
-+.Lk_sb2:	@ sb2u, sb2t
-+	.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
-+	.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
-+
-+.asciz  "Vector Permutation AES for ARMv7 NEON, Mike Hamburg (Stanford University)"
-+.size	_vpaes_consts,.-_vpaes_consts
-+.align	6
-+___
-+
-+{
-+my ($inp,$out,$key) = map("r$_", (0..2));
-+
-+my ($invlo,$invhi) = map("q$_", (10..11));
-+my ($sb1u,$sb1t,$sb2u,$sb2t) = map("q$_", (12..15));
-+
-+$code.=<<___;
-+@@
-+@@  _vpaes_preheat
-+@@
-+@@  Fills q9-q15 as specified below.
-+@@
-+.type	_vpaes_preheat,%function
-+.align	4
-+_vpaes_preheat:
-+	adr	r10, .Lk_inv
-+	vmov.i8	q9, #0x0f		@ .Lk_s0F
-+	vld1.64	{q10,q11}, [r10]!	@ .Lk_inv
-+	add	r10, r10, #64		@ Skip .Lk_ipt, .Lk_sbo
-+	vld1.64	{q12,q13}, [r10]!	@ .Lk_sb1
-+	vld1.64	{q14,q15}, [r10]	@ .Lk_sb2
-+	bx	lr
-+
-+@@
-+@@  _vpaes_encrypt_core
-+@@
-+@@  AES-encrypt q0.
-+@@
-+@@  Inputs:
-+@@     q0 = input
-+@@     q9-q15 as in _vpaes_preheat
-+@@    [$key] = scheduled keys
-+@@
-+@@  Output in q0
-+@@  Clobbers  q1-q5, r8-r11
-+@@  Preserves q6-q8 so you get some local vectors
-+@@
-+@@
-+.type	_vpaes_encrypt_core,%function
-+.align 4
-+_vpaes_encrypt_core:
-+	mov	r9, $key
-+	ldr	r8, [$key,#240]		@ pull rounds
-+	adr	r11, .Lk_ipt
-+	@ vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
-+	@ vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
-+	vld1.64	{q2, q3}, [r11]
-+	adr	r11, .Lk_mc_forward+16
-+	vld1.64	{q5}, [r9]!		@ vmovdqu	(%r9),	%xmm5		# round0 key
-+	vand	q1, q0, q9		@ vpand	%xmm9,	%xmm0,	%xmm1
-+	vshr.u8	q0, q0, #4		@ vpsrlb	\$4,	%xmm0,	%xmm0
-+	vtbl.8	q1#lo, {q2}, q1#lo	@ vpshufb	%xmm1,	%xmm2,	%xmm1
-+	vtbl.8	q1#hi, {q2}, q1#hi
-+	vtbl.8	q2#lo, {q3}, q0#lo	@ vpshufb	%xmm0,	%xmm3,	%xmm2
-+	vtbl.8	q2#hi, {q3}, q0#hi
-+	veor	q0, q1, q5		@ vpxor	%xmm5,	%xmm1,	%xmm0
-+	veor	q0, q0, q2		@ vpxor	%xmm2,	%xmm0,	%xmm0
-+
-+	@ .Lenc_entry ends with a bne instruction which is normally paired with
-+	@ subs in .Lenc_loop.
-+	tst	r8, r8
-+	b	.Lenc_entry
-+
-+.align 4
-+.Lenc_loop:
-+	@ middle of middle round
-+	add	r10, r11, #0x40
-+	vtbl.8	q4#lo, {$sb1t}, q2#lo	@ vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
-+	vtbl.8	q4#hi, {$sb1t}, q2#hi
-+	vld1.64	{q1}, [r11]!		@ vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
-+	vtbl.8	q0#lo, {$sb1u}, q3#lo	@ vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
-+	vtbl.8	q0#hi, {$sb1u}, q3#hi
-+	veor	q4, q4, q5		@ vpxor		%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
-+	vtbl.8	q5#lo, {$sb2t}, q2#lo	@ vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
-+	vtbl.8	q5#hi, {$sb2t}, q2#hi
-+	veor	q0, q0, q4		@ vpxor		%xmm4,	%xmm0,	%xmm0	# 0 = A
-+	vtbl.8	q2#lo, {$sb2u}, q3#lo	@ vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
-+	vtbl.8	q2#hi, {$sb2u}, q3#hi
-+	vld1.64	{q4}, [r10]		@ vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
-+	vtbl.8	q3#lo, {q0}, q1#lo	@ vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
-+	vtbl.8	q3#hi, {q0}, q1#hi
-+	veor	q2, q2, q5		@ vpxor		%xmm5,	%xmm2,	%xmm2	# 2 = 2A
-+	@ Write to q5 instead of q0, so the table and destination registers do
-+	@ not overlap.
-+	vtbl.8	q5#lo, {q0}, q4#lo	@ vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
-+	vtbl.8	q5#hi, {q0}, q4#hi
-+	veor	q3, q3, q2		@ vpxor		%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
-+	vtbl.8	q4#lo, {q3}, q1#lo	@ vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
-+	vtbl.8	q4#hi, {q3}, q1#hi
-+	@ Here we restore the original q0/q5 usage.
-+	veor	q0, q5, q3		@ vpxor		%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
-+	and	r11, r11, #~(1<<6)	@ and		\$0x30,	%r11		# ... mod 4
-+	veor	q0, q0, q4		@ vpxor		%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
-+	subs	r8, r8, #1		@ nr--
-+
-+.Lenc_entry:
-+	@ top of round
-+	vand	q1, q0, q9		@ vpand		%xmm0,	%xmm9,	%xmm1   # 0 = k
-+	vshr.u8	q0, q0, #4		@ vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
-+	vtbl.8	q5#lo, {$invhi}, q1#lo	@ vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
-+	vtbl.8	q5#hi, {$invhi}, q1#hi
-+	veor	q1, q1, q0		@ vpxor		%xmm0,	%xmm1,	%xmm1	# 0 = j
-+	vtbl.8	q3#lo, {$invlo}, q0#lo	@ vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
-+	vtbl.8	q3#hi, {$invlo}, q0#hi
-+	vtbl.8	q4#lo, {$invlo}, q1#lo	@ vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
-+	vtbl.8	q4#hi, {$invlo}, q1#hi
-+	veor	q3, q3, q5		@ vpxor		%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
-+	veor	q4, q4, q5		@ vpxor		%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
-+	vtbl.8	q2#lo, {$invlo}, q3#lo	@ vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
-+	vtbl.8	q2#hi, {$invlo}, q3#hi
-+	vtbl.8	q3#lo, {$invlo}, q4#lo	@ vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
-+	vtbl.8	q3#hi, {$invlo}, q4#hi
-+	veor	q2, q2, q1		@ vpxor		%xmm1,	%xmm2,	%xmm2  	# 2 = io
-+	veor	q3, q3, q0		@ vpxor		%xmm0,	%xmm3,	%xmm3	# 3 = jo
-+	vld1.64	{q5}, [r9]!		@ vmovdqu	(%r9),	%xmm5
-+	bne	.Lenc_loop
-+
-+	@ middle of last round
-+	add	r10, r11, #0x80
-+
-+	adr	r11, .Lk_sbo
-+	@ Read to q1 instead of q4, so the vtbl.8 instruction below does not
-+	@ overlap table and destination registers.
-+	vld1.64 {q1}, [r11]!		@ vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou
-+	vld1.64 {q0}, [r11]		@ vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
-+	vtbl.8	q4#lo, {q1}, q2#lo	@ vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
-+	vtbl.8	q4#hi, {q1}, q2#hi
-+	vld1.64	{q1}, [r10]		@ vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
-+	@ Write to q2 instead of q0 below, to avoid overlapping table and
-+	@ destination registers.
-+	vtbl.8	q2#lo, {q0}, q3#lo	@ vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
-+	vtbl.8	q2#hi, {q0}, q3#hi
-+	veor	q4, q4, q5		@ vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
-+	veor	q2, q2, q4		@ vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
-+	@ Here we restore the original q0/q2 usage.
-+	vtbl.8	q0#lo, {q2}, q1#lo	@ vpshufb	%xmm1,	%xmm0,	%xmm0
-+	vtbl.8	q0#hi, {q2}, q1#hi
-+	bx	lr
-+.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
-+
-+.globl	GFp_vpaes_encrypt
-+.type	GFp_vpaes_encrypt,%function
-+.align	4
-+GFp_vpaes_encrypt:
-+	@ _vpaes_encrypt_core uses r8-r11. Round up to r7-r11 to maintain stack
-+	@ alignment.
-+	stmdb	sp!, {r7-r11,lr}
-+	@ _vpaes_encrypt_core uses q4-q5 (d8-d11), which are callee-saved.
-+	vstmdb	sp!, {d8-d11}
-+
-+	vld1.64	{q0}, [$inp]
-+	bl	_vpaes_preheat
-+	bl	_vpaes_encrypt_core
-+	vst1.64	{q0}, [$out]
-+
-+	vldmia	sp!, {d8-d11}
-+	ldmia	sp!, {r7-r11, pc}	@ return
-+.size	GFp_vpaes_encrypt,.-GFp_vpaes_encrypt
-+___
-+}
-+{
-+my ($inp,$bits,$out,$dir)=("r0","r1","r2","r3");
-+my ($rcon,$s0F,$invlo,$invhi,$s63) = map("q$_",(8..12));
-+
-+$code.=<<___;
-+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-+@@                                                    @@
-+@@                  AES key schedule                  @@
-+@@                                                    @@
-+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-+
-+@ This function diverges from both x86_64 and aarch64 in which constants are
-+@ pinned. x86_64 has a common preheat function for all operations. aarch64
-+@ separates them because it has enough registers to pin nearly all constants.
-+@ armv7 does not have enough registers, but needing explicit loads and stores
-+@ also complicates using x86_64's register allocation directly.
-+@
-+@ We pin some constants for convenience and leave q14 and q15 free to load
-+@ others on demand.
-+
-+@
-+@  Key schedule constants
-+@
-+.type	_vpaes_key_consts,%object
-+.align	4
-+_vpaes_key_consts:
-+.Lk_rcon:	@ rcon
-+	.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
-+
-+.Lk_opt:	@ output transform
-+	.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
-+	.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
-+.Lk_deskew:	@ deskew tables: inverts the sbox's "skew"
-+	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
-+	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
-+.size	_vpaes_key_consts,.-_vpaes_key_consts
-+
-+.type	_vpaes_key_preheat,%function
-+.align	4
-+_vpaes_key_preheat:
-+	adr	r11, .Lk_rcon
-+	vmov.i8	$s63, #0x5b			@ .Lk_s63
-+	adr	r10, .Lk_inv			@ Must be aligned to 8 mod 16.
-+	vmov.i8	$s0F, #0x0f			@ .Lk_s0F
-+	vld1.64	{$invlo,$invhi}, [r10]		@ .Lk_inv
-+	vld1.64	{$rcon}, [r11]			@ .Lk_rcon
-+	bx	lr
-+.size	_vpaes_key_preheat,.-_vpaes_key_preheat
-+
-+.type	_vpaes_schedule_core,%function
-+.align	4
-+_vpaes_schedule_core:
-+	@ We only need to save lr, but ARM requires an 8-byte stack alignment,
-+	@ so save an extra register.
-+	stmdb	sp!, {r3,lr}
-+
-+	bl	_vpaes_key_preheat	@ load the tables
-+
-+	adr	r11, .Lk_ipt		@ Must be aligned to 8 mod 16.
-+	vld1.64	{q0}, [$inp]!		@ vmovdqu	(%rdi),	%xmm0		# load key (unaligned)
-+
-+	@ input transform
-+	@ Use q4 here rather than q3 so .Lschedule_am_decrypting does not
-+	@ overlap table and destination.
-+	vmov	q4, q0			@ vmovdqa	%xmm0,	%xmm3
-+	bl	_vpaes_schedule_transform
-+	adr	r10, .Lk_sr		@ Must be aligned to 8 mod 16.
-+	vmov	q7, q0			@ vmovdqa	%xmm0,	%xmm7
-+
-+	add	r8, r8, r10
-+
-+	@ encrypting, output zeroth round key after transform
-+	vst1.64	{q0}, [$out]		@ vmovdqu	%xmm0,	(%rdx)
-+
-+	@ *ring*: Decryption removed.
-+
-+.Lschedule_go:
-+	cmp	$bits, #192		@ cmp	\$192,	%esi
-+	bhi	.Lschedule_256
-+	@ 128: fall through
-+
-+@@
-+@@  .schedule_128
-+@@
-+@@  128-bit specific part of key schedule.
-+@@
-+@@  This schedule is really simple, because all its parts
-+@@  are accomplished by the subroutines.
-+@@
-+.Lschedule_128:
-+	mov	$inp, #10		@ mov	\$10, %esi
-+
-+.Loop_schedule_128:
-+	bl 	_vpaes_schedule_round
-+	subs	$inp, $inp, #1		@ dec	%esi
-+	beq 	.Lschedule_mangle_last
-+	bl	_vpaes_schedule_mangle	@ write output
-+	b 	.Loop_schedule_128
-+
-+@@
-+@@  .aes_schedule_256
-+@@
-+@@  256-bit specific part of key schedule.
-+@@
-+@@  The structure here is very similar to the 128-bit
-+@@  schedule, but with an additional "low side" in
-+@@  q6.  The low side's rounds are the same as the
-+@@  high side's, except no rcon and no rotation.
-+@@
-+.align	4
-+.Lschedule_256:
-+	vld1.64	{q0}, [$inp]			@ vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
-+	bl	_vpaes_schedule_transform	@ input transform
-+	mov	$inp, #7			@ mov	\$7, %esi
-+
-+.Loop_schedule_256:
-+	bl	_vpaes_schedule_mangle		@ output low result
-+	vmov	q6, q0				@ vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
-+
-+	@ high round
-+	bl	_vpaes_schedule_round
-+	subs	$inp, $inp, #1			@ dec	%esi
-+	beq 	.Lschedule_mangle_last
-+	bl	_vpaes_schedule_mangle
-+
-+	@ low round. swap xmm7 and xmm6
-+	vdup.32	q0, q0#hi[1]		@ vpshufd	\$0xFF,	%xmm0,	%xmm0
-+	vmov.i8	q4, #0
-+	vmov	q5, q7			@ vmovdqa	%xmm7,	%xmm5
-+	vmov	q7, q6			@ vmovdqa	%xmm6,	%xmm7
-+	bl	_vpaes_schedule_low_round
-+	vmov	q7, q5			@ vmovdqa	%xmm5,	%xmm7
-+
-+	b	.Loop_schedule_256
-+
-+@@
-+@@  .aes_schedule_mangle_last
-+@@
-+@@  Mangler for last round of key schedule
-+@@  Mangles q0
-+@@    when encrypting, outputs out(q0) ^ 63
-+@@    when decrypting, outputs unskew(q0)
-+@@
-+@@  Always called right before return... jumps to cleanup and exits
-+@@
-+.align	4
-+.Lschedule_mangle_last:
-+	@ schedule last round key from xmm0
-+	adr	r11, .Lk_deskew			@ lea	.Lk_deskew(%rip),%r11	# prepare to deskew
-+
-+	@ encrypting
-+	vld1.64	{q1}, [r8]		@ vmovdqa	(%r8,%r10),%xmm1
-+	adr	r11, .Lk_opt		@ lea		.Lk_opt(%rip),	%r11		# prepare to output transform
-+	add	$out, $out, #32		@ add		\$32,	%rdx
-+	vmov	q2, q0
-+	vtbl.8	q0#lo, {q2}, q1#lo	@ vpshufb	%xmm1,	%xmm0,	%xmm0		# output permute
-+	vtbl.8	q0#hi, {q2}, q1#hi
-+
-+.Lschedule_mangle_last_dec:
-+	sub	$out, $out, #16			@ add	\$-16,	%rdx
-+	veor	q0, q0, $s63			@ vpxor	.Lk_s63(%rip),	%xmm0,	%xmm0
-+	bl	_vpaes_schedule_transform	@ output transform
-+	vst1.64	{q0}, [$out]			@ vmovdqu	%xmm0,	(%rdx)		# save last key
-+
-+	@ cleanup
-+	veor	q0, q0, q0		@ vpxor	%xmm0,	%xmm0,	%xmm0
-+	veor	q1, q1, q1		@ vpxor	%xmm1,	%xmm1,	%xmm1
-+	veor	q2, q2, q2		@ vpxor	%xmm2,	%xmm2,	%xmm2
-+	veor	q3, q3, q3		@ vpxor	%xmm3,	%xmm3,	%xmm3
-+	veor	q4, q4, q4		@ vpxor	%xmm4,	%xmm4,	%xmm4
-+	veor	q5, q5, q5		@ vpxor	%xmm5,	%xmm5,	%xmm5
-+	veor	q6, q6, q6		@ vpxor	%xmm6,	%xmm6,	%xmm6
-+	veor	q7, q7, q7		@ vpxor	%xmm7,	%xmm7,	%xmm7
-+	ldmia	sp!, {r3,pc}		@ return
-+.size	_vpaes_schedule_core,.-_vpaes_schedule_core
-+
-+@@
-+@@  .aes_schedule_round
-+@@
-+@@  Runs one main round of the key schedule on q0, q7
-+@@
-+@@  Specifically, runs subbytes on the high dword of q0
-+@@  then rotates it by one byte and xors into the low dword of
-+@@  q7.
-+@@
-+@@  Adds rcon from low byte of q8, then rotates q8 for
-+@@  next rcon.
-+@@
-+@@  Smears the dwords of q7 by xoring the low into the
-+@@  second low, result into third, result into highest.
-+@@
-+@@  Returns results in q7 = q0.
-+@@  Clobbers q1-q4, r11.
-+@@
-+.type	_vpaes_schedule_round,%function
-+.align	4
-+_vpaes_schedule_round:
-+	@ extract rcon from xmm8
-+	vmov.i8	q4, #0				@ vpxor		%xmm4,	%xmm4,	%xmm4
-+	vext.8	q1, $rcon, q4, #15		@ vpalignr	\$15,	%xmm8,	%xmm4,	%xmm1
-+	vext.8	$rcon, $rcon, $rcon, #15	@ vpalignr	\$15,	%xmm8,	%xmm8,	%xmm8
-+	veor	q7, q7, q1			@ vpxor		%xmm1,	%xmm7,	%xmm7
-+
-+	@ rotate
-+	vdup.32	q0, q0#hi[1]			@ vpshufd	\$0xFF,	%xmm0,	%xmm0
-+	vext.8	q0, q0, q0, #1			@ vpalignr	\$1,	%xmm0,	%xmm0,	%xmm0
-+
-+	@ fall through...
-+
-+	@ low round: same as high round, but no rotation and no rcon.
-+_vpaes_schedule_low_round:
-+	@ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12.
-+	@ We pin other values in _vpaes_key_preheat, so load them now.
-+	adr	r11, .Lk_sb1
-+	vld1.64	{q14,q15}, [r11]
-+
-+	@ smear xmm7
-+	vext.8	q1, q4, q7, #12			@ vpslldq	\$4,	%xmm7,	%xmm1
-+	veor	q7, q7, q1			@ vpxor	%xmm1,	%xmm7,	%xmm7
-+	vext.8	q4, q4, q7, #8			@ vpslldq	\$8,	%xmm7,	%xmm4
-+
-+	@ subbytes
-+	vand	q1, q0, $s0F			@ vpand		%xmm9,	%xmm0,	%xmm1		# 0 = k
-+	vshr.u8	q0, q0, #4			@ vpsrlb	\$4,	%xmm0,	%xmm0		# 1 = i
-+	 veor	q7, q7, q4			@ vpxor		%xmm4,	%xmm7,	%xmm7
-+	vtbl.8	q2#lo, {$invhi}, q1#lo		@ vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
-+	vtbl.8	q2#hi, {$invhi}, q1#hi
-+	veor	q1, q1, q0			@ vpxor		%xmm0,	%xmm1,	%xmm1		# 0 = j
-+	vtbl.8	q3#lo, {$invlo}, q0#lo		@ vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
-+	vtbl.8	q3#hi, {$invlo}, q0#hi
-+	veor	q3, q3, q2			@ vpxor		%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
-+	vtbl.8	q4#lo, {$invlo}, q1#lo		@ vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
-+	vtbl.8	q4#hi, {$invlo}, q1#hi
-+	 veor	q7, q7, $s63			@ vpxor		.Lk_s63(%rip),	%xmm7,	%xmm7
-+	vtbl.8	q3#lo, {$invlo}, q3#lo		@ vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
-+	vtbl.8	q3#hi, {$invlo}, q3#hi
-+	veor	q4, q4, q2			@ vpxor		%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
-+	vtbl.8	q2#lo, {$invlo}, q4#lo		@ vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
-+	vtbl.8	q2#hi, {$invlo}, q4#hi
-+	veor	q3, q3, q1			@ vpxor		%xmm1,	%xmm3,	%xmm3		# 2 = io
-+	veor	q2, q2, q0			@ vpxor		%xmm0,	%xmm2,	%xmm2		# 3 = jo
-+	vtbl.8	q4#lo, {q15}, q3#lo		@ vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
-+	vtbl.8	q4#hi, {q15}, q3#hi
-+	vtbl.8	q1#lo, {q14}, q2#lo		@ vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
-+	vtbl.8	q1#hi, {q14}, q2#hi
-+	veor	q1, q1, q4			@ vpxor		%xmm4,	%xmm1,	%xmm1		# 0 = sbox output
-+
-+	@ add in smeared stuff
-+	veor	q0, q1, q7			@ vpxor	%xmm7,	%xmm1,	%xmm0
-+	veor	q7, q1, q7			@ vmovdqa	%xmm0,	%xmm7
-+	bx	lr
-+.size	_vpaes_schedule_round,.-_vpaes_schedule_round
-+
-+@@
-+@@  .aes_schedule_transform
-+@@
-+@@  Linear-transform q0 according to tables at [r11]
-+@@
-+@@  Requires that q9 = 0x0F0F... as in preheat
-+@@  Output in q0
-+@@  Clobbers q1, q2, q14, q15
-+@@
-+.type	_vpaes_schedule_transform,%function
-+.align	4
-+_vpaes_schedule_transform:
-+	vld1.64	{q14,q15}, [r11]	@ vmovdqa	(%r11),	%xmm2 	# lo
-+					@ vmovdqa	16(%r11),	%xmm1 # hi
-+	vand	q1, q0, $s0F		@ vpand	%xmm9,	%xmm0,	%xmm1
-+	vshr.u8	q0, q0, #4		@ vpsrlb	\$4,	%xmm0,	%xmm0
-+	vtbl.8	q2#lo, {q14}, q1#lo	@ vpshufb	%xmm1,	%xmm2,	%xmm2
-+	vtbl.8	q2#hi, {q14}, q1#hi
-+	vtbl.8	q0#lo, {q15}, q0#lo	@ vpshufb	%xmm0,	%xmm1,	%xmm0
-+	vtbl.8	q0#hi, {q15}, q0#hi
-+	veor	q0, q0, q2		@ vpxor	%xmm2,	%xmm0,	%xmm0
-+	bx	lr
-+.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
-+
-+@@
-+@@  .aes_schedule_mangle
-+@@
-+@@  Mangles q0 from (basis-transformed) standard version
-+@@  to our version.
-+@@
-+@@  On encrypt,
-+@@    xor with 0x63
-+@@    multiply by circulant 0,1,1,1
-+@@    apply shiftrows transform
-+@@
-+@@  On decrypt,
-+@@    xor with 0x63
-+@@    multiply by "inverse mixcolumns" circulant E,B,D,9
-+@@    deskew
-+@@    apply shiftrows transform
-+@@
-+@@
-+@@  Writes out to [r2], and increments or decrements it
-+@@  Keeps track of round number mod 4 in r8
-+@@  Preserves q0
-+@@  Clobbers q1-q5
-+@@
-+.type	_vpaes_schedule_mangle,%function
-+.align	4
-+_vpaes_schedule_mangle:
-+	tst	$dir, $dir
-+	vmov	q4, q0			@ vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
-+	adr	r11, .Lk_mc_forward	@ Must be aligned to 8 mod 16.
-+	vld1.64	{q5}, [r11]		@ vmovdqa	.Lk_mc_forward(%rip),%xmm5
-+
-+	@ encrypting
-+	@ Write to q2 so we do not overlap table and destination below.
-+	veor	q2, q0, $s63		@ vpxor		.Lk_s63(%rip),	%xmm0,	%xmm4
-+	add	$out, $out, #16		@ add		\$16,	%rdx
-+	vtbl.8	q4#lo, {q2}, q5#lo	@ vpshufb	%xmm5,	%xmm4,	%xmm4
-+	vtbl.8	q4#hi, {q2}, q5#hi
-+	vtbl.8	q1#lo, {q4}, q5#lo	@ vpshufb	%xmm5,	%xmm4,	%xmm1
-+	vtbl.8	q1#hi, {q4}, q5#hi
-+	vtbl.8	q3#lo, {q1}, q5#lo	@ vpshufb	%xmm5,	%xmm1,	%xmm3
-+	vtbl.8	q3#hi, {q1}, q5#hi
-+	veor	q4, q4, q1		@ vpxor		%xmm1,	%xmm4,	%xmm4
-+	vld1.64	{q1}, [r8]		@ vmovdqa	(%r8,%r10),	%xmm1
-+	veor	q3, q3, q4		@ vpxor		%xmm4,	%xmm3,	%xmm3
-+
-+.Lschedule_mangle_both:
-+	@ Write to q2 so table and destination do not overlap.
-+	vtbl.8	q2#lo, {q3}, q1#lo	@ vpshufb	%xmm1,	%xmm3,	%xmm3
-+	vtbl.8	q2#hi, {q3}, q1#hi
-+	add	r8, r8, #64-16		@ add	\$-16,	%r8
-+	and	r8, r8, #~(1<<6)	@ and	\$0x30,	%r8
-+	vst1.64	{q2}, [$out]		@ vmovdqu	%xmm3,	(%rdx)
-+	bx	lr
-+.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
-+
-+.globl	GFp_vpaes_set_encrypt_key
-+.type	GFp_vpaes_set_encrypt_key,%function
-+.align	4
-+GFp_vpaes_set_encrypt_key:
-+	stmdb	sp!, {r7-r11, lr}
-+	vstmdb	sp!, {d8-d15}
-+
-+	lsr	r9, $bits, #5		@ shr	\$5,%eax
-+	add	r9, r9, #5		@ \$5,%eax
-+	str	r9, [$out,#240]		@ mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
-+
-+	mov	$dir, #0		@ mov	\$0,%ecx
-+	mov	r8, #0x30		@ mov	\$0x30,%r8d
-+	bl	_vpaes_schedule_core
-+	eor	r0, r0, r0
-+
-+	vldmia	sp!, {d8-d15}
-+	ldmia	sp!, {r7-r11, pc}	@ return
-+.size	GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key
-+___
-+}
-+
-+{
-+my ($out, $inp) = map("r$_", (0..1));
-+my ($s0F, $s63, $s63_raw, $mc_forward) = map("q$_", (9..12));
-+
-+$code .= <<___;
-+
-+@ Additional constants for converting to bsaes.
-+.type	_vpaes_convert_consts,%object
-+.align	4
-+_vpaes_convert_consts:
-+@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear
-+@ transform in the AES S-box. 0x63 is incorporated into the low half of the
-+@ table. This was computed with the following script:
-+@
-+@   def u64s_to_u128(x, y):
-+@       return x | (y << 64)
-+@   def u128_to_u64s(w):
-+@       return w & ((1<<64)-1), w >> 64
-+@   def get_byte(w, i):
-+@       return (w >> (i*8)) & 0xff
-+@   def apply_table(table, b):
-+@       lo = b & 0xf
-+@       hi = b >> 4
-+@       return get_byte(table[0], lo) ^ get_byte(table[1], hi)
-+@   def opt(b):
-+@       table = [
-+@           u64s_to_u128(0xFF9F4929D6B66000, 0xF7974121DEBE6808),
-+@           u64s_to_u128(0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0),
-+@       ]
-+@       return apply_table(table, b)
-+@   def rot_byte(b, n):
-+@       return 0xff & ((b << n) | (b >> (8-n)))
-+@   def skew(x):
-+@       return (x ^ rot_byte(x, 1) ^ rot_byte(x, 2) ^ rot_byte(x, 3) ^
-+@               rot_byte(x, 4))
-+@   table = [0, 0]
-+@   for i in range(16):
-+@       table[0] |= (skew(opt(i)) ^ 0x63) << (i*8)
-+@       table[1] |= skew(opt(i<<4)) << (i*8)
-+@   print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[0]))
-+@   print("\t.quad\t0x%016x, 0x%016x" % u128_to_u64s(table[1]))
-+.Lk_opt_then_skew:
-+	.quad	0x9cb8436798bc4763, 0x6440bb9f6044bf9b
-+	.quad	0x1f30062936192f00, 0xb49bad829db284ab
-+
-+@ void GFp_vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes);
-+.globl	GFp_vpaes_encrypt_key_to_bsaes
-+.type	GFp_vpaes_encrypt_key_to_bsaes,%function
-+.align	4
-+GFp_vpaes_encrypt_key_to_bsaes:
-+	stmdb	sp!, {r11, lr}
-+
-+	@ See _vpaes_schedule_core for the key schedule logic. In particular,
-+	@ _vpaes_schedule_transform(.Lk_ipt) (section 2.2 of the paper),
-+	@ _vpaes_schedule_mangle (section 4.3), and .Lschedule_mangle_last
-+	@ contain the transformations not in the bsaes representation. This
-+	@ function inverts those transforms.
-+	@
-+	@ Note also that bsaes-armv7.pl expects aes-armv4.pl's key
-+	@ representation, which does not match the other aes_nohw_*
-+	@ implementations. The ARM aes_nohw_* stores each 32-bit word
-+	@ byteswapped, as a convenience for (unsupported) big-endian ARM, at the
-+	@ cost of extra REV and VREV32 operations in little-endian ARM.
-+
-+	vmov.i8	$s0F, #0x0f		@ Required by _vpaes_schedule_transform
-+	adr	r2, .Lk_mc_forward	@ Must be aligned to 8 mod 16.
-+	add	r3, r2, 0x90		@ .Lk_sr+0x10-.Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression)
-+
-+	vld1.64	{$mc_forward}, [r2]
-+	vmov.i8	$s63, #0x5b		@ .Lk_s63 from vpaes-x86_64
-+	adr	r11, .Lk_opt		@ Must be aligned to 8 mod 16.
-+	vmov.i8	$s63_raw, #0x63		@ .LK_s63 without .Lk_ipt applied
-+
-+	@ vpaes stores one fewer round count than bsaes, but the number of keys
-+	@ is the same.
-+	ldr	r2, [$inp,#240]
-+	add	r2, r2, #1
-+	str	r2, [$out,#240]
-+
-+	@ The first key is transformed with _vpaes_schedule_transform(.Lk_ipt).
-+	@ Invert this with .Lk_opt.
-+	vld1.64	{q0}, [$inp]!
-+	bl	_vpaes_schedule_transform
-+	vrev32.8	q0, q0
-+	vst1.64	{q0}, [$out]!
-+
-+	@ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied,
-+	@ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63,
-+	@ multiplies by the circulant 0,1,1,1, then applies ShiftRows.
-+.Loop_enc_key_to_bsaes:
-+	vld1.64	{q0}, [$inp]!
-+
-+	@ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle
-+	@ r3 in the opposite direction and start at .Lk_sr+0x10 instead of 0x30.
-+	@ We use r3 rather than r8 to avoid a callee-saved register.
-+	vld1.64	{q1}, [r3]
-+	vtbl.8  q2#lo, {q0}, q1#lo
-+	vtbl.8  q2#hi, {q0}, q1#hi
-+	add	r3, r3, #16
-+	and	r3, r3, #~(1<<6)
-+	vmov	q0, q2
-+
-+	@ Handle the last key differently.
-+	subs	r2, r2, #1
-+	beq	.Loop_enc_key_to_bsaes_last
-+
-+	@ Multiply by the circulant. This is its own inverse.
-+	vtbl.8	q1#lo, {q0}, $mc_forward#lo
-+	vtbl.8	q1#hi, {q0}, $mc_forward#hi
-+	vmov	q0, q1
-+	vtbl.8	q2#lo, {q1}, $mc_forward#lo
-+	vtbl.8	q2#hi, {q1}, $mc_forward#hi
-+	veor	q0, q0, q2
-+	vtbl.8	q1#lo, {q2}, $mc_forward#lo
-+	vtbl.8	q1#hi, {q2}, $mc_forward#hi
-+	veor	q0, q0, q1
-+
-+	@ XOR and finish.
-+	veor	q0, q0, $s63
-+	bl	_vpaes_schedule_transform
-+	vrev32.8	q0, q0
-+	vst1.64	{q0}, [$out]!
-+	b	.Loop_enc_key_to_bsaes
-+
-+.Loop_enc_key_to_bsaes_last:
-+	@ The final key does not have a basis transform (note
-+	@ .Lschedule_mangle_last inverts the original transform). It only XORs
-+	@ 0x63 and applies ShiftRows. The latter was already inverted in the
-+	@ loop. Note that, because we act on the original representation, we use
-+	@ $s63_raw, not $s63.
-+	veor	q0, q0, $s63_raw
-+	vrev32.8	q0, q0
-+	vst1.64	{q0}, [$out]
-+
-+	@ Wipe registers which contained key material.
-+	veor	q0, q0, q0
-+	veor	q1, q1, q1
-+	veor	q2, q2, q2
-+
-+	ldmia	sp!, {r11, pc}	@ return
-+.size	GFp_vpaes_encrypt_key_to_bsaes,.-GFp_vpaes_encrypt_key_to_bsaes
-+___
-+}
-+
-+{
-+# Register-passed parameters.
-+my ($inp, $out, $len, $key) = map("r$_", 0..3);
-+# Temporaries. _vpaes_encrypt_core already uses r8..r11, so overlap $ivec and
-+# $tmp. $ctr is r7 because it must be preserved across calls.
-+my ($ctr, $ivec, $tmp) = map("r$_", 7..9);
-+
-+# void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
-+#                                 const AES_KEY *key, const uint8_t ivec[16]);
-+$code .= <<___;
-+.globl	GFp_vpaes_ctr32_encrypt_blocks
-+.type	GFp_vpaes_ctr32_encrypt_blocks,%function
-+.align	4
-+GFp_vpaes_ctr32_encrypt_blocks:
-+	mov	ip, sp
-+	stmdb	sp!, {r7-r11, lr}
-+	@ This function uses q4-q7 (d8-d15), which are callee-saved.
-+	vstmdb	sp!, {d8-d15}
-+
-+	cmp	$len, #0
-+	@ $ivec is passed on the stack.
-+	ldr	$ivec, [ip]
-+	beq	.Lctr32_done
-+
-+	@ _vpaes_encrypt_core expects the key in r2, so swap $len and $key.
-+	mov	$tmp, $key
-+	mov	$key, $len
-+	mov	$len, $tmp
-+___
-+my ($len, $key) = ($key, $len);
-+$code .= <<___;
-+
-+	@ Load the IV and counter portion.
-+	ldr	$ctr, [$ivec, #12]
-+	vld1.8	{q7}, [$ivec]
-+
-+	bl	_vpaes_preheat
-+	rev	$ctr, $ctr		@ The counter is big-endian.
-+
-+.Lctr32_loop:
-+	vmov	q0, q7
-+	vld1.8	{q6}, [$inp]!		@ Load input ahead of time
-+	bl	_vpaes_encrypt_core
-+	veor	q0, q0, q6		@ XOR input and result
-+	vst1.8	{q0}, [$out]!
-+	subs	$len, $len, #1
-+	@ Update the counter.
-+	add	$ctr, $ctr, #1
-+	rev	$tmp, $ctr
-+	vmov.32	q7#hi[1], $tmp
-+	bne	.Lctr32_loop
-+
-+.Lctr32_done:
-+	vldmia	sp!, {d8-d15}
-+	ldmia	sp!, {r7-r11, pc}	@ return
-+.size	GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks
-+___
-+}
-+
-+foreach (split("\n",$code)) {
-+	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo;
-+	print $_,"\n";
-+}
-+
-+close STDOUT;
-diff --git a/crypto/fipsmodule/aes/asm/vpaes-armv8.pl b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
-new file mode 100755
-index 0000000..b31bbb8
---- /dev/null
-+++ b/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
-@@ -0,0 +1,837 @@
-+#! /usr/bin/env perl
-+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
-+#
-+# Licensed under the OpenSSL license (the "License").  You may not use
-+# this file except in compliance with the License.  You can obtain a copy
-+# in the file LICENSE in the source distribution or at
-+# https://www.openssl.org/source/license.html
-+
-+
-+######################################################################
-+## Constant-time SSSE3 AES core implementation.
-+## version 0.1
-+##
-+## By Mike Hamburg (Stanford University), 2009
-+## Public domain.
-+##
-+## For details see http://shiftleft.org/papers/vector_aes/ and
-+## http://crypto.stanford.edu/vpaes/.
-+##
-+######################################################################
-+# ARMv8 NEON adaptation by <appro@openssl.org>
-+#
-+# Reason for undertaken effort is that there is at least one popular
-+# SoC based on Cortex-A53 that doesn't have crypto extensions.
-+#
-+#                   CBC enc     ECB enc/dec(*)   [bit-sliced enc/dec]
-+# Cortex-A53        21.5        18.1/20.6        [17.5/19.8         ]
-+# Cortex-A57        36.0(**)    20.4/24.9(**)    [14.4/16.6         ]
-+# X-Gene            45.9(**)    45.8/57.7(**)    [33.1/37.6(**)     ]
-+# Denver(***)       16.6(**)    15.1/17.8(**)    [8.80/9.93         ]
-+# Apple A7(***)     22.7(**)    10.9/14.3        [8.45/10.0         ]
-+# Mongoose(***)     26.3(**)    21.0/25.0(**)    [13.3/16.8         ]
-+#
-+# (*)	ECB denotes approximate result for parallelizable modes
-+#	such as CBC decrypt, CTR, etc.;
-+# (**)	these results are worse than scalar compiler-generated
-+#	code, but it's constant-time and therefore preferred;
-+# (***)	presented for reference/comparison purposes;
-+
-+$flavour = shift;
-+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
-+
-+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-+( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
-+die "can't locate arm-xlate.pl";
-+
-+open OUT,"| \"$^X\" $xlate $flavour $output";
-+*STDOUT=*OUT;
-+
-+$code.=<<___;
-+#include <GFp/arm_arch.h>
-+
-+.section	.rodata
-+
-+.type	_vpaes_consts,%object
-+.align	7	// totally strategic alignment
-+_vpaes_consts:
-+.Lk_mc_forward:	// mc_forward
-+	.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
-+	.quad	0x080B0A0904070605, 0x000302010C0F0E0D
-+	.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
-+	.quad	0x000302010C0F0E0D, 0x080B0A0904070605
-+.Lk_mc_backward:// mc_backward
-+	.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
-+	.quad	0x020100030E0D0C0F, 0x0A09080B06050407
-+	.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
-+	.quad	0x0A09080B06050407, 0x020100030E0D0C0F
-+.Lk_sr:		// sr
-+	.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
-+	.quad	0x030E09040F0A0500, 0x0B06010C07020D08
-+	.quad	0x0F060D040B020900, 0x070E050C030A0108
-+	.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
-+
-+//
-+// "Hot" constants
-+//
-+.Lk_inv:	// inv, inva
-+	.quad	0x0E05060F0D080180, 0x040703090A0B0C02
-+	.quad	0x01040A060F0B0780, 0x030D0E0C02050809
-+.Lk_ipt:	// input transform (lo, hi)
-+	.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
-+	.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
-+.Lk_sbo:	// sbou, sbot
-+	.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
-+	.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
-+.Lk_sb1:	// sb1u, sb1t
-+	.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
-+	.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
-+.Lk_sb2:	// sb2u, sb2t
-+	.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
-+	.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
-+
-+//
-+//  Key schedule constants
-+//
-+.Lk_dksd:	// decryption key schedule: invskew x*D
-+	.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
-+	.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
-+.Lk_dksb:	// decryption key schedule: invskew x*B
-+	.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
-+	.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
-+.Lk_dkse:	// decryption key schedule: invskew x*E + 0x63
-+	.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
-+	.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
-+.Lk_dks9:	// decryption key schedule: invskew x*9
-+	.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
-+	.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE
-+
-+.Lk_rcon:	// rcon
-+	.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
-+
-+.Lk_opt:	// output transform
-+	.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
-+	.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
-+.Lk_deskew:	// deskew tables: inverts the sbox's "skew"
-+	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
-+	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
-+
-+.asciz  "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
-+.size	_vpaes_consts,.-_vpaes_consts
-+.align	6
-+
-+.text
-+___
-+
-+{
-+my ($inp,$out,$key) = map("x$_",(0..2));
-+
-+my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_.16b",(18..23));
-+my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_.16b",(24..27));
-+my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_.16b",(24..31));
-+
-+$code.=<<___;
-+##
-+##  _aes_preheat
-+##
-+##  Fills register %r10 -> .aes_consts (so you can -fPIC)
-+##  and %xmm9-%xmm15 as specified below.
-+##
-+.type	_vpaes_encrypt_preheat,%function
-+.align	4
-+_vpaes_encrypt_preheat:
-+	adrp	x10, :pg_hi21:.Lk_inv
-+	add	x10, x10, :lo12:.Lk_inv
-+	movi	v17.16b, #0x0f
-+	ld1	{v18.2d-v19.2d}, [x10],#32	// .Lk_inv
-+	ld1	{v20.2d-v23.2d}, [x10],#64	// .Lk_ipt, .Lk_sbo
-+	ld1	{v24.2d-v27.2d}, [x10]		// .Lk_sb1, .Lk_sb2
-+	ret
-+.size	_vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat
-+
-+##
-+##  _aes_encrypt_core
-+##
-+##  AES-encrypt %xmm0.
-+##
-+##  Inputs:
-+##     %xmm0 = input
-+##     %xmm9-%xmm15 as in _vpaes_preheat
-+##    (%rdx) = scheduled keys
-+##
-+##  Output in %xmm0
-+##  Clobbers  %xmm1-%xmm5, %r9, %r10, %r11, %rax
-+##  Preserves %xmm6 - %xmm8 so you get some local vectors
-+##
-+##
-+.type	_vpaes_encrypt_core,%function
-+.align 4
-+_vpaes_encrypt_core:
-+	mov	x9, $key
-+	ldr	w8, [$key,#240]			// pull rounds
-+	adrp	x11, :pg_hi21:.Lk_mc_forward+16
-+	add	x11, x11, :lo12:.Lk_mc_forward+16
-+						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
-+	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
-+	and	v1.16b, v7.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
-+	ushr	v0.16b, v7.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0
-+	tbl	v1.16b, {$iptlo}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
-+						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
-+	tbl	v2.16b, {$ipthi}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
-+	eor	v0.16b, v1.16b, v16.16b		// vpxor	%xmm5,	%xmm1,	%xmm0
-+	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
-+	b	.Lenc_entry
-+
-+.align 4
-+.Lenc_loop:
-+	// middle of middle round
-+	add	x10, x11, #0x40
-+	tbl	v4.16b, {$sb1t}, v2.16b		// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
-+	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
-+	tbl	v0.16b, {$sb1u}, v3.16b		// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
-+	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
-+	tbl	v5.16b,	{$sb2t}, v2.16b		// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
-+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
-+	tbl	v2.16b, {$sb2u}, v3.16b		// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
-+	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
-+	tbl	v3.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
-+	eor	v2.16b, v2.16b, v5.16b		// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
-+	tbl	v0.16b, {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
-+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
-+	tbl	v4.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
-+	eor	v0.16b, v0.16b, v3.16b		// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
-+	and	x11, x11, #~(1<<6)		// and		\$0x30,	%r11		# ... mod 4
-+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
-+	sub	w8, w8, #1			// nr--
-+
-+.Lenc_entry:
-+	// top of round
-+	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
-+	ushr	v0.16b, v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
-+	tbl	v5.16b, {$invhi}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
-+	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
-+	tbl	v3.16b, {$invlo}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
-+	tbl	v4.16b, {$invlo}, v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
-+	eor	v3.16b, v3.16b, v5.16b		// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
-+	eor	v4.16b, v4.16b, v5.16b		// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
-+	tbl	v2.16b, {$invlo}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
-+	tbl	v3.16b, {$invlo}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
-+	eor	v2.16b, v2.16b, v1.16b		// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
-+	eor	v3.16b, v3.16b, v0.16b		// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
-+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
-+	cbnz	w8, .Lenc_loop
-+
-+	// middle of last round
-+	add	x10, x11, #0x80
-+						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
-+						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
-+	tbl	v4.16b, {$sbou}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
-+	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
-+	tbl	v0.16b, {$sbot}, v3.16b		// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
-+	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
-+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
-+	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
-+	ret
-+.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
-+
-+.globl	GFp_vpaes_encrypt
-+.type	GFp_vpaes_encrypt,%function
-+.align	4
-+GFp_vpaes_encrypt:
-+	AARCH64_SIGN_LINK_REGISTER
-+	stp	x29,x30,[sp,#-16]!
-+	add	x29,sp,#0
-+
-+	ld1	{v7.16b}, [$inp]
-+	bl	_vpaes_encrypt_preheat
-+	bl	_vpaes_encrypt_core
-+	st1	{v0.16b}, [$out]
-+
-+	ldp	x29,x30,[sp],#16
-+	AARCH64_VALIDATE_LINK_REGISTER
-+	ret
-+.size	GFp_vpaes_encrypt,.-GFp_vpaes_encrypt
-+
-+.type	_vpaes_encrypt_2x,%function
-+.align 4
-+_vpaes_encrypt_2x:
-+	mov	x9, $key
-+	ldr	w8, [$key,#240]			// pull rounds
-+	adrp	x11, :pg_hi21:.Lk_mc_forward+16
-+	add	x11, x11, :lo12:.Lk_mc_forward+16
-+						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
-+	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
-+	and	v1.16b,  v14.16b,  v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1
-+	ushr	v0.16b,  v14.16b,  #4		// vpsrlb	\$4,	%xmm0,	%xmm0
-+	 and	v9.16b,  v15.16b,  v17.16b
-+	 ushr	v8.16b,  v15.16b,  #4
-+	tbl	v1.16b,  {$iptlo}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
-+	 tbl	v9.16b,  {$iptlo}, v9.16b
-+						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
-+	tbl	v2.16b,  {$ipthi}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
-+	 tbl	v10.16b, {$ipthi}, v8.16b
-+	eor	v0.16b,  v1.16b,   v16.16b	// vpxor	%xmm5,	%xmm1,	%xmm0
-+	 eor	v8.16b,  v9.16b,   v16.16b
-+	eor	v0.16b,  v0.16b,   v2.16b	// vpxor	%xmm2,	%xmm0,	%xmm0
-+	 eor	v8.16b,  v8.16b,   v10.16b
-+	b	.Lenc_2x_entry
-+
-+.align 4
-+.Lenc_2x_loop:
-+	// middle of middle round
-+	add	x10, x11, #0x40
-+	tbl	v4.16b,  {$sb1t}, v2.16b	// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
-+	 tbl	v12.16b, {$sb1t}, v10.16b
-+	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
-+	tbl	v0.16b,  {$sb1u}, v3.16b	// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
-+	 tbl	v8.16b,  {$sb1u}, v11.16b
-+	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
-+	 eor	v12.16b, v12.16b, v16.16b
-+	tbl	v5.16b,	 {$sb2t}, v2.16b	// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
-+	 tbl	v13.16b, {$sb2t}, v10.16b
-+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
-+	 eor	v8.16b,  v8.16b,  v12.16b
-+	tbl	v2.16b,  {$sb2u}, v3.16b	// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
-+	 tbl	v10.16b, {$sb2u}, v11.16b
-+	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
-+	tbl	v3.16b,  {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
-+	 tbl	v11.16b, {v8.16b}, v1.16b
-+	eor	v2.16b,  v2.16b,  v5.16b	// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
-+	 eor	v10.16b, v10.16b, v13.16b
-+	tbl	v0.16b,  {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
-+	 tbl	v8.16b,  {v8.16b}, v4.16b
-+	eor	v3.16b,  v3.16b,  v2.16b	// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
-+	 eor	v11.16b, v11.16b, v10.16b
-+	tbl	v4.16b,  {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
-+	 tbl	v12.16b, {v11.16b},v1.16b
-+	eor	v0.16b,  v0.16b,  v3.16b	// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
-+	 eor	v8.16b,  v8.16b,  v11.16b
-+	and	x11, x11, #~(1<<6)		// and		\$0x30,	%r11		# ... mod 4
-+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
-+	 eor	v8.16b,  v8.16b,  v12.16b
-+	sub	w8, w8, #1			// nr--
-+
-+.Lenc_2x_entry:
-+	// top of round
-+	and	v1.16b,  v0.16b, v17.16b	// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
-+	ushr	v0.16b,  v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
-+	 and	v9.16b,  v8.16b, v17.16b
-+	 ushr	v8.16b,  v8.16b, #4
-+	tbl	v5.16b,  {$invhi},v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
-+	 tbl	v13.16b, {$invhi},v9.16b
-+	eor	v1.16b,  v1.16b,  v0.16b	// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
-+	 eor	v9.16b,  v9.16b,  v8.16b
-+	tbl	v3.16b,  {$invlo},v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
-+	 tbl	v11.16b, {$invlo},v8.16b
-+	tbl	v4.16b,  {$invlo},v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
-+	 tbl	v12.16b, {$invlo},v9.16b
-+	eor	v3.16b,  v3.16b,  v5.16b	// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
-+	 eor	v11.16b, v11.16b, v13.16b
-+	eor	v4.16b,  v4.16b,  v5.16b	// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
-+	 eor	v12.16b, v12.16b, v13.16b
-+	tbl	v2.16b,  {$invlo},v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
-+	 tbl	v10.16b, {$invlo},v11.16b
-+	tbl	v3.16b,  {$invlo},v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
-+	 tbl	v11.16b, {$invlo},v12.16b
-+	eor	v2.16b,  v2.16b,  v1.16b	// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
-+	 eor	v10.16b, v10.16b, v9.16b
-+	eor	v3.16b,  v3.16b,  v0.16b	// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
-+	 eor	v11.16b, v11.16b, v8.16b
-+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
-+	cbnz	w8, .Lenc_2x_loop
-+
-+	// middle of last round
-+	add	x10, x11, #0x80
-+						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
-+						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
-+	tbl	v4.16b,  {$sbou}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
-+	 tbl	v12.16b, {$sbou}, v10.16b
-+	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
-+	tbl	v0.16b,  {$sbot}, v3.16b	// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
-+	 tbl	v8.16b,  {$sbot}, v11.16b
-+	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
-+	 eor	v12.16b, v12.16b, v16.16b
-+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
-+	 eor	v8.16b,  v8.16b,  v12.16b
-+	tbl	v0.16b,  {v0.16b},v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
-+	 tbl	v1.16b,  {v8.16b},v1.16b
-+	ret
-+.size	_vpaes_encrypt_2x,.-_vpaes_encrypt_2x
-+___
-+}
-+{
-+my ($inp,$bits,$out,$dir)=("x0","w1","x2","w3");
-+my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_.16b",(18..21,8));
-+
-+$code.=<<___;
-+########################################################
-+##                                                    ##
-+##                  AES key schedule                  ##
-+##                                                    ##
-+########################################################
-+.type	_vpaes_key_preheat,%function
-+.align	4
-+_vpaes_key_preheat:
-+	adrp	x10, :pg_hi21:.Lk_inv
-+	add	x10, x10, :lo12:.Lk_inv
-+	movi	v16.16b, #0x5b			// .Lk_s63
-+	adrp	x11, :pg_hi21:.Lk_sb1
-+	add	x11, x11, :lo12:.Lk_sb1
-+	movi	v17.16b, #0x0f			// .Lk_s0F
-+	ld1	{v18.2d-v21.2d}, [x10]		// .Lk_inv, .Lk_ipt
-+	adrp	x10, :pg_hi21:.Lk_dksd
-+	add	x10, x10, :lo12:.Lk_dksd
-+	ld1	{v22.2d-v23.2d}, [x11]		// .Lk_sb1
-+	adrp	x11, :pg_hi21:.Lk_mc_forward
-+	add	x11, x11, :lo12:.Lk_mc_forward
-+	ld1	{v24.2d-v27.2d}, [x10],#64	// .Lk_dksd, .Lk_dksb
-+	ld1	{v28.2d-v31.2d}, [x10],#64	// .Lk_dkse, .Lk_dks9
-+	ld1	{v8.2d}, [x10]			// .Lk_rcon
-+	ld1	{v9.2d}, [x11]			// .Lk_mc_forward[0]
-+	ret
-+.size	_vpaes_key_preheat,.-_vpaes_key_preheat
-+
-+.type	_vpaes_schedule_core,%function
-+.align	4
-+_vpaes_schedule_core:
-+	AARCH64_SIGN_LINK_REGISTER
-+	stp	x29, x30, [sp,#-16]!
-+	add	x29,sp,#0
-+
-+	bl	_vpaes_key_preheat		// load the tables
-+
-+	ld1	{v0.16b}, [$inp],#16		// vmovdqu	(%rdi),	%xmm0		# load key (unaligned)
-+
-+	// input transform
-+	mov	v3.16b, v0.16b			// vmovdqa	%xmm0,	%xmm3
-+	bl	_vpaes_schedule_transform
-+	mov	v7.16b, v0.16b			// vmovdqa	%xmm0,	%xmm7
-+
-+	adrp	x10, :pg_hi21:.Lk_sr		// lea	.Lk_sr(%rip),%r10
-+	add	x10, x10, :lo12:.Lk_sr
-+
-+	add	x8, x8, x10
-+
-+	// encrypting, output zeroth round key after transform
-+	st1	{v0.2d}, [$out]			// vmovdqu	%xmm0,	(%rdx)
-+
-+	cmp	$bits, #192			// cmp	\$192,	%esi
-+	b.hi	.Lschedule_256
-+	b.eq	.Lschedule_192
-+	// 128: fall though
-+
-+##
-+##  .schedule_128
-+##
-+##  128-bit specific part of key schedule.
-+##
-+##  This schedule is really simple, because all its parts
-+##  are accomplished by the subroutines.
-+##
-+.Lschedule_128:
-+	mov	$inp, #10			// mov	\$10, %esi
-+
-+.Loop_schedule_128:
-+	sub	$inp, $inp, #1			// dec	%esi
-+	bl 	_vpaes_schedule_round
-+	cbz 	$inp, .Lschedule_mangle_last
-+	bl	_vpaes_schedule_mangle		// write output
-+	b 	.Loop_schedule_128
-+
-+##
-+##  .aes_schedule_192
-+##
-+##  192-bit specific part of key schedule.
-+##
-+##  The main body of this schedule is the same as the 128-bit
-+##  schedule, but with more smearing.  The long, high side is
-+##  stored in %xmm7 as before, and the short, low side is in
-+##  the high bits of %xmm6.
-+##
-+##  This schedule is somewhat nastier, however, because each
-+##  round produces 192 bits of key material, or 1.5 round keys.
-+##  Therefore, on each cycle we do 2 rounds and produce 3 round
-+##  keys.
-+##
-+.align	4
-+.Lschedule_192:
-+	sub	$inp, $inp, #8
-+	ld1	{v0.16b}, [$inp]		// vmovdqu	8(%rdi),%xmm0		# load key part 2 (very unaligned)
-+	bl	_vpaes_schedule_transform	// input transform
-+	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save short part
-+	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4, %xmm4	# clear 4
-+	ins	v6.d[0], v4.d[0]		// vmovhlps	%xmm4,	%xmm6,	%xmm6		# clobber low side with zeros
-+	mov	$inp, #4			// mov	\$4,	%esi
-+
-+.Loop_schedule_192:
-+	sub	$inp, $inp, #1			// dec	%esi
-+	bl	_vpaes_schedule_round
-+	ext	v0.16b, v6.16b, v0.16b, #8	// vpalignr	\$8,%xmm6,%xmm0,%xmm0
-+	bl	_vpaes_schedule_mangle		// save key n
-+	bl	_vpaes_schedule_192_smear
-+	bl	_vpaes_schedule_mangle		// save key n+1
-+	bl	_vpaes_schedule_round
-+	cbz 	$inp, .Lschedule_mangle_last
-+	bl	_vpaes_schedule_mangle		// save key n+2
-+	bl	_vpaes_schedule_192_smear
-+	b	.Loop_schedule_192
-+
-+##
-+##  .aes_schedule_256
-+##
-+##  256-bit specific part of key schedule.
-+##
-+##  The structure here is very similar to the 128-bit
-+##  schedule, but with an additional "low side" in
-+##  %xmm6.  The low side's rounds are the same as the
-+##  high side's, except no rcon and no rotation.
-+##
-+.align	4
-+.Lschedule_256:
-+	ld1	{v0.16b}, [$inp]		// vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
-+	bl	_vpaes_schedule_transform	// input transform
-+	mov	$inp, #7			// mov	\$7, %esi
-+
-+.Loop_schedule_256:
-+	sub	$inp, $inp, #1			// dec	%esi
-+	bl	_vpaes_schedule_mangle		// output low result
-+	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
-+
-+	// high round
-+	bl	_vpaes_schedule_round
-+	cbz 	$inp, .Lschedule_mangle_last
-+	bl	_vpaes_schedule_mangle
-+
-+	// low round. swap xmm7 and xmm6
-+	dup	v0.4s, v0.s[3]			// vpshufd	\$0xFF,	%xmm0,	%xmm0
-+	movi	v4.16b, #0
-+	mov	v5.16b, v7.16b			// vmovdqa	%xmm7,	%xmm5
-+	mov	v7.16b, v6.16b			// vmovdqa	%xmm6,	%xmm7
-+	bl	_vpaes_schedule_low_round
-+	mov	v7.16b, v5.16b			// vmovdqa	%xmm5,	%xmm7
-+
-+	b	.Loop_schedule_256
-+
-+##
-+##  .aes_schedule_mangle_last
-+##
-+##  Mangler for last round of key schedule
-+##  Mangles %xmm0
-+##    when encrypting, outputs out(%xmm0) ^ 63
-+##    when decrypting, outputs unskew(%xmm0)
-+##
-+##  Always called right before return... jumps to cleanup and exits
-+##
-+.align	4
-+.Lschedule_mangle_last:
-+	// schedule last round key from xmm0
-+	adrp	x11, :pg_hi21:.Lk_deskew	// lea	.Lk_deskew(%rip),%r11	# prepare to deskew
-+	add	x11, x11, :lo12:.Lk_deskew
-+
-+	cbnz	$dir, .Lschedule_mangle_last_dec
-+
-+	// encrypting
-+	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),%xmm1
-+	adrp	x11, :pg_hi21:.Lk_opt		// lea	.Lk_opt(%rip),	%r11		# prepare to output transform
-+	add	x11, x11, :lo12:.Lk_opt
-+	add	$out, $out, #32			// add	\$32,	%rdx
-+	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0		# output permute
-+
-+.Lschedule_mangle_last_dec:
-+	ld1	{v20.2d-v21.2d}, [x11]		// reload constants
-+	sub	$out, $out, #16			// add	\$-16,	%rdx
-+	eor	v0.16b, v0.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm0,	%xmm0
-+	bl	_vpaes_schedule_transform	// output transform
-+	st1	{v0.2d}, [$out]			// vmovdqu	%xmm0,	(%rdx)		# save last key
-+
-+	// cleanup
-+	eor	v0.16b, v0.16b, v0.16b		// vpxor	%xmm0,	%xmm0,	%xmm0
-+	eor	v1.16b, v1.16b, v1.16b		// vpxor	%xmm1,	%xmm1,	%xmm1
-+	eor	v2.16b, v2.16b, v2.16b		// vpxor	%xmm2,	%xmm2,	%xmm2
-+	eor	v3.16b, v3.16b, v3.16b		// vpxor	%xmm3,	%xmm3,	%xmm3
-+	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4,	%xmm4
-+	eor	v5.16b, v5.16b, v5.16b		// vpxor	%xmm5,	%xmm5,	%xmm5
-+	eor	v6.16b, v6.16b, v6.16b		// vpxor	%xmm6,	%xmm6,	%xmm6
-+	eor	v7.16b, v7.16b, v7.16b		// vpxor	%xmm7,	%xmm7,	%xmm7
-+	ldp	x29, x30, [sp],#16
-+	AARCH64_VALIDATE_LINK_REGISTER
-+	ret
-+.size	_vpaes_schedule_core,.-_vpaes_schedule_core
-+
-+##
-+##  .aes_schedule_192_smear
-+##
-+##  Smear the short, low side in the 192-bit key schedule.
-+##
-+##  Inputs:
-+##    %xmm7: high side, b  a  x  y
-+##    %xmm6:  low side, d  c  0  0
-+##    %xmm13: 0
-+##
-+##  Outputs:
-+##    %xmm6: b+c+d  b+c  0  0
-+##    %xmm0: b+c+d  b+c  b  a
-+##
-+.type	_vpaes_schedule_192_smear,%function
-+.align	4
-+_vpaes_schedule_192_smear:
-+	movi	v1.16b, #0
-+	dup	v0.4s, v7.s[3]
-+	ins	v1.s[3], v6.s[2]	// vpshufd	\$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
-+	ins	v0.s[0], v7.s[2]	// vpshufd	\$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
-+	eor	v6.16b, v6.16b, v1.16b	// vpxor	%xmm1,	%xmm6,	%xmm6	# -> c+d c 0 0
-+	eor	v1.16b, v1.16b, v1.16b	// vpxor	%xmm1,	%xmm1,	%xmm1
-+	eor	v6.16b, v6.16b, v0.16b	// vpxor	%xmm0,	%xmm6,	%xmm6	# -> b+c+d b+c b a
-+	mov	v0.16b, v6.16b		// vmovdqa	%xmm6,	%xmm0
-+	ins	v6.d[0], v1.d[0]	// vmovhlps	%xmm1,	%xmm6,	%xmm6	# clobber low side with zeros
-+	ret
-+.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
-+
-+##
-+##  .aes_schedule_round
-+##
-+##  Runs one main round of the key schedule on %xmm0, %xmm7
-+##
-+##  Specifically, runs subbytes on the high dword of %xmm0
-+##  then rotates it by one byte and xors into the low dword of
-+##  %xmm7.
-+##
-+##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
-+##  next rcon.
-+##
-+##  Smears the dwords of %xmm7 by xoring the low into the
-+##  second low, result into third, result into highest.
-+##
-+##  Returns results in %xmm7 = %xmm0.
-+##  Clobbers %xmm1-%xmm4, %r11.
-+##
-+.type	_vpaes_schedule_round,%function
-+.align	4
-+_vpaes_schedule_round:
-+	// extract rcon from xmm8
-+	movi	v4.16b, #0			// vpxor	%xmm4,	%xmm4,	%xmm4
-+	ext	v1.16b, $rcon, v4.16b, #15	// vpalignr	\$15,	%xmm8,	%xmm4,	%xmm1
-+	ext	$rcon, $rcon, $rcon, #15	// vpalignr	\$15,	%xmm8,	%xmm8,	%xmm8
-+	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7
-+
-+	// rotate
-+	dup	v0.4s, v0.s[3]			// vpshufd	\$0xFF,	%xmm0,	%xmm0
-+	ext	v0.16b, v0.16b, v0.16b, #1	// vpalignr	\$1,	%xmm0,	%xmm0,	%xmm0
-+
-+	// fall through...
-+
-+	// low round: same as high round, but no rotation and no rcon.
-+_vpaes_schedule_low_round:
-+	// smear xmm7
-+	ext	v1.16b, v4.16b, v7.16b, #12	// vpslldq	\$4,	%xmm7,	%xmm1
-+	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7
-+	ext	v4.16b, v4.16b, v7.16b, #8	// vpslldq	\$8,	%xmm7,	%xmm4
-+
-+	// subbytes
-+	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1		# 0 = k
-+	ushr	v0.16b, v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0		# 1 = i
-+	 eor	v7.16b, v7.16b, v4.16b		// vpxor	%xmm4,	%xmm7,	%xmm7
-+	tbl	v2.16b, {$invhi}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
-+	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1		# 0 = j
-+	tbl	v3.16b, {$invlo}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
-+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
-+	tbl	v4.16b, {$invlo}, v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
-+	 eor	v7.16b, v7.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm7,	%xmm7
-+	tbl	v3.16b, {$invlo}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
-+	eor	v4.16b, v4.16b, v2.16b		// vpxor	%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
-+	tbl	v2.16b, {$invlo}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
-+	eor	v3.16b, v3.16b, v1.16b		// vpxor	%xmm1,	%xmm3,	%xmm3		# 2 = io
-+	eor	v2.16b, v2.16b, v0.16b		// vpxor	%xmm0,	%xmm2,	%xmm2		# 3 = jo
-+	tbl	v4.16b, {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
-+	tbl	v1.16b, {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
-+	eor	v1.16b, v1.16b, v4.16b		// vpxor	%xmm4,	%xmm1,	%xmm1		# 0 = sbox output
-+
-+	// add in smeared stuff
-+	eor	v0.16b, v1.16b, v7.16b		// vpxor	%xmm7,	%xmm1,	%xmm0
-+	eor	v7.16b, v1.16b, v7.16b		// vmovdqa	%xmm0,	%xmm7
-+	ret
-+.size	_vpaes_schedule_round,.-_vpaes_schedule_round
-+
-+##
-+##  .aes_schedule_transform
-+##
-+##  Linear-transform %xmm0 according to tables at (%r11)
-+##
-+##  Requires that %xmm9 = 0x0F0F... as in preheat
-+##  Output in %xmm0
-+##  Clobbers %xmm1, %xmm2
-+##
-+.type	_vpaes_schedule_transform,%function
-+.align	4
-+_vpaes_schedule_transform:
-+	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
-+	ushr	v0.16b, v0.16b, #4		// vpsrlb	\$4,	%xmm0,	%xmm0
-+						// vmovdqa	(%r11),	%xmm2 	# lo
-+	tbl	v2.16b, {$iptlo}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
-+						// vmovdqa	16(%r11),	%xmm1 # hi
-+	tbl	v0.16b, {$ipthi}, v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
-+	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
-+	ret
-+.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
-+
-+##
-+##  .aes_schedule_mangle
-+##
-+##  Mangle xmm0 from (basis-transformed) standard version
-+##  to our version.
-+##
-+##  On encrypt,
-+##    xor with 0x63
-+##    multiply by circulant 0,1,1,1
-+##    apply shiftrows transform
-+##
-+##  On decrypt,
-+##    xor with 0x63
-+##    multiply by "inverse mixcolumns" circulant E,B,D,9
-+##    deskew
-+##    apply shiftrows transform
-+##
-+##
-+##  Writes out to (%rdx), and increments or decrements it
-+##  Keeps track of round number mod 4 in %r8
-+##  Preserves xmm0
-+##  Clobbers xmm1-xmm5
-+##
-+.type	_vpaes_schedule_mangle,%function
-+.align	4
-+_vpaes_schedule_mangle:
-+	mov	v4.16b, v0.16b			// vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
-+						// vmovdqa	.Lk_mc_forward(%rip),%xmm5
-+
-+	// encrypting
-+	eor	v4.16b, v0.16b, v16.16b		// vpxor	.Lk_s63(%rip),	%xmm0,	%xmm4
-+	add	$out, $out, #16			// add	\$16,	%rdx
-+	tbl	v4.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm4
-+	tbl	v1.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm1
-+	tbl	v3.16b, {v1.16b}, v9.16b	// vpshufb	%xmm5,	%xmm1,	%xmm3
-+	eor	v4.16b, v4.16b, v1.16b		// vpxor	%xmm1,	%xmm4,	%xmm4
-+	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
-+	eor	v3.16b, v3.16b, v4.16b		// vpxor	%xmm4,	%xmm3,	%xmm3
-+
-+.Lschedule_mangle_both:
-+	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
-+	add	x8, x8, #64-16			// add	\$-16,	%r8
-+	and	x8, x8, #~(1<<6)		// and	\$0x30,	%r8
-+	st1	{v3.2d}, [$out]			// vmovdqu	%xmm3,	(%rdx)
-+	ret
-+.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
-+
-+.globl	GFp_vpaes_set_encrypt_key
-+.type	GFp_vpaes_set_encrypt_key,%function
-+.align	4
-+GFp_vpaes_set_encrypt_key:
-+	AARCH64_SIGN_LINK_REGISTER
-+	stp	x29,x30,[sp,#-16]!
-+	add	x29,sp,#0
-+	stp	d8,d9,[sp,#-16]!	// ABI spec says so
-+
-+	lsr	w9, $bits, #5		// shr	\$5,%eax
-+	add	w9, w9, #5		// \$5,%eax
-+	str	w9, [$out,#240]		// mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
-+
-+	mov	$dir, #0		// mov	\$0,%ecx
-+	mov	x8, #0x30		// mov	\$0x30,%r8d
-+	bl	_vpaes_schedule_core
-+	eor	x0, x0, x0
-+
-+	ldp	d8,d9,[sp],#16
-+	ldp	x29,x30,[sp],#16
-+	AARCH64_VALIDATE_LINK_REGISTER
-+	ret
-+.size	GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key
-+___
-+}
-+{
-+my ($inp,$out,$len,$key,$ivec) = map("x$_",(0..4));
-+my ($ctr, $ctr_tmp) = ("w6", "w7");
-+
-+# void GFp_vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
-+#                                     const AES_KEY *key, const uint8_t ivec[16]);
-+$code.=<<___;
-+.globl	GFp_vpaes_ctr32_encrypt_blocks
-+.type	GFp_vpaes_ctr32_encrypt_blocks,%function
-+.align	4
-+GFp_vpaes_ctr32_encrypt_blocks:
-+	AARCH64_SIGN_LINK_REGISTER
-+	stp	x29,x30,[sp,#-16]!
-+	add	x29,sp,#0
-+	stp	d8,d9,[sp,#-16]!	// ABI spec says so
-+	stp	d10,d11,[sp,#-16]!
-+	stp	d12,d13,[sp,#-16]!
-+	stp	d14,d15,[sp,#-16]!
-+
-+	cbz	$len, .Lctr32_done
-+
-+	// Note, unlike the other functions, $len here is measured in blocks,
-+	// not bytes.
-+	mov	x17, $len
-+	mov	x2,  $key
-+
-+	// Load the IV and counter portion.
-+	ldr	$ctr, [$ivec, #12]
-+	ld1	{v7.16b}, [$ivec]
-+
-+	bl	_vpaes_encrypt_preheat
-+	tst	x17, #1
-+	rev	$ctr, $ctr		// The counter is big-endian.
-+	b.eq	.Lctr32_prep_loop
-+
-+	// Handle one block so the remaining block count is even for
-+	// _vpaes_encrypt_2x.
-+	ld1	{v6.16b}, [$inp], #16	// Load input ahead of time
-+	bl	_vpaes_encrypt_core
-+	eor	v0.16b, v0.16b, v6.16b	// XOR input and result
-+	st1	{v0.16b}, [$out], #16
-+	subs	x17, x17, #1
-+	// Update the counter.
-+	add	$ctr, $ctr, #1
-+	rev	$ctr_tmp, $ctr
-+	mov	v7.s[3], $ctr_tmp
-+	b.ls	.Lctr32_done
-+
-+.Lctr32_prep_loop:
-+	// _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x
-+	// uses v14 and v15.
-+	mov	v15.16b, v7.16b
-+	mov	v14.16b, v7.16b
-+	add	$ctr, $ctr, #1
-+	rev	$ctr_tmp, $ctr
-+	mov	v15.s[3], $ctr_tmp
-+
-+.Lctr32_loop:
-+	ld1	{v6.16b,v7.16b}, [$inp], #32	// Load input ahead of time
-+	bl	_vpaes_encrypt_2x
-+	eor	v0.16b, v0.16b, v6.16b		// XOR input and result
-+	eor	v1.16b, v1.16b, v7.16b		// XOR input and result (#2)
-+	st1	{v0.16b,v1.16b}, [$out], #32
-+	subs	x17, x17, #2
-+	// Update the counter.
-+	add	$ctr_tmp, $ctr, #1
-+	add	$ctr, $ctr, #2
-+	rev	$ctr_tmp, $ctr_tmp
-+	mov	v14.s[3], $ctr_tmp
-+	rev	$ctr_tmp, $ctr
-+	mov	v15.s[3], $ctr_tmp
-+	b.hi	.Lctr32_loop
-+
-+.Lctr32_done:
-+	ldp	d14,d15,[sp],#16
-+	ldp	d12,d13,[sp],#16
-+	ldp	d10,d11,[sp],#16
-+	ldp	d8,d9,[sp],#16
-+	ldp	x29,x30,[sp],#16
-+	AARCH64_VALIDATE_LINK_REGISTER
-+	ret
-+.size	GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks
-+___
-+}
-+
-+print $code;
-+
-+close STDOUT or die "error closing STDOUT";
-diff --git a/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
-new file mode 100644
-index 0000000..7e52ad6
---- /dev/null
-+++ b/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
-@@ -0,0 +1,294 @@
-+#! /usr/bin/env perl
-+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
-+#
-+# Licensed under the OpenSSL license (the "License").  You may not use
-+# this file except in compliance with the License.  You can obtain a copy
-+# in the file LICENSE in the source distribution or at
-+# https://www.openssl.org/source/license.html
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# This file was adapted to AArch64 from the 32-bit version in ghash-armv4.pl. It
-+# implements the multiplication algorithm described in:
-+#
-+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
-+# Polynomial Multiplication on ARM Processors using the NEON Engine.
-+#
-+# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
-+#
-+# The main distinction to keep in mind between 32-bit NEON and AArch64 SIMD is
-+# AArch64 cannot compute over the upper halves of SIMD registers. In 32-bit
-+# NEON, the low and high halves of the 128-bit register q0 are accessible as
-+# 64-bit registers d0 and d1, respectively. In AArch64, dN is the lower half of
-+# vN. Where the 32-bit version would use the upper half, this file must keep
-+# halves in separate registers.
-+#
-+# The other distinction is in syntax. 32-bit NEON embeds lane information in the
-+# instruction name, while AArch64 uses suffixes on the registers. For instance,
-+# left-shifting 64-bit lanes of a SIMD register in 32-bit would be written:
-+#
-+#     vshl.i64 q0, q0, #1
-+#
-+# in 64-bit, it would be written:
-+#
-+#     shl v0.2d, v0.2d, #1
-+#
-+# See Programmer's Guide for ARMv8-A, section 7 for details.
-+# http://infocenter.arm.com/help/topic/com.arm.doc.den0024a/DEN0024A_v8_architecture_PG.pdf
-+#
-+# Finally, note the 8-bit and 64-bit polynomial multipliers in AArch64 differ
-+# only by suffix. pmull vR.8h, vA.8b, vB.8b multiplies eight 8-bit polynomials
-+# and is always available. pmull vR.1q, vA.1d, vB.1d multiplies a 64-bit
-+# polynomial and is conditioned on the PMULL extension. This file emulates the
-+# latter with the former.
-+
-+use strict;
-+
-+my $flavour = shift;
-+my $output;
-+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
-+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
-+
-+if ($flavour && $flavour ne "void") {
-+    $0 =~ m/(.*[\/\\])[^\/\\]+$/;
-+    my $dir = $1;
-+    my $xlate;
-+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-+    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
-+    die "can't locate arm-xlate.pl";
-+
-+    open OUT,"| \"$^X\" $xlate $flavour $output";
-+    *STDOUT=*OUT;
-+} else {
-+    open OUT,">$output";
-+    *STDOUT=*OUT;
-+}
-+
-+my ($Xi, $Htbl, $inp, $len) = map("x$_", (0..3));	# argument block
-+my ($Xl, $Xm, $Xh, $INlo, $INhi) = map("v$_", (0..4));
-+my ($Hlo, $Hhi, $Hhl) = map("v$_", (5..7));
-+# d8-d15 are callee-saved, so avoid v8-v15. AArch64 SIMD has plenty of registers
-+# to spare.
-+my ($t0, $t1, $t2, $t3) = map("v$_", (16..19));
-+my ($t0l_t1l, $t0h_t1h, $t2l_t3l, $t2h_t3h) = map("v$_", (20..23));
-+my ($k48_k32, $k16_k0) = map("v$_", (24..25));
-+
-+my $code = "";
-+
-+# clmul64x64 emits code which emulates pmull $r.1q, $a.1d, $b.1d. $r, $a, and $b
-+# must be distinct from $t* and $k*. $t* are clobbered by the emitted code.
-+sub clmul64x64 {
-+my ($r, $a, $b) = @_;
-+$code .= <<___;
-+	ext	$t0.8b, $a.8b, $a.8b, #1	// A1
-+	pmull	$t0.8h, $t0.8b, $b.8b		// F = A1*B
-+	ext	$r.8b, $b.8b, $b.8b, #1		// B1
-+	pmull	$r.8h, $a.8b, $r.8b		// E = A*B1
-+	ext	$t1.8b, $a.8b, $a.8b, #2	// A2
-+	pmull	$t1.8h, $t1.8b, $b.8b		// H = A2*B
-+	ext	$t3.8b, $b.8b, $b.8b, #2	// B2
-+	pmull	$t3.8h, $a.8b, $t3.8b		// G = A*B2
-+	ext	$t2.8b, $a.8b, $a.8b, #3	// A3
-+	eor	$t0.16b, $t0.16b, $r.16b	// L = E + F
-+	pmull	$t2.8h, $t2.8b, $b.8b		// J = A3*B
-+	ext	$r.8b, $b.8b, $b.8b, #3		// B3
-+	eor	$t1.16b, $t1.16b, $t3.16b	// M = G + H
-+	pmull	$r.8h, $a.8b, $r.8b		// I = A*B3
-+
-+	// Here we diverge from the 32-bit version. It computes the following
-+	// (instructions reordered for clarity):
-+	//
-+	//     veor	\$t0#lo, \$t0#lo, \$t0#hi	@ t0 = P0 + P1 (L)
-+	//     vand	\$t0#hi, \$t0#hi, \$k48
-+	//     veor	\$t0#lo, \$t0#lo, \$t0#hi
-+	//
-+	//     veor	\$t1#lo, \$t1#lo, \$t1#hi	@ t1 = P2 + P3 (M)
-+	//     vand	\$t1#hi, \$t1#hi, \$k32
-+	//     veor	\$t1#lo, \$t1#lo, \$t1#hi
-+	//
-+	//     veor	\$t2#lo, \$t2#lo, \$t2#hi	@ t2 = P4 + P5 (N)
-+	//     vand	\$t2#hi, \$t2#hi, \$k16
-+	//     veor	\$t2#lo, \$t2#lo, \$t2#hi
-+	//
-+	//     veor	\$t3#lo, \$t3#lo, \$t3#hi	@ t3 = P6 + P7 (K)
-+	//     vmov.i64	\$t3#hi, #0
-+	//
-+	// \$kN is a mask with the bottom N bits set. AArch64 cannot compute on
-+	// upper halves of SIMD registers, so we must split each half into
-+	// separate registers. To compensate, we pair computations up and
-+	// parallelize.
-+
-+	ext	$t3.8b, $b.8b, $b.8b, #4	// B4
-+	eor	$t2.16b, $t2.16b, $r.16b	// N = I + J
-+	pmull	$t3.8h, $a.8b, $t3.8b		// K = A*B4
-+
-+	// This can probably be scheduled more efficiently. For now, we just
-+	// pair up independent instructions.
-+	zip1	$t0l_t1l.2d, $t0.2d, $t1.2d
-+	zip1	$t2l_t3l.2d, $t2.2d, $t3.2d
-+	zip2	$t0h_t1h.2d, $t0.2d, $t1.2d
-+	zip2	$t2h_t3h.2d, $t2.2d, $t3.2d
-+	eor	$t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b
-+	eor	$t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b
-+	and	$t0h_t1h.16b, $t0h_t1h.16b, $k48_k32.16b
-+	and	$t2h_t3h.16b, $t2h_t3h.16b, $k16_k0.16b
-+	eor	$t0l_t1l.16b, $t0l_t1l.16b, $t0h_t1h.16b
-+	eor	$t2l_t3l.16b, $t2l_t3l.16b, $t2h_t3h.16b
-+	zip1	$t0.2d, $t0l_t1l.2d, $t0h_t1h.2d
-+	zip1	$t2.2d, $t2l_t3l.2d, $t2h_t3h.2d
-+	zip2	$t1.2d, $t0l_t1l.2d, $t0h_t1h.2d
-+	zip2	$t3.2d, $t2l_t3l.2d, $t2h_t3h.2d
-+
-+	ext	$t0.16b, $t0.16b, $t0.16b, #15	// t0 = t0 << 8
-+	ext	$t1.16b, $t1.16b, $t1.16b, #14	// t1 = t1 << 16
-+	pmull	$r.8h, $a.8b, $b.8b		// D = A*B
-+	ext	$t3.16b, $t3.16b, $t3.16b, #12	// t3 = t3 << 32
-+	ext	$t2.16b, $t2.16b, $t2.16b, #13	// t2 = t2 << 24
-+	eor	$t0.16b, $t0.16b, $t1.16b
-+	eor	$t2.16b, $t2.16b, $t3.16b
-+	eor	$r.16b, $r.16b, $t0.16b
-+	eor	$r.16b, $r.16b, $t2.16b
-+___
-+}
-+
-+$code .= <<___;
-+#include <GFp/arm_arch.h>
-+
-+.text
-+
-+.global	GFp_gcm_init_neon
-+.type	GFp_gcm_init_neon,%function
-+.align	4
-+GFp_gcm_init_neon:
-+	AARCH64_VALID_CALL_TARGET
-+	// This function is adapted from gcm_init_v8. xC2 is t3.
-+	ld1	{$t1.2d}, [x1]			// load H
-+	movi	$t3.16b, #0xe1
-+	shl	$t3.2d, $t3.2d, #57		// 0xc2.0
-+	ext	$INlo.16b, $t1.16b, $t1.16b, #8
-+	ushr	$t2.2d, $t3.2d, #63
-+	dup	$t1.4s, $t1.s[1]
-+	ext	$t0.16b, $t2.16b, $t3.16b, #8	// t0=0xc2....01
-+	ushr	$t2.2d, $INlo.2d, #63
-+	sshr	$t1.4s, $t1.4s, #31		// broadcast carry bit
-+	and	$t2.16b, $t2.16b, $t0.16b
-+	shl	$INlo.2d, $INlo.2d, #1
-+	ext	$t2.16b, $t2.16b, $t2.16b, #8
-+	and	$t0.16b, $t0.16b, $t1.16b
-+	orr	$INlo.16b, $INlo.16b, $t2.16b	// H<<<=1
-+	eor	$Hlo.16b, $INlo.16b, $t0.16b	// twisted H
-+	st1	{$Hlo.2d}, [x0]			// store Htable[0]
-+	ret
-+.size	GFp_gcm_init_neon,.-GFp_gcm_init_neon
-+
-+.global	GFp_gcm_gmult_neon
-+.type	GFp_gcm_gmult_neon,%function
-+.align	4
-+GFp_gcm_gmult_neon:
-+	AARCH64_VALID_CALL_TARGET
-+	ld1	{$INlo.16b}, [$Xi]		// load Xi
-+	ld1	{$Hlo.1d}, [$Htbl], #8		// load twisted H
-+	ld1	{$Hhi.1d}, [$Htbl]
-+	adrp	x9, :pg_hi21:.Lmasks		// load constants
-+	add	x9, x9, :lo12:.Lmasks
-+	ld1	{$k48_k32.2d, $k16_k0.2d}, [x9]
-+	rev64	$INlo.16b, $INlo.16b		// byteswap Xi
-+	ext	$INlo.16b, $INlo.16b, $INlo.16b, #8
-+	eor	$Hhl.8b, $Hlo.8b, $Hhi.8b	// Karatsuba pre-processing
-+
-+	mov	$len, #16
-+	b	.Lgmult_neon
-+.size	GFp_gcm_gmult_neon,.-GFp_gcm_gmult_neon
-+
-+.global	GFp_gcm_ghash_neon
-+.type	GFp_gcm_ghash_neon,%function
-+.align	4
-+GFp_gcm_ghash_neon:
-+	AARCH64_VALID_CALL_TARGET
-+	ld1	{$Xl.16b}, [$Xi]		// load Xi
-+	ld1	{$Hlo.1d}, [$Htbl], #8		// load twisted H
-+	ld1	{$Hhi.1d}, [$Htbl]
-+	adrp	x9, :pg_hi21:.Lmasks		// load constants
-+	add	x9, x9, :lo12:.Lmasks
-+	ld1	{$k48_k32.2d, $k16_k0.2d}, [x9]
-+	rev64	$Xl.16b, $Xl.16b		// byteswap Xi
-+	ext	$Xl.16b, $Xl.16b, $Xl.16b, #8
-+	eor	$Hhl.8b, $Hlo.8b, $Hhi.8b	// Karatsuba pre-processing
-+
-+.Loop_neon:
-+	ld1	{$INlo.16b}, [$inp], #16	// load inp
-+	rev64	$INlo.16b, $INlo.16b		// byteswap inp
-+	ext	$INlo.16b, $INlo.16b, $INlo.16b, #8
-+	eor	$INlo.16b, $INlo.16b, $Xl.16b	// inp ^= Xi
-+
-+.Lgmult_neon:
-+	// Split the input into $INlo and $INhi. (The upper halves are unused,
-+	// so it is okay to leave them alone.)
-+	ins	$INhi.d[0], $INlo.d[1]
-+___
-+&clmul64x64	($Xl, $Hlo, $INlo);		# H.lo·Xi.lo
-+$code .= <<___;
-+	eor	$INlo.8b, $INlo.8b, $INhi.8b	// Karatsuba pre-processing
-+___
-+&clmul64x64	($Xm, $Hhl, $INlo);		# (H.lo+H.hi)·(Xi.lo+Xi.hi)
-+&clmul64x64	($Xh, $Hhi, $INhi);		# H.hi·Xi.hi
-+$code .= <<___;
-+	ext	$t0.16b, $Xl.16b, $Xh.16b, #8
-+	eor	$Xm.16b, $Xm.16b, $Xl.16b	// Karatsuba post-processing
-+	eor	$Xm.16b, $Xm.16b, $Xh.16b
-+	eor	$Xm.16b, $Xm.16b, $t0.16b	// Xm overlaps Xh.lo and Xl.hi
-+	ins	$Xl.d[1], $Xm.d[0]		// Xh|Xl - 256-bit result
-+	// This is a no-op due to the ins instruction below.
-+	// ins	$Xh.d[0], $Xm.d[1]
-+
-+	// equivalent of reduction_avx from ghash-x86_64.pl
-+	shl	$t1.2d, $Xl.2d, #57		// 1st phase
-+	shl	$t2.2d, $Xl.2d, #62
-+	eor	$t2.16b, $t2.16b, $t1.16b	//
-+	shl	$t1.2d, $Xl.2d, #63
-+	eor	$t2.16b, $t2.16b, $t1.16b	//
-+	// Note Xm contains {Xl.d[1], Xh.d[0]}.
-+	eor	$t2.16b, $t2.16b, $Xm.16b
-+	ins	$Xl.d[1], $t2.d[0]		// Xl.d[1] ^= t2.d[0]
-+	ins	$Xh.d[0], $t2.d[1]		// Xh.d[0] ^= t2.d[1]
-+
-+	ushr	$t2.2d, $Xl.2d, #1		// 2nd phase
-+	eor	$Xh.16b, $Xh.16b,$Xl.16b
-+	eor	$Xl.16b, $Xl.16b,$t2.16b	//
-+	ushr	$t2.2d, $t2.2d, #6
-+	ushr	$Xl.2d, $Xl.2d, #1		//
-+	eor	$Xl.16b, $Xl.16b, $Xh.16b	//
-+	eor	$Xl.16b, $Xl.16b, $t2.16b	//
-+
-+	subs	$len, $len, #16
-+	bne	.Loop_neon
-+
-+	rev64	$Xl.16b, $Xl.16b		// byteswap Xi and write
-+	ext	$Xl.16b, $Xl.16b, $Xl.16b, #8
-+	st1	{$Xl.16b}, [$Xi]
-+
-+	ret
-+.size	GFp_gcm_ghash_neon,.-GFp_gcm_ghash_neon
-+
-+.section	.rodata
-+.align	4
-+.Lmasks:
-+.quad	0x0000ffffffffffff	// k48
-+.quad	0x00000000ffffffff	// k32
-+.quad	0x000000000000ffff	// k16
-+.quad	0x0000000000000000	// k0
-+.asciz  "GHASH for ARMv8, derived from ARMv4 version by <appro\@openssl.org>"
-+.align  2
-+___
-+
-+foreach (split("\n",$code)) {
-+	s/\`([^\`]*)\`/eval $1/geo;
-+
-+	print $_,"\n";
-+}
-+close STDOUT or die "error closing STDOUT"; # enforce flush
--- 
-Efraim Flashner   <efraim@flashner.co.il>   רנשלפ םירפא
-GPG key = A28B F40C 3E55 1372 662D  14F7 41AA E7DC CA3D 8351
-Confidentiality cannot be guaranteed on emails sent or received unencrypted
-
diff --git a/gnu/packages/patches/rust-ring-0.16-test-files.patch b/gnu/packages/patches/rust-ring-0.16-test-files.patch
deleted file mode 100644
index dbe5c0f4ee..0000000000
--- a/gnu/packages/patches/rust-ring-0.16-test-files.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-This file exists in the upstream repository at the commit which
-corresponds to the ring-0.16.20 release, but was excluded from the
-release tarball.
-
----
- tests/ed25519_verify_tests.txt | 34 ++++++++++++++++++++++++++++++++++
- 1 file changed, 34 insertions(+)
- create mode 100644 tests/ed25519_verify_tests.txt
-
-diff --git a/tests/ed25519_verify_tests.txt b/tests/ed25519_verify_tests.txt
-new file mode 100644
-index 0000000..74c94b3
---- /dev/null
-+++ b/tests/ed25519_verify_tests.txt
-@@ -0,0 +1,34 @@
-+# BoringSSL TEST(Ed25519Test Malleability)
-+
-+# Control; S is in range.
-+MESSAGE = 54657374
-+SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab07a9155711ecfaf7f99f277bad0c6ae7e39d4eef676573336a5c51eb6f946b30d
-+PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa
-+Result = P
-+
-+# Same as above, but with the order L added to S so it is out of range.
-+# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21
-+MESSAGE = 54657374
-+SIG = 7c38e026f29e14aabd059a0f2db8b0cd783040609a8be684db12f82a27774ab067654bce3832c2d76f8f6f5dafc08d9339d4eef676573336a5c51eb6f946b31d
-+PUB = 7d4d0e7f6153a69b6242b522abbee685fda4420f8834b108c3bdae369ef549fa
-+Result = F
-+
-+
-+# BoringSSL commit 3094902fcdc2db2cc832fa854b9a6a8be383926c
-+MESSAGE = 124e583f8b8eca58bb29c271b41d36986bbc45541f8e51f9cb0133eca447601e
-+SIG = dac119d6ca87fc59ae611c157048f4d4fc932a149dbe20ec6effd1436abf83ea05c7df0fef06147241259113909bc71bd3c53ba4464ffcad3c0968f2ffffff0f
-+PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
-+Result = P
-+
-+# Control. Same key as above; same message and signature as below, except S is in range.
-+PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
-+MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0
-+SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4f58ac9d20ec563133cabc6230b1db8625f8446639ede46ad4df4053000000000
-+Result = P
-+
-+# Same key as above, but S is out of range.
-+# BoringSSL commit 472ba2c2dd52d06a657a63b7fbf02732a6649d21
-+PUB = 100fdf47fb94f1536a4f7c3fda27383fa03375a8f527c537e6f1703c47f94f86
-+MESSAGE = 6a0bc2b0057cedfc0fa2e3f7f7d39279b30f454a69dfd1117c758d86b19d85e0
-+SIG = 0971f86d2c9c78582524a103cb9cf949522ae528f8054dc20107d999be673ff4e25ebf2f2928766b1248bec6e91697775f8446639ede46ad4df4053000000010
-+Result = F
--- 
-Efraim Flashner   <efraim@flashner.co.il>   רנשלפ םירפא
-GPG key = A28B F40C 3E55 1372 662D  14F7 41AA E7DC CA3D 8351
-Confidentiality cannot be guaranteed on emails sent or received unencrypted
-
diff --git a/gnu/packages/patches/rust-ring-0.17-ring-core.patch b/gnu/packages/patches/rust-ring-0.17-ring-core.patch
new file mode 100644
index 0000000000..faa3a8a191
--- /dev/null
+++ b/gnu/packages/patches/rust-ring-0.17-ring-core.patch
@@ -0,0 +1,496 @@
+These two files are needed to finish generating the files for Windows
+and were generated using 'RING_PREGENERATE_ASM=1 cargo build
+--target-dir=target/pregenerate_asm'. They are included here so we don't
+need to add cargo to the computed-source.
+
+---
+ .../ring_core_generated/prefix_symbols_asm.h  | 236 ++++++++++++++++++
+ .../prefix_symbols_nasm.inc                   | 236 ++++++++++++++++++
+ 2 files changed, 472 insertions(+)
+ create mode 100644 pregenerated/tmp/ring_core_generated/prefix_symbols_asm.h
+ create mode 100644 pregenerated/tmp/ring_core_generated/prefix_symbols_nasm.inc
+
+diff --git a/pregenerated/tmp/ring_core_generated/prefix_symbols_asm.h b/pregenerated/tmp/ring_core_generated/prefix_symbols_asm.h
+new file mode 100644
+index 000000000..1cd766400
+--- /dev/null
++++ b/pregenerated/tmp/ring_core_generated/prefix_symbols_asm.h
+@@ -0,0 +1,236 @@
++
++#ifndef ring_core_generated_PREFIX_SYMBOLS_ASM_H
++#define ring_core_generated_PREFIX_SYMBOLS_ASM_H
++
++#if defined(__APPLE__)
++#define _ecp_nistz256_point_double _p256_point_double
++#define _ecp_nistz256_point_add _p256_point_add
++#define _ecp_nistz256_point_add_affine _p256_point_add_affine
++#define _ecp_nistz256_ord_mul_mont _p256_scalar_mul_mont
++#define _ecp_nistz256_ord_sqr_mont _p256_scalar_sqr_rep_mont
++#define _ecp_nistz256_mul_mont _p256_mul_mont
++#define _ecp_nistz256_sqr_mont _p256_sqr_mont
++#define _CRYPTO_memcmp _ring_core_0_17_7_CRYPTO_memcmp
++#define _CRYPTO_poly1305_finish _ring_core_0_17_7_CRYPTO_poly1305_finish
++#define _CRYPTO_poly1305_finish_neon _ring_core_0_17_7_CRYPTO_poly1305_finish_neon
++#define _CRYPTO_poly1305_init _ring_core_0_17_7_CRYPTO_poly1305_init
++#define _CRYPTO_poly1305_init_neon _ring_core_0_17_7_CRYPTO_poly1305_init_neon
++#define _CRYPTO_poly1305_update _ring_core_0_17_7_CRYPTO_poly1305_update
++#define _CRYPTO_poly1305_update_neon _ring_core_0_17_7_CRYPTO_poly1305_update_neon
++#define _ChaCha20_ctr32 _ring_core_0_17_7_ChaCha20_ctr32
++#define _LIMBS_add_mod _ring_core_0_17_7_LIMBS_add_mod
++#define _LIMBS_are_even _ring_core_0_17_7_LIMBS_are_even
++#define _LIMBS_are_zero _ring_core_0_17_7_LIMBS_are_zero
++#define _LIMBS_equal _ring_core_0_17_7_LIMBS_equal
++#define _LIMBS_equal_limb _ring_core_0_17_7_LIMBS_equal_limb
++#define _LIMBS_less_than _ring_core_0_17_7_LIMBS_less_than
++#define _LIMBS_less_than_limb _ring_core_0_17_7_LIMBS_less_than_limb
++#define _LIMBS_reduce_once _ring_core_0_17_7_LIMBS_reduce_once
++#define _LIMBS_select_512_32 _ring_core_0_17_7_LIMBS_select_512_32
++#define _LIMBS_shl_mod _ring_core_0_17_7_LIMBS_shl_mod
++#define _LIMBS_sub_mod _ring_core_0_17_7_LIMBS_sub_mod
++#define _LIMBS_window5_split_window _ring_core_0_17_7_LIMBS_window5_split_window
++#define _LIMBS_window5_unsplit_window _ring_core_0_17_7_LIMBS_window5_unsplit_window
++#define _LIMB_shr _ring_core_0_17_7_LIMB_shr
++#define _OPENSSL_armcap_P _ring_core_0_17_7_OPENSSL_armcap_P
++#define _OPENSSL_cpuid_setup _ring_core_0_17_7_OPENSSL_cpuid_setup
++#define _OPENSSL_ia32cap_P _ring_core_0_17_7_OPENSSL_ia32cap_P
++#define _aes_hw_ctr32_encrypt_blocks _ring_core_0_17_7_aes_hw_ctr32_encrypt_blocks
++#define _aes_hw_encrypt _ring_core_0_17_7_aes_hw_encrypt
++#define _aes_hw_set_encrypt_key _ring_core_0_17_7_aes_hw_set_encrypt_key
++#define _aes_nohw_ctr32_encrypt_blocks _ring_core_0_17_7_aes_nohw_ctr32_encrypt_blocks
++#define _aes_nohw_encrypt _ring_core_0_17_7_aes_nohw_encrypt
++#define _aes_nohw_set_encrypt_key _ring_core_0_17_7_aes_nohw_set_encrypt_key
++#define _aesni_gcm_decrypt _ring_core_0_17_7_aesni_gcm_decrypt
++#define _aesni_gcm_encrypt _ring_core_0_17_7_aesni_gcm_encrypt
++#define _bn_from_montgomery_in_place _ring_core_0_17_7_bn_from_montgomery_in_place
++#define _bn_gather5 _ring_core_0_17_7_bn_gather5
++#define _bn_mul_mont _ring_core_0_17_7_bn_mul_mont
++#define _bn_mul_mont_gather5 _ring_core_0_17_7_bn_mul_mont_gather5
++#define _bn_neg_inv_mod_r_u64 _ring_core_0_17_7_bn_neg_inv_mod_r_u64
++#define _bn_power5 _ring_core_0_17_7_bn_power5
++#define _bn_scatter5 _ring_core_0_17_7_bn_scatter5
++#define _bn_sqr8x_internal _ring_core_0_17_7_bn_sqr8x_internal
++#define _bn_sqrx8x_internal _ring_core_0_17_7_bn_sqrx8x_internal
++#define _bsaes_ctr32_encrypt_blocks _ring_core_0_17_7_bsaes_ctr32_encrypt_blocks
++#define _bssl_constant_time_test_conditional_memcpy _ring_core_0_17_7_bssl_constant_time_test_conditional_memcpy
++#define _bssl_constant_time_test_conditional_memxor _ring_core_0_17_7_bssl_constant_time_test_conditional_memxor
++#define _bssl_constant_time_test_main _ring_core_0_17_7_bssl_constant_time_test_main
++#define _chacha20_poly1305_open _ring_core_0_17_7_chacha20_poly1305_open
++#define _chacha20_poly1305_seal _ring_core_0_17_7_chacha20_poly1305_seal
++#define _fiat_curve25519_adx_mul _ring_core_0_17_7_fiat_curve25519_adx_mul
++#define _fiat_curve25519_adx_square _ring_core_0_17_7_fiat_curve25519_adx_square
++#define _gcm_ghash_avx _ring_core_0_17_7_gcm_ghash_avx
++#define _gcm_ghash_clmul _ring_core_0_17_7_gcm_ghash_clmul
++#define _gcm_ghash_neon _ring_core_0_17_7_gcm_ghash_neon
++#define _gcm_gmult_clmul _ring_core_0_17_7_gcm_gmult_clmul
++#define _gcm_gmult_neon _ring_core_0_17_7_gcm_gmult_neon
++#define _gcm_init_avx _ring_core_0_17_7_gcm_init_avx
++#define _gcm_init_clmul _ring_core_0_17_7_gcm_init_clmul
++#define _gcm_init_neon _ring_core_0_17_7_gcm_init_neon
++#define _k25519Precomp _ring_core_0_17_7_k25519Precomp
++#define _limbs_mul_add_limb _ring_core_0_17_7_limbs_mul_add_limb
++#define _little_endian_bytes_from_scalar _ring_core_0_17_7_little_endian_bytes_from_scalar
++#define _ecp_nistz256_neg _ring_core_0_17_7_ecp_nistz256_neg
++#define _ecp_nistz256_select_w5 _ring_core_0_17_7_ecp_nistz256_select_w5
++#define _ecp_nistz256_select_w7 _ring_core_0_17_7_ecp_nistz256_select_w7
++#define _p256_mul_mont _ring_core_0_17_7_p256_mul_mont
++#define _p256_point_add _ring_core_0_17_7_p256_point_add
++#define _p256_point_add_affine _ring_core_0_17_7_p256_point_add_affine
++#define _p256_point_double _ring_core_0_17_7_p256_point_double
++#define _p256_point_mul _ring_core_0_17_7_p256_point_mul
++#define _p256_point_mul_base _ring_core_0_17_7_p256_point_mul_base
++#define _p256_point_mul_base_vartime _ring_core_0_17_7_p256_point_mul_base_vartime
++#define _p256_scalar_mul_mont _ring_core_0_17_7_p256_scalar_mul_mont
++#define _p256_scalar_sqr_rep_mont _ring_core_0_17_7_p256_scalar_sqr_rep_mont
++#define _p256_sqr_mont _ring_core_0_17_7_p256_sqr_mont
++#define _p384_elem_div_by_2 _ring_core_0_17_7_p384_elem_div_by_2
++#define _p384_elem_mul_mont _ring_core_0_17_7_p384_elem_mul_mont
++#define _p384_elem_neg _ring_core_0_17_7_p384_elem_neg
++#define _p384_elem_sub _ring_core_0_17_7_p384_elem_sub
++#define _p384_point_add _ring_core_0_17_7_p384_point_add
++#define _p384_point_double _ring_core_0_17_7_p384_point_double
++#define _p384_point_mul _ring_core_0_17_7_p384_point_mul
++#define _p384_scalar_mul_mont _ring_core_0_17_7_p384_scalar_mul_mont
++#define _openssl_poly1305_neon2_addmulmod _ring_core_0_17_7_openssl_poly1305_neon2_addmulmod
++#define _openssl_poly1305_neon2_blocks _ring_core_0_17_7_openssl_poly1305_neon2_blocks
++#define _sha256_block_data_order _ring_core_0_17_7_sha256_block_data_order
++#define _sha512_block_data_order _ring_core_0_17_7_sha512_block_data_order
++#define _vpaes_ctr32_encrypt_blocks _ring_core_0_17_7_vpaes_ctr32_encrypt_blocks
++#define _vpaes_encrypt _ring_core_0_17_7_vpaes_encrypt
++#define _vpaes_encrypt_key_to_bsaes _ring_core_0_17_7_vpaes_encrypt_key_to_bsaes
++#define _vpaes_set_encrypt_key _ring_core_0_17_7_vpaes_set_encrypt_key
++#define _x25519_NEON _ring_core_0_17_7_x25519_NEON
++#define _x25519_fe_invert _ring_core_0_17_7_x25519_fe_invert
++#define _x25519_fe_isnegative _ring_core_0_17_7_x25519_fe_isnegative
++#define _x25519_fe_mul_ttt _ring_core_0_17_7_x25519_fe_mul_ttt
++#define _x25519_fe_neg _ring_core_0_17_7_x25519_fe_neg
++#define _x25519_fe_tobytes _ring_core_0_17_7_x25519_fe_tobytes
++#define _x25519_ge_double_scalarmult_vartime _ring_core_0_17_7_x25519_ge_double_scalarmult_vartime
++#define _x25519_ge_frombytes_vartime _ring_core_0_17_7_x25519_ge_frombytes_vartime
++#define _x25519_ge_scalarmult_base _ring_core_0_17_7_x25519_ge_scalarmult_base
++#define _x25519_ge_scalarmult_base_adx _ring_core_0_17_7_x25519_ge_scalarmult_base_adx
++#define _x25519_public_from_private_generic_masked _ring_core_0_17_7_x25519_public_from_private_generic_masked
++#define _x25519_sc_mask _ring_core_0_17_7_x25519_sc_mask
++#define _x25519_sc_muladd _ring_core_0_17_7_x25519_sc_muladd
++#define _x25519_sc_reduce _ring_core_0_17_7_x25519_sc_reduce
++#define _x25519_scalar_mult_adx _ring_core_0_17_7_x25519_scalar_mult_adx
++#define _x25519_scalar_mult_generic_masked _ring_core_0_17_7_x25519_scalar_mult_generic_masked
++
++#else
++#define ecp_nistz256_point_double p256_point_double
++#define ecp_nistz256_point_add p256_point_add
++#define ecp_nistz256_point_add_affine p256_point_add_affine
++#define ecp_nistz256_ord_mul_mont p256_scalar_mul_mont
++#define ecp_nistz256_ord_sqr_mont p256_scalar_sqr_rep_mont
++#define ecp_nistz256_mul_mont p256_mul_mont
++#define ecp_nistz256_sqr_mont p256_sqr_mont
++#define CRYPTO_memcmp ring_core_0_17_7_CRYPTO_memcmp
++#define CRYPTO_poly1305_finish ring_core_0_17_7_CRYPTO_poly1305_finish
++#define CRYPTO_poly1305_finish_neon ring_core_0_17_7_CRYPTO_poly1305_finish_neon
++#define CRYPTO_poly1305_init ring_core_0_17_7_CRYPTO_poly1305_init
++#define CRYPTO_poly1305_init_neon ring_core_0_17_7_CRYPTO_poly1305_init_neon
++#define CRYPTO_poly1305_update ring_core_0_17_7_CRYPTO_poly1305_update
++#define CRYPTO_poly1305_update_neon ring_core_0_17_7_CRYPTO_poly1305_update_neon
++#define ChaCha20_ctr32 ring_core_0_17_7_ChaCha20_ctr32
++#define LIMBS_add_mod ring_core_0_17_7_LIMBS_add_mod
++#define LIMBS_are_even ring_core_0_17_7_LIMBS_are_even
++#define LIMBS_are_zero ring_core_0_17_7_LIMBS_are_zero
++#define LIMBS_equal ring_core_0_17_7_LIMBS_equal
++#define LIMBS_equal_limb ring_core_0_17_7_LIMBS_equal_limb
++#define LIMBS_less_than ring_core_0_17_7_LIMBS_less_than
++#define LIMBS_less_than_limb ring_core_0_17_7_LIMBS_less_than_limb
++#define LIMBS_reduce_once ring_core_0_17_7_LIMBS_reduce_once
++#define LIMBS_select_512_32 ring_core_0_17_7_LIMBS_select_512_32
++#define LIMBS_shl_mod ring_core_0_17_7_LIMBS_shl_mod
++#define LIMBS_sub_mod ring_core_0_17_7_LIMBS_sub_mod
++#define LIMBS_window5_split_window ring_core_0_17_7_LIMBS_window5_split_window
++#define LIMBS_window5_unsplit_window ring_core_0_17_7_LIMBS_window5_unsplit_window
++#define LIMB_shr ring_core_0_17_7_LIMB_shr
++#define OPENSSL_armcap_P ring_core_0_17_7_OPENSSL_armcap_P
++#define OPENSSL_cpuid_setup ring_core_0_17_7_OPENSSL_cpuid_setup
++#define OPENSSL_ia32cap_P ring_core_0_17_7_OPENSSL_ia32cap_P
++#define aes_hw_ctr32_encrypt_blocks ring_core_0_17_7_aes_hw_ctr32_encrypt_blocks
++#define aes_hw_encrypt ring_core_0_17_7_aes_hw_encrypt
++#define aes_hw_set_encrypt_key ring_core_0_17_7_aes_hw_set_encrypt_key
++#define aes_nohw_ctr32_encrypt_blocks ring_core_0_17_7_aes_nohw_ctr32_encrypt_blocks
++#define aes_nohw_encrypt ring_core_0_17_7_aes_nohw_encrypt
++#define aes_nohw_set_encrypt_key ring_core_0_17_7_aes_nohw_set_encrypt_key
++#define aesni_gcm_decrypt ring_core_0_17_7_aesni_gcm_decrypt
++#define aesni_gcm_encrypt ring_core_0_17_7_aesni_gcm_encrypt
++#define bn_from_montgomery_in_place ring_core_0_17_7_bn_from_montgomery_in_place
++#define bn_gather5 ring_core_0_17_7_bn_gather5
++#define bn_mul_mont ring_core_0_17_7_bn_mul_mont
++#define bn_mul_mont_gather5 ring_core_0_17_7_bn_mul_mont_gather5
++#define bn_neg_inv_mod_r_u64 ring_core_0_17_7_bn_neg_inv_mod_r_u64
++#define bn_power5 ring_core_0_17_7_bn_power5
++#define bn_scatter5 ring_core_0_17_7_bn_scatter5
++#define bn_sqr8x_internal ring_core_0_17_7_bn_sqr8x_internal
++#define bn_sqrx8x_internal ring_core_0_17_7_bn_sqrx8x_internal
++#define bsaes_ctr32_encrypt_blocks ring_core_0_17_7_bsaes_ctr32_encrypt_blocks
++#define bssl_constant_time_test_conditional_memcpy ring_core_0_17_7_bssl_constant_time_test_conditional_memcpy
++#define bssl_constant_time_test_conditional_memxor ring_core_0_17_7_bssl_constant_time_test_conditional_memxor
++#define bssl_constant_time_test_main ring_core_0_17_7_bssl_constant_time_test_main
++#define chacha20_poly1305_open ring_core_0_17_7_chacha20_poly1305_open
++#define chacha20_poly1305_seal ring_core_0_17_7_chacha20_poly1305_seal
++#define fiat_curve25519_adx_mul ring_core_0_17_7_fiat_curve25519_adx_mul
++#define fiat_curve25519_adx_square ring_core_0_17_7_fiat_curve25519_adx_square
++#define gcm_ghash_avx ring_core_0_17_7_gcm_ghash_avx
++#define gcm_ghash_clmul ring_core_0_17_7_gcm_ghash_clmul
++#define gcm_ghash_neon ring_core_0_17_7_gcm_ghash_neon
++#define gcm_gmult_clmul ring_core_0_17_7_gcm_gmult_clmul
++#define gcm_gmult_neon ring_core_0_17_7_gcm_gmult_neon
++#define gcm_init_avx ring_core_0_17_7_gcm_init_avx
++#define gcm_init_clmul ring_core_0_17_7_gcm_init_clmul
++#define gcm_init_neon ring_core_0_17_7_gcm_init_neon
++#define k25519Precomp ring_core_0_17_7_k25519Precomp
++#define limbs_mul_add_limb ring_core_0_17_7_limbs_mul_add_limb
++#define little_endian_bytes_from_scalar ring_core_0_17_7_little_endian_bytes_from_scalar
++#define ecp_nistz256_neg ring_core_0_17_7_ecp_nistz256_neg
++#define ecp_nistz256_select_w5 ring_core_0_17_7_ecp_nistz256_select_w5
++#define ecp_nistz256_select_w7 ring_core_0_17_7_ecp_nistz256_select_w7
++#define p256_mul_mont ring_core_0_17_7_p256_mul_mont
++#define p256_point_add ring_core_0_17_7_p256_point_add
++#define p256_point_add_affine ring_core_0_17_7_p256_point_add_affine
++#define p256_point_double ring_core_0_17_7_p256_point_double
++#define p256_point_mul ring_core_0_17_7_p256_point_mul
++#define p256_point_mul_base ring_core_0_17_7_p256_point_mul_base
++#define p256_point_mul_base_vartime ring_core_0_17_7_p256_point_mul_base_vartime
++#define p256_scalar_mul_mont ring_core_0_17_7_p256_scalar_mul_mont
++#define p256_scalar_sqr_rep_mont ring_core_0_17_7_p256_scalar_sqr_rep_mont
++#define p256_sqr_mont ring_core_0_17_7_p256_sqr_mont
++#define p384_elem_div_by_2 ring_core_0_17_7_p384_elem_div_by_2
++#define p384_elem_mul_mont ring_core_0_17_7_p384_elem_mul_mont
++#define p384_elem_neg ring_core_0_17_7_p384_elem_neg
++#define p384_elem_sub ring_core_0_17_7_p384_elem_sub
++#define p384_point_add ring_core_0_17_7_p384_point_add
++#define p384_point_double ring_core_0_17_7_p384_point_double
++#define p384_point_mul ring_core_0_17_7_p384_point_mul
++#define p384_scalar_mul_mont ring_core_0_17_7_p384_scalar_mul_mont
++#define openssl_poly1305_neon2_addmulmod ring_core_0_17_7_openssl_poly1305_neon2_addmulmod
++#define openssl_poly1305_neon2_blocks ring_core_0_17_7_openssl_poly1305_neon2_blocks
++#define sha256_block_data_order ring_core_0_17_7_sha256_block_data_order
++#define sha512_block_data_order ring_core_0_17_7_sha512_block_data_order
++#define vpaes_ctr32_encrypt_blocks ring_core_0_17_7_vpaes_ctr32_encrypt_blocks
++#define vpaes_encrypt ring_core_0_17_7_vpaes_encrypt
++#define vpaes_encrypt_key_to_bsaes ring_core_0_17_7_vpaes_encrypt_key_to_bsaes
++#define vpaes_set_encrypt_key ring_core_0_17_7_vpaes_set_encrypt_key
++#define x25519_NEON ring_core_0_17_7_x25519_NEON
++#define x25519_fe_invert ring_core_0_17_7_x25519_fe_invert
++#define x25519_fe_isnegative ring_core_0_17_7_x25519_fe_isnegative
++#define x25519_fe_mul_ttt ring_core_0_17_7_x25519_fe_mul_ttt
++#define x25519_fe_neg ring_core_0_17_7_x25519_fe_neg
++#define x25519_fe_tobytes ring_core_0_17_7_x25519_fe_tobytes
++#define x25519_ge_double_scalarmult_vartime ring_core_0_17_7_x25519_ge_double_scalarmult_vartime
++#define x25519_ge_frombytes_vartime ring_core_0_17_7_x25519_ge_frombytes_vartime
++#define x25519_ge_scalarmult_base ring_core_0_17_7_x25519_ge_scalarmult_base
++#define x25519_ge_scalarmult_base_adx ring_core_0_17_7_x25519_ge_scalarmult_base_adx
++#define x25519_public_from_private_generic_masked ring_core_0_17_7_x25519_public_from_private_generic_masked
++#define x25519_sc_mask ring_core_0_17_7_x25519_sc_mask
++#define x25519_sc_muladd ring_core_0_17_7_x25519_sc_muladd
++#define x25519_sc_reduce ring_core_0_17_7_x25519_sc_reduce
++#define x25519_scalar_mult_adx ring_core_0_17_7_x25519_scalar_mult_adx
++#define x25519_scalar_mult_generic_masked ring_core_0_17_7_x25519_scalar_mult_generic_masked
++
++#endif
++#endif
+diff --git a/pregenerated/tmp/ring_core_generated/prefix_symbols_nasm.inc b/pregenerated/tmp/ring_core_generated/prefix_symbols_nasm.inc
+new file mode 100644
+index 000000000..65ce0cfaa
+--- /dev/null
++++ b/pregenerated/tmp/ring_core_generated/prefix_symbols_nasm.inc
+@@ -0,0 +1,236 @@
++
++%ifndef ring_core_generated_PREFIX_SYMBOLS_NASM_INC
++%define ring_core_generated_PREFIX_SYMBOLS_NASM_INC
++
++%ifidn __OUTPUT_FORMAT__,win32
++%define _ecp_nistz256_point_double _p256_point_double
++%define _ecp_nistz256_point_add _p256_point_add
++%define _ecp_nistz256_point_add_affine _p256_point_add_affine
++%define _ecp_nistz256_ord_mul_mont _p256_scalar_mul_mont
++%define _ecp_nistz256_ord_sqr_mont _p256_scalar_sqr_rep_mont
++%define _ecp_nistz256_mul_mont _p256_mul_mont
++%define _ecp_nistz256_sqr_mont _p256_sqr_mont
++%define _CRYPTO_memcmp _ring_core_0_17_7_CRYPTO_memcmp
++%define _CRYPTO_poly1305_finish _ring_core_0_17_7_CRYPTO_poly1305_finish
++%define _CRYPTO_poly1305_finish_neon _ring_core_0_17_7_CRYPTO_poly1305_finish_neon
++%define _CRYPTO_poly1305_init _ring_core_0_17_7_CRYPTO_poly1305_init
++%define _CRYPTO_poly1305_init_neon _ring_core_0_17_7_CRYPTO_poly1305_init_neon
++%define _CRYPTO_poly1305_update _ring_core_0_17_7_CRYPTO_poly1305_update
++%define _CRYPTO_poly1305_update_neon _ring_core_0_17_7_CRYPTO_poly1305_update_neon
++%define _ChaCha20_ctr32 _ring_core_0_17_7_ChaCha20_ctr32
++%define _LIMBS_add_mod _ring_core_0_17_7_LIMBS_add_mod
++%define _LIMBS_are_even _ring_core_0_17_7_LIMBS_are_even
++%define _LIMBS_are_zero _ring_core_0_17_7_LIMBS_are_zero
++%define _LIMBS_equal _ring_core_0_17_7_LIMBS_equal
++%define _LIMBS_equal_limb _ring_core_0_17_7_LIMBS_equal_limb
++%define _LIMBS_less_than _ring_core_0_17_7_LIMBS_less_than
++%define _LIMBS_less_than_limb _ring_core_0_17_7_LIMBS_less_than_limb
++%define _LIMBS_reduce_once _ring_core_0_17_7_LIMBS_reduce_once
++%define _LIMBS_select_512_32 _ring_core_0_17_7_LIMBS_select_512_32
++%define _LIMBS_shl_mod _ring_core_0_17_7_LIMBS_shl_mod
++%define _LIMBS_sub_mod _ring_core_0_17_7_LIMBS_sub_mod
++%define _LIMBS_window5_split_window _ring_core_0_17_7_LIMBS_window5_split_window
++%define _LIMBS_window5_unsplit_window _ring_core_0_17_7_LIMBS_window5_unsplit_window
++%define _LIMB_shr _ring_core_0_17_7_LIMB_shr
++%define _OPENSSL_armcap_P _ring_core_0_17_7_OPENSSL_armcap_P
++%define _OPENSSL_cpuid_setup _ring_core_0_17_7_OPENSSL_cpuid_setup
++%define _OPENSSL_ia32cap_P _ring_core_0_17_7_OPENSSL_ia32cap_P
++%define _aes_hw_ctr32_encrypt_blocks _ring_core_0_17_7_aes_hw_ctr32_encrypt_blocks
++%define _aes_hw_encrypt _ring_core_0_17_7_aes_hw_encrypt
++%define _aes_hw_set_encrypt_key _ring_core_0_17_7_aes_hw_set_encrypt_key
++%define _aes_nohw_ctr32_encrypt_blocks _ring_core_0_17_7_aes_nohw_ctr32_encrypt_blocks
++%define _aes_nohw_encrypt _ring_core_0_17_7_aes_nohw_encrypt
++%define _aes_nohw_set_encrypt_key _ring_core_0_17_7_aes_nohw_set_encrypt_key
++%define _aesni_gcm_decrypt _ring_core_0_17_7_aesni_gcm_decrypt
++%define _aesni_gcm_encrypt _ring_core_0_17_7_aesni_gcm_encrypt
++%define _bn_from_montgomery_in_place _ring_core_0_17_7_bn_from_montgomery_in_place
++%define _bn_gather5 _ring_core_0_17_7_bn_gather5
++%define _bn_mul_mont _ring_core_0_17_7_bn_mul_mont
++%define _bn_mul_mont_gather5 _ring_core_0_17_7_bn_mul_mont_gather5
++%define _bn_neg_inv_mod_r_u64 _ring_core_0_17_7_bn_neg_inv_mod_r_u64
++%define _bn_power5 _ring_core_0_17_7_bn_power5
++%define _bn_scatter5 _ring_core_0_17_7_bn_scatter5
++%define _bn_sqr8x_internal _ring_core_0_17_7_bn_sqr8x_internal
++%define _bn_sqrx8x_internal _ring_core_0_17_7_bn_sqrx8x_internal
++%define _bsaes_ctr32_encrypt_blocks _ring_core_0_17_7_bsaes_ctr32_encrypt_blocks
++%define _bssl_constant_time_test_conditional_memcpy _ring_core_0_17_7_bssl_constant_time_test_conditional_memcpy
++%define _bssl_constant_time_test_conditional_memxor _ring_core_0_17_7_bssl_constant_time_test_conditional_memxor
++%define _bssl_constant_time_test_main _ring_core_0_17_7_bssl_constant_time_test_main
++%define _chacha20_poly1305_open _ring_core_0_17_7_chacha20_poly1305_open
++%define _chacha20_poly1305_seal _ring_core_0_17_7_chacha20_poly1305_seal
++%define _fiat_curve25519_adx_mul _ring_core_0_17_7_fiat_curve25519_adx_mul
++%define _fiat_curve25519_adx_square _ring_core_0_17_7_fiat_curve25519_adx_square
++%define _gcm_ghash_avx _ring_core_0_17_7_gcm_ghash_avx
++%define _gcm_ghash_clmul _ring_core_0_17_7_gcm_ghash_clmul
++%define _gcm_ghash_neon _ring_core_0_17_7_gcm_ghash_neon
++%define _gcm_gmult_clmul _ring_core_0_17_7_gcm_gmult_clmul
++%define _gcm_gmult_neon _ring_core_0_17_7_gcm_gmult_neon
++%define _gcm_init_avx _ring_core_0_17_7_gcm_init_avx
++%define _gcm_init_clmul _ring_core_0_17_7_gcm_init_clmul
++%define _gcm_init_neon _ring_core_0_17_7_gcm_init_neon
++%define _k25519Precomp _ring_core_0_17_7_k25519Precomp
++%define _limbs_mul_add_limb _ring_core_0_17_7_limbs_mul_add_limb
++%define _little_endian_bytes_from_scalar _ring_core_0_17_7_little_endian_bytes_from_scalar
++%define _ecp_nistz256_neg _ring_core_0_17_7_ecp_nistz256_neg
++%define _ecp_nistz256_select_w5 _ring_core_0_17_7_ecp_nistz256_select_w5
++%define _ecp_nistz256_select_w7 _ring_core_0_17_7_ecp_nistz256_select_w7
++%define _p256_mul_mont _ring_core_0_17_7_p256_mul_mont
++%define _p256_point_add _ring_core_0_17_7_p256_point_add
++%define _p256_point_add_affine _ring_core_0_17_7_p256_point_add_affine
++%define _p256_point_double _ring_core_0_17_7_p256_point_double
++%define _p256_point_mul _ring_core_0_17_7_p256_point_mul
++%define _p256_point_mul_base _ring_core_0_17_7_p256_point_mul_base
++%define _p256_point_mul_base_vartime _ring_core_0_17_7_p256_point_mul_base_vartime
++%define _p256_scalar_mul_mont _ring_core_0_17_7_p256_scalar_mul_mont
++%define _p256_scalar_sqr_rep_mont _ring_core_0_17_7_p256_scalar_sqr_rep_mont
++%define _p256_sqr_mont _ring_core_0_17_7_p256_sqr_mont
++%define _p384_elem_div_by_2 _ring_core_0_17_7_p384_elem_div_by_2
++%define _p384_elem_mul_mont _ring_core_0_17_7_p384_elem_mul_mont
++%define _p384_elem_neg _ring_core_0_17_7_p384_elem_neg
++%define _p384_elem_sub _ring_core_0_17_7_p384_elem_sub
++%define _p384_point_add _ring_core_0_17_7_p384_point_add
++%define _p384_point_double _ring_core_0_17_7_p384_point_double
++%define _p384_point_mul _ring_core_0_17_7_p384_point_mul
++%define _p384_scalar_mul_mont _ring_core_0_17_7_p384_scalar_mul_mont
++%define _openssl_poly1305_neon2_addmulmod _ring_core_0_17_7_openssl_poly1305_neon2_addmulmod
++%define _openssl_poly1305_neon2_blocks _ring_core_0_17_7_openssl_poly1305_neon2_blocks
++%define _sha256_block_data_order _ring_core_0_17_7_sha256_block_data_order
++%define _sha512_block_data_order _ring_core_0_17_7_sha512_block_data_order
++%define _vpaes_ctr32_encrypt_blocks _ring_core_0_17_7_vpaes_ctr32_encrypt_blocks
++%define _vpaes_encrypt _ring_core_0_17_7_vpaes_encrypt
++%define _vpaes_encrypt_key_to_bsaes _ring_core_0_17_7_vpaes_encrypt_key_to_bsaes
++%define _vpaes_set_encrypt_key _ring_core_0_17_7_vpaes_set_encrypt_key
++%define _x25519_NEON _ring_core_0_17_7_x25519_NEON
++%define _x25519_fe_invert _ring_core_0_17_7_x25519_fe_invert
++%define _x25519_fe_isnegative _ring_core_0_17_7_x25519_fe_isnegative
++%define _x25519_fe_mul_ttt _ring_core_0_17_7_x25519_fe_mul_ttt
++%define _x25519_fe_neg _ring_core_0_17_7_x25519_fe_neg
++%define _x25519_fe_tobytes _ring_core_0_17_7_x25519_fe_tobytes
++%define _x25519_ge_double_scalarmult_vartime _ring_core_0_17_7_x25519_ge_double_scalarmult_vartime
++%define _x25519_ge_frombytes_vartime _ring_core_0_17_7_x25519_ge_frombytes_vartime
++%define _x25519_ge_scalarmult_base _ring_core_0_17_7_x25519_ge_scalarmult_base
++%define _x25519_ge_scalarmult_base_adx _ring_core_0_17_7_x25519_ge_scalarmult_base_adx
++%define _x25519_public_from_private_generic_masked _ring_core_0_17_7_x25519_public_from_private_generic_masked
++%define _x25519_sc_mask _ring_core_0_17_7_x25519_sc_mask
++%define _x25519_sc_muladd _ring_core_0_17_7_x25519_sc_muladd
++%define _x25519_sc_reduce _ring_core_0_17_7_x25519_sc_reduce
++%define _x25519_scalar_mult_adx _ring_core_0_17_7_x25519_scalar_mult_adx
++%define _x25519_scalar_mult_generic_masked _ring_core_0_17_7_x25519_scalar_mult_generic_masked
++
++%else
++%define ecp_nistz256_point_double p256_point_double
++%define ecp_nistz256_point_add p256_point_add
++%define ecp_nistz256_point_add_affine p256_point_add_affine
++%define ecp_nistz256_ord_mul_mont p256_scalar_mul_mont
++%define ecp_nistz256_ord_sqr_mont p256_scalar_sqr_rep_mont
++%define ecp_nistz256_mul_mont p256_mul_mont
++%define ecp_nistz256_sqr_mont p256_sqr_mont
++%define CRYPTO_memcmp ring_core_0_17_7_CRYPTO_memcmp
++%define CRYPTO_poly1305_finish ring_core_0_17_7_CRYPTO_poly1305_finish
++%define CRYPTO_poly1305_finish_neon ring_core_0_17_7_CRYPTO_poly1305_finish_neon
++%define CRYPTO_poly1305_init ring_core_0_17_7_CRYPTO_poly1305_init
++%define CRYPTO_poly1305_init_neon ring_core_0_17_7_CRYPTO_poly1305_init_neon
++%define CRYPTO_poly1305_update ring_core_0_17_7_CRYPTO_poly1305_update
++%define CRYPTO_poly1305_update_neon ring_core_0_17_7_CRYPTO_poly1305_update_neon
++%define ChaCha20_ctr32 ring_core_0_17_7_ChaCha20_ctr32
++%define LIMBS_add_mod ring_core_0_17_7_LIMBS_add_mod
++%define LIMBS_are_even ring_core_0_17_7_LIMBS_are_even
++%define LIMBS_are_zero ring_core_0_17_7_LIMBS_are_zero
++%define LIMBS_equal ring_core_0_17_7_LIMBS_equal
++%define LIMBS_equal_limb ring_core_0_17_7_LIMBS_equal_limb
++%define LIMBS_less_than ring_core_0_17_7_LIMBS_less_than
++%define LIMBS_less_than_limb ring_core_0_17_7_LIMBS_less_than_limb
++%define LIMBS_reduce_once ring_core_0_17_7_LIMBS_reduce_once
++%define LIMBS_select_512_32 ring_core_0_17_7_LIMBS_select_512_32
++%define LIMBS_shl_mod ring_core_0_17_7_LIMBS_shl_mod
++%define LIMBS_sub_mod ring_core_0_17_7_LIMBS_sub_mod
++%define LIMBS_window5_split_window ring_core_0_17_7_LIMBS_window5_split_window
++%define LIMBS_window5_unsplit_window ring_core_0_17_7_LIMBS_window5_unsplit_window
++%define LIMB_shr ring_core_0_17_7_LIMB_shr
++%define OPENSSL_armcap_P ring_core_0_17_7_OPENSSL_armcap_P
++%define OPENSSL_cpuid_setup ring_core_0_17_7_OPENSSL_cpuid_setup
++%define OPENSSL_ia32cap_P ring_core_0_17_7_OPENSSL_ia32cap_P
++%define aes_hw_ctr32_encrypt_blocks ring_core_0_17_7_aes_hw_ctr32_encrypt_blocks
++%define aes_hw_encrypt ring_core_0_17_7_aes_hw_encrypt
++%define aes_hw_set_encrypt_key ring_core_0_17_7_aes_hw_set_encrypt_key
++%define aes_nohw_ctr32_encrypt_blocks ring_core_0_17_7_aes_nohw_ctr32_encrypt_blocks
++%define aes_nohw_encrypt ring_core_0_17_7_aes_nohw_encrypt
++%define aes_nohw_set_encrypt_key ring_core_0_17_7_aes_nohw_set_encrypt_key
++%define aesni_gcm_decrypt ring_core_0_17_7_aesni_gcm_decrypt
++%define aesni_gcm_encrypt ring_core_0_17_7_aesni_gcm_encrypt
++%define bn_from_montgomery_in_place ring_core_0_17_7_bn_from_montgomery_in_place
++%define bn_gather5 ring_core_0_17_7_bn_gather5
++%define bn_mul_mont ring_core_0_17_7_bn_mul_mont
++%define bn_mul_mont_gather5 ring_core_0_17_7_bn_mul_mont_gather5
++%define bn_neg_inv_mod_r_u64 ring_core_0_17_7_bn_neg_inv_mod_r_u64
++%define bn_power5 ring_core_0_17_7_bn_power5
++%define bn_scatter5 ring_core_0_17_7_bn_scatter5
++%define bn_sqr8x_internal ring_core_0_17_7_bn_sqr8x_internal
++%define bn_sqrx8x_internal ring_core_0_17_7_bn_sqrx8x_internal
++%define bsaes_ctr32_encrypt_blocks ring_core_0_17_7_bsaes_ctr32_encrypt_blocks
++%define bssl_constant_time_test_conditional_memcpy ring_core_0_17_7_bssl_constant_time_test_conditional_memcpy
++%define bssl_constant_time_test_conditional_memxor ring_core_0_17_7_bssl_constant_time_test_conditional_memxor
++%define bssl_constant_time_test_main ring_core_0_17_7_bssl_constant_time_test_main
++%define chacha20_poly1305_open ring_core_0_17_7_chacha20_poly1305_open
++%define chacha20_poly1305_seal ring_core_0_17_7_chacha20_poly1305_seal
++%define fiat_curve25519_adx_mul ring_core_0_17_7_fiat_curve25519_adx_mul
++%define fiat_curve25519_adx_square ring_core_0_17_7_fiat_curve25519_adx_square
++%define gcm_ghash_avx ring_core_0_17_7_gcm_ghash_avx
++%define gcm_ghash_clmul ring_core_0_17_7_gcm_ghash_clmul
++%define gcm_ghash_neon ring_core_0_17_7_gcm_ghash_neon
++%define gcm_gmult_clmul ring_core_0_17_7_gcm_gmult_clmul
++%define gcm_gmult_neon ring_core_0_17_7_gcm_gmult_neon
++%define gcm_init_avx ring_core_0_17_7_gcm_init_avx
++%define gcm_init_clmul ring_core_0_17_7_gcm_init_clmul
++%define gcm_init_neon ring_core_0_17_7_gcm_init_neon
++%define k25519Precomp ring_core_0_17_7_k25519Precomp
++%define limbs_mul_add_limb ring_core_0_17_7_limbs_mul_add_limb
++%define little_endian_bytes_from_scalar ring_core_0_17_7_little_endian_bytes_from_scalar
++%define ecp_nistz256_neg ring_core_0_17_7_ecp_nistz256_neg
++%define ecp_nistz256_select_w5 ring_core_0_17_7_ecp_nistz256_select_w5
++%define ecp_nistz256_select_w7 ring_core_0_17_7_ecp_nistz256_select_w7
++%define p256_mul_mont ring_core_0_17_7_p256_mul_mont
++%define p256_point_add ring_core_0_17_7_p256_point_add
++%define p256_point_add_affine ring_core_0_17_7_p256_point_add_affine
++%define p256_point_double ring_core_0_17_7_p256_point_double
++%define p256_point_mul ring_core_0_17_7_p256_point_mul
++%define p256_point_mul_base ring_core_0_17_7_p256_point_mul_base
++%define p256_point_mul_base_vartime ring_core_0_17_7_p256_point_mul_base_vartime
++%define p256_scalar_mul_mont ring_core_0_17_7_p256_scalar_mul_mont
++%define p256_scalar_sqr_rep_mont ring_core_0_17_7_p256_scalar_sqr_rep_mont
++%define p256_sqr_mont ring_core_0_17_7_p256_sqr_mont
++%define p384_elem_div_by_2 ring_core_0_17_7_p384_elem_div_by_2
++%define p384_elem_mul_mont ring_core_0_17_7_p384_elem_mul_mont
++%define p384_elem_neg ring_core_0_17_7_p384_elem_neg
++%define p384_elem_sub ring_core_0_17_7_p384_elem_sub
++%define p384_point_add ring_core_0_17_7_p384_point_add
++%define p384_point_double ring_core_0_17_7_p384_point_double
++%define p384_point_mul ring_core_0_17_7_p384_point_mul
++%define p384_scalar_mul_mont ring_core_0_17_7_p384_scalar_mul_mont
++%define openssl_poly1305_neon2_addmulmod ring_core_0_17_7_openssl_poly1305_neon2_addmulmod
++%define openssl_poly1305_neon2_blocks ring_core_0_17_7_openssl_poly1305_neon2_blocks
++%define sha256_block_data_order ring_core_0_17_7_sha256_block_data_order
++%define sha512_block_data_order ring_core_0_17_7_sha512_block_data_order
++%define vpaes_ctr32_encrypt_blocks ring_core_0_17_7_vpaes_ctr32_encrypt_blocks
++%define vpaes_encrypt ring_core_0_17_7_vpaes_encrypt
++%define vpaes_encrypt_key_to_bsaes ring_core_0_17_7_vpaes_encrypt_key_to_bsaes
++%define vpaes_set_encrypt_key ring_core_0_17_7_vpaes_set_encrypt_key
++%define x25519_NEON ring_core_0_17_7_x25519_NEON
++%define x25519_fe_invert ring_core_0_17_7_x25519_fe_invert
++%define x25519_fe_isnegative ring_core_0_17_7_x25519_fe_isnegative
++%define x25519_fe_mul_ttt ring_core_0_17_7_x25519_fe_mul_ttt
++%define x25519_fe_neg ring_core_0_17_7_x25519_fe_neg
++%define x25519_fe_tobytes ring_core_0_17_7_x25519_fe_tobytes
++%define x25519_ge_double_scalarmult_vartime ring_core_0_17_7_x25519_ge_double_scalarmult_vartime
++%define x25519_ge_frombytes_vartime ring_core_0_17_7_x25519_ge_frombytes_vartime
++%define x25519_ge_scalarmult_base ring_core_0_17_7_x25519_ge_scalarmult_base
++%define x25519_ge_scalarmult_base_adx ring_core_0_17_7_x25519_ge_scalarmult_base_adx
++%define x25519_public_from_private_generic_masked ring_core_0_17_7_x25519_public_from_private_generic_masked
++%define x25519_sc_mask ring_core_0_17_7_x25519_sc_mask
++%define x25519_sc_muladd ring_core_0_17_7_x25519_sc_muladd
++%define x25519_sc_reduce ring_core_0_17_7_x25519_sc_reduce
++%define x25519_scalar_mult_adx ring_core_0_17_7_x25519_scalar_mult_adx
++%define x25519_scalar_mult_generic_masked ring_core_0_17_7_x25519_scalar_mult_generic_masked
++
++%endif
++%endif
diff --git a/gnu/packages/patches/rust-rspec-1-remove-clippy.patch b/gnu/packages/patches/rust-rspec-1-remove-clippy.patch
new file mode 100644
index 0000000000..46c994f7ee
--- /dev/null
+++ b/gnu/packages/patches/rust-rspec-1-remove-clippy.patch
@@ -0,0 +1,16 @@
+Remove the dependency on clippy
+
+diff --git a/Cargo.toml b/Cargo.toml
+index 177cb80..7260c34 100644
+--- a/Cargo.toml
++++ b/Cargo.toml
+@@ -39,9 +39,6 @@ version = "1.5"
+ 
+ [dependencies.time]
+ version = "0.2"
+-[build-dependencies.clippy]
+-version = "0.0.153"
+-optional = true
+ 
+ [features]
+ default = []
diff --git a/gnu/packages/patches/rust-trash-2-update-windows.patch b/gnu/packages/patches/rust-trash-2-update-windows.patch
new file mode 100644
index 0000000000..f6b13a4658
--- /dev/null
+++ b/gnu/packages/patches/rust-trash-2-update-windows.patch
@@ -0,0 +1,132 @@
+This patch is taken from upstream so we can use an already packaged
+version of the windows crate.
+
+diff --git a/Cargo.toml b/Cargo.toml
+index 2c28dfe..6b61771 100644
+--- a/Cargo.toml
++++ b/Cargo.toml
+@@ -87,9 +87,8 @@ version = "0.2.7"
+ version = "1.0.0"
+ 
+ [target."cfg(windows)".dependencies.windows]
+-version = "0.37.0"
++version = "0.44.0"
+ features = [
+-    "alloc",
+     "Win32_Foundation",
+     "Win32_System_Com_StructuredStorage",
+     "Win32_UI_Shell_PropertiesSystem",
+diff --git a/Cargo.toml.orig b/Cargo.toml.orig
+index 894a78c..c17fc02 100644
+--- a/Cargo.toml.orig
++++ b/Cargo.toml.orig
+@@ -44,7 +44,7 @@ once_cell = "1.7.2"
+ once_cell = "1.7.2"
+ 
+ [target.'cfg(windows)'.dependencies]
+-windows = { version = "0.37.0", features = [ "alloc",
++windows = { version = "0.44.0", features = [
+     "Win32_Foundation",
+     "Win32_System_Com_StructuredStorage",
+     "Win32_UI_Shell_PropertiesSystem",
+diff --git a/src/windows.rs b/src/windows.rs
+index c1379d3..3f4426b 100644
+--- a/src/windows.rs
++++ b/src/windows.rs
+@@ -1,7 +1,6 @@
+ use crate::{Error, TrashContext, TrashItem};
+ use std::{
+-    ffi::{OsStr, OsString},
+-    mem::MaybeUninit,
++    ffi::{c_void, OsStr, OsString},
+     os::windows::{ffi::OsStrExt, prelude::*},
+     path::PathBuf,
+ };
+@@ -66,7 +65,7 @@ impl TrashContext {
+                 let shi: IShellItem =
+                     SHCreateItemFromParsingName(PCWSTR(wide_path_slice.as_ptr()), None)?;
+ 
+-                pfo.DeleteItem(shi, None)?;
++                pfo.DeleteItem(&shi, None)?;
+             }
+             pfo.PerformOperations()?;
+             Ok(())
+@@ -78,28 +77,18 @@ pub fn list() -> Result<Vec<TrashItem>, Error> {
+     ensure_com_initialized();
+     unsafe {
+         let mut item_vec = Vec::new();
+-        let mut recycle_bin = MaybeUninit::<Option<IShellItem>>::uninit();
+ 
+-        SHGetKnownFolderItem(
+-            &FOLDERID_RecycleBinFolder,
+-            KF_FLAG_DEFAULT,
+-            HANDLE::default(),
+-            &IShellItem::IID,
+-            recycle_bin.as_mut_ptr() as _,
+-        )?;
+-
+-        let recycle_bin = recycle_bin.assume_init().ok_or(Error::Unknown {
+-            description: "SHGetKnownFolderItem gave NULL for FOLDERID_RecycleBinFolder".into(),
+-        })?;
++        let recycle_bin: IShellItem =
++            SHGetKnownFolderItem(&FOLDERID_RecycleBinFolder, KF_FLAG_DEFAULT, HANDLE::default())?;
+ 
+         let pesi: IEnumShellItems = recycle_bin.BindToHandler(None, &BHID_EnumItems)?;
+-        let mut fetched: u32 = 0;
+ 
+         loop {
++            let mut fetched_count: u32 = 0;
+             let mut arr = [None];
+-            pesi.Next(&mut arr, &mut fetched)?;
++            pesi.Next(&mut arr, Some(&mut fetched_count as *mut u32))?;
+ 
+-            if fetched == 0 {
++            if fetched_count == 0 {
+                 break;
+             }
+ 
+@@ -145,7 +134,7 @@ where
+             at_least_one = true;
+             let id_as_wide: Vec<u16> = item.id.encode_wide().chain(std::iter::once(0)).collect();
+             let parsing_name = PCWSTR(id_as_wide.as_ptr());
+-            let trash_item: IShellItem = SHCreateItemFromParsingName(&parsing_name, None)?;
++            let trash_item: IShellItem = SHCreateItemFromParsingName(parsing_name, None)?;
+             pfo.DeleteItem(&trash_item, None)?;
+         }
+         if at_least_one {
+@@ -181,7 +170,7 @@ where
+         for item in items.iter() {
+             let id_as_wide: Vec<u16> = item.id.encode_wide().chain(std::iter::once(0)).collect();
+             let parsing_name = PCWSTR(id_as_wide.as_ptr());
+-            let trash_item: IShellItem = SHCreateItemFromParsingName(&parsing_name, None)?;
++            let trash_item: IShellItem = SHCreateItemFromParsingName(parsing_name, None)?;
+             let parent_path_wide: Vec<_> =
+                 item.original_parent.as_os_str().encode_wide().chain(std::iter::once(0)).collect();
+             let orig_folder_shi: IShellItem =
+@@ -191,7 +180,7 @@ where
+                 .chain(std::iter::once(0))
+                 .collect();
+ 
+-            pfo.MoveItem(trash_item, orig_folder_shi, PCWSTR(name_wstr.as_ptr()), None)?;
++            pfo.MoveItem(&trash_item, &orig_folder_shi, PCWSTR(name_wstr.as_ptr()), None)?;
+         }
+         if !items.is_empty() {
+             pfo.PerformOperations()?;
+@@ -203,7 +192,7 @@ where
+ unsafe fn get_display_name(psi: &IShellItem, sigdnname: SIGDN) -> Result<OsString, Error> {
+     let name = psi.GetDisplayName(sigdnname)?;
+     let result = wstr_to_os_string(name);
+-    CoTaskMemFree(name.0 as _);
++    CoTaskMemFree(Some(name.0 as *const c_void));
+     Ok(result)
+ }
+ 
+@@ -257,7 +246,7 @@ impl CoInitializer {
+         if cfg!(feature = "coinit_speed_over_memory") {
+             init_mode |= COINIT_SPEED_OVER_MEMORY;
+         }
+-        let hr = unsafe { CoInitializeEx(std::ptr::null_mut(), init_mode) };
++        let hr = unsafe { CoInitializeEx(None, init_mode) };
+         if hr.is_err() {
+             panic!("Call to CoInitializeEx failed. HRESULT: {:?}. Consider using `trash` with the feature `coinit_multithreaded`", hr);
+         }
diff --git a/gnu/packages/patches/rustc-1.54.0-src.patch b/gnu/packages/patches/rustc-1.54.0-src.patch
index d075dce39b..d322fd0d49 100644
--- a/gnu/packages/patches/rustc-1.54.0-src.patch
+++ b/gnu/packages/patches/rustc-1.54.0-src.patch
@@ -28,51 +28,111 @@
  rustc_data_structures::static_assert_size!(ForeignItemKind, 72);
  
  impl From<ForeignItemKind> for ItemKind {
+
 --- compiler/rustc_hir/src/hir.rs
 +++ compiler/rustc_hir/src/hir.rs
-@@ -3050,3 +3050,3 @@
+@@ -3048,7 +3048,7 @@ impl<'hir> Node<'hir> {
+ }
+ 
  // Some nodes are used a lot. Make sure they don't unintentionally get bigger.
 -#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
 +#[cfg(all(not(rust_compiler="mrustc"),target_arch = "x86_64", target_pointer_width = "64"))]
  mod size_asserts {
+     rustc_data_structures::static_assert_size!(super::Block<'static>, 48);
+     rustc_data_structures::static_assert_size!(super::Expr<'static>, 64);
+
 --- compiler/rustc_middle/src/mir/interpret/error.rs
 +++ compiler/rustc_middle/src/mir/interpret/error.rs
-@@ -452,2 +452,2 @@
+@@ -449,7 +449,7 @@ impl dyn MachineStopType {
+     }
+ }
+ 
 -#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
 +#[cfg(all(not(rust_compiler="mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
  static_assert_size!(InterpError<'_>, 64);
+ 
+ pub enum InterpError<'tcx> {
+
 --- compiler/rustc_middle/src/mir/mod.rs
 +++ compiler/rustc_middle/src/mir/mod.rs
-@@ -2203,2 +2203,2 @@
+@@ -2200,7 +2200,7 @@ pub enum AggregateKind<'tcx> {
+     Generator(DefId, SubstsRef<'tcx>, hir::Movability),
+ }
+ 
 -#[cfg(target_arch = "x86_64")]
 +#[cfg(all(not(rust_compiler="mrustc"), target_arch = "x86_64"))]
  static_assert_size!(AggregateKind<'_>, 48);
+ 
+ #[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq, TyEncodable, TyDecodable, Hash, HashStable)]
+
 --- compiler/rustc_middle/src/thir.rs
 +++ compiler/rustc_middle/src/thir.rs
-@@ -147,2 +147,2 @@
+@@ -144,7 +144,7 @@ pub enum StmtKind<'tcx> {
+ }
+ 
+ // `Expr` is used a lot. Make sure it doesn't unintentionally get bigger.
 -#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
 +#[cfg(all(not(rust_compiler="mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
  rustc_data_structures::static_assert_size!(Expr<'_>, 144);
---- compiler/rustc_mir/src/interpret/place.rs
-+++ compiler/rustc_mir/src/interpret/place.rs
-@@ -91,2 +91,2 @@
--#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-+#[cfg(all(not(rust_compiler = "mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
- rustc_data_structures::static_assert_size!(Place, 64);
-@@ -100,2 +100,2 @@
--#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-+#[cfg(all(not(rust_compiler = "mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
- rustc_data_structures::static_assert_size!(PlaceTy<'_>, 80);
+ 
+ /// The Thir trait implementor lowers their expressions (`&'tcx H::Expr`)
+
 --- compiler/rustc_mir/src/interpret/operand.rs
 +++ compiler/rustc_mir/src/interpret/operand.rs
-@@ -35,2 +35,2 @@
+@@ -32,7 +32,7 @@ pub enum Immediate<Tag = ()> {
+     ScalarPair(ScalarMaybeUninit<Tag>, ScalarMaybeUninit<Tag>),
+ }
+ 
 -#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
 +#[cfg(all(not(rust_compiler = "mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
  rustc_data_structures::static_assert_size!(Immediate, 56);
-@@ -90,2 +90,2 @@
+ 
+ impl<Tag> From<ScalarMaybeUninit<Tag>> for Immediate<Tag> {
+@@ -87,7 +87,7 @@ pub struct ImmTy<'tcx, Tag = ()> {
+     pub layout: TyAndLayout<'tcx>,
+ }
+ 
 -#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
 +#[cfg(all(not(rust_compiler = "mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
  rustc_data_structures::static_assert_size!(ImmTy<'_>, 72);
+ 
+ impl<Tag: Copy> std::fmt::Display for ImmTy<'tcx, Tag> {
+
+--- compiler/rustc_mir/src/interpret/place.rs
++++ compiler/rustc_mir/src/interpret/place.rs
+@@ -88,7 +88,7 @@ pub enum Place<Tag = ()> {
+     Local { frame: usize, local: mir::Local },
+ }
+ 
+-#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
++#[cfg(all(not(rust_compiler = "mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
+ rustc_data_structures::static_assert_size!(Place, 64);
+ 
+ #[derive(Copy, Clone, Debug)]
+@@ -97,7 +97,7 @@ pub struct PlaceTy<'tcx, Tag = ()> {
+     pub layout: TyAndLayout<'tcx>,
+ }
+ 
+-#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
++#[cfg(all(not(rust_compiler = "mrustc"), target_arch = "x86_64", target_pointer_width = "64"))]
+ rustc_data_structures::static_assert_size!(PlaceTy<'_>, 80);
+ 
+ impl<'tcx, Tag> std::ops::Deref for PlaceTy<'tcx, Tag> {
+
+#
+# Disable std_detect's detection logic (use the same logic as miri)
+#
+--- library/stdarch/crates/std_detect/src/detect/mod.rs
++++ library/stdarch/crates/std_detect/src/detect/mod.rs
+@@ -86,7 +86,7 @@ mod bit;
+ mod cache;
+ 
+ cfg_if! {
+-    if #[cfg(miri)] {
++    if #[cfg(any(miri, rust_compiler = "mrustc"))] {
+         // When running under miri all target-features that are not enabled at
+         // compile-time are reported as disabled at run-time.
+         //
 
 #
 # Disable crc32fast's use of stdarch
@@ -88,19 +148,145 @@
      ))] {
 
 #
-# Disable std_detect's detection logic (use the same logic as miri)
+# Backport which is required to support arm64 on macOS 12
+# See: https://github.com/alexcrichton/curl-rust/commit/0aea09c428b9bc2bcf46da0fc33959fe3f03c74a
 #
---- library/stdarch/crates/std_detect/src/detect/mod.rs
-+++ library/stdarch/crates/std_detect/src/detect/mod.rs
-@@ -88,2 +88,2 @@
- cfg_if! {
--    if #[cfg(miri)] {
-+    if #[cfg(any(miri, rust_compiler = "mrustc"))] {
+--- vendor/curl/src/lib.rs
++++ vendor/curl/src/lib.rs
+@@ -82,6 +82,9 @@ pub mod easy;
+ pub mod multi;
+ mod panic;
+ 
++#[cfg(test)]
++static INITIALIZED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
++
+ /// Initializes the underlying libcurl library.
+ ///
+ /// The underlying libcurl library must be initialized before use, and must be
+@@ -102,46 +105,62 @@ pub fn init() {
+     /// Used to prevent concurrent or duplicate initialization.
+     static INIT: Once = Once::new();
+ 
+-    /// An exported constructor function. On supported platforms, this will be
+-    /// invoked automatically before the program's `main` is called.
+-    #[cfg_attr(
+-        any(target_os = "linux", target_os = "freebsd", target_os = "android"),
+-        link_section = ".init_array"
+-    )]
+-    #[cfg_attr(target_os = "macos", link_section = "__DATA,__mod_init_func")]
+-    #[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
+-    static INIT_CTOR: extern "C" fn() = init_inner;
++    INIT.call_once(|| {
++        #[cfg(need_openssl_init)]
++        openssl_probe::init_ssl_cert_env_vars();
++        #[cfg(need_openssl_init)]
++        openssl_sys::init();
++
++        unsafe {
++            assert_eq!(curl_sys::curl_global_init(curl_sys::CURL_GLOBAL_ALL), 0);
++        }
++
++        #[cfg(test)]
++        {
++            INITIALIZED.store(true, std::sync::atomic::Ordering::SeqCst);
++        }
++
++        // Note that we explicitly don't schedule a call to
++        // `curl_global_cleanup`. The documentation for that function says
++        //
++        // > You must not call it when any other thread in the program (i.e. a
++        // > thread sharing the same memory) is running. This doesn't just mean
++        // > no other thread that is using libcurl.
++        //
++        // We can't ever be sure of that, so unfortunately we can't call the
++        // function.
++    });
++}
+ 
++/// An exported constructor function. On supported platforms, this will be
++/// invoked automatically before the program's `main` is called. This is done
++/// for the convenience of library users since otherwise the thread-safety rules
++/// around initialization can be difficult to fulfill.
++///
++/// This is a hidden public item to ensure the symbol isn't optimized away by a
++/// rustc/LLVM bug: https://github.com/rust-lang/rust/issues/47384. As long as
++/// any item in this module is used by the final binary (which `init` will be)
++/// then this symbol should be preserved.
++#[used]
++#[doc(hidden)]
++#[cfg_attr(
++    any(target_os = "linux", target_os = "freebsd", target_os = "android"),
++    link_section = ".init_array"
++)]
++#[cfg_attr(target_os = "macos", link_section = "__DATA,__mod_init_func")]
++#[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")]
++pub static INIT_CTOR: extern "C" fn() = {
+     /// This is the body of our constructor function.
+     #[cfg_attr(
+         any(target_os = "linux", target_os = "android"),
+         link_section = ".text.startup"
+     )]
+-    extern "C" fn init_inner() {
+-        INIT.call_once(|| {
+-            #[cfg(need_openssl_init)]
+-            openssl_sys::init();
+-
+-            unsafe {
+-                assert_eq!(curl_sys::curl_global_init(curl_sys::CURL_GLOBAL_ALL), 0);
+-            }
+-
+-            // Note that we explicitly don't schedule a call to
+-            // `curl_global_cleanup`. The documentation for that function says
+-            //
+-            // > You must not call it when any other thread in the program (i.e.
+-            // > a thread sharing the same memory) is running. This doesn't just
+-            // > mean no other thread that is using libcurl.
+-            //
+-            // We can't ever be sure of that, so unfortunately we can't call the
+-            // function.
+-        });
++    extern "C" fn init_ctor() {
++        init();
+     }
+ 
+-    // We invoke our init function through our static to ensure the symbol isn't
+-    // optimized away by a bug: https://github.com/rust-lang/rust/issues/47384
+-    INIT_CTOR();
+-}
++    init_ctor
++};
+ 
+ unsafe fn opt_str<'a>(ptr: *const libc::c_char) -> Option<&'a str> {
+     if ptr.is_null() {
+@@ -158,3 +177,20 @@ fn cvt(r: curl_sys::CURLcode) -> Result<(), Error> {
+         Err(Error::new(r))
+     }
+ }
++
++#[cfg(test)]
++mod tests {
++    use super::*;
++
++    #[test]
++    #[cfg(any(
++        target_os = "linux",
++        target_os = "macos",
++        target_os = "windows",
++        target_os = "freebsd",
++        target_os = "android"
++    ))]
++    fn is_initialized_before_main() {
++        assert!(INITIALIZED.load(std::sync::atomic::Ordering::SeqCst));
++    }
++}
 
 # PPV-Lite also needs to know that we're pretending to be miri
 --- vendor/ppv-lite86/src/lib.rs
 +++ vendor/ppv-lite86/src/lib.rs
-@@ -12,9 +12,9 @@
+@@ -9,14 +9,14 @@ mod soft;
+ mod types;
+ pub use self::types::*;
+ 
 -#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
 +#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri), not(rust_compiler = "mrustc")))]
  pub mod x86_64;
@@ -114,4 +300,5 @@
 -#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
 +#[cfg(any(miri, rust_compiler = "mrustc", not(all(feature = "simd", any(target_arch = "x86_64")))))]
  use self::generic as arch;
-
+ 
+ pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
author	Efraim Flashner <efraim@flashner.co.il>	2024-02-28 12:18:45 +0200
committer	Efraim Flashner <efraim@flashner.co.il>	2024-02-28 12:18:45 +0200
commit	f29f80c194d0c534a92354b2bc19022a9b70ecf8 (patch)
tree	adc3c4114f59ef88ed3e097a8ec8517979f71562 /gnu/packages/patches
parent	c034088e37b51018d5bfeb88d822c559b38d51db (diff)
parent	7947d47c9b891d2461ca9e7c53048d0e44294b5d (diff)
download	guix-f29f80c194d0c534a92354b2bc19022a9b70ecf8.tar guix-f29f80c194d0c534a92354b2bc19022a9b70ecf8.tar.gz