about summary refs log tree commit diff
path: root/src/common/aes.c
diff options
context:
space:
mode:
author: Nick Mathewson <nickm@torproject.org> 2010-10-15 13:34:31 -0400
committer: Nick Mathewson <nickm@torproject.org> 2010-10-15 13:44:25 -0400
commit96ab83d3b64a6e81f4e3f1c18598c942efea046c (patch)
tree65e4c696bfb57d77c5875ce1fe0fcbddf017db68 /src/common/aes.c
parent006acf8b3f7095ba4e4ce38d3ea963a018bf91ef (diff)
downloadtor-96ab83d3b64a6e81f4e3f1c18598c942efea046c.tar
tor-96ab83d3b64a6e81f4e3f1c18598c942efea046c.tar.gz
Improve accuracy of comment about aes_crypt performance
The old comment was from before I tried a huge pile of crazy stuff to make the inner loop faster. Short answer: GCC already knows how to unroll loops pretty well. Other short answer: we should have made the relay payload size an even multiple of 4, 8, or ideally 16.
Diffstat (limited to 'src/common/aes.c')
-rw-r--r--src/common/aes.c19
1 file changed, 14 insertions, 5 deletions
diff --git a/src/common/aes.c b/src/common/aes.c
index a17328317..4998c386a 100644
--- a/src/common/aes.c
+++ b/src/common/aes.c
@@ -288,11 +288,20 @@ void
aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
char *output)
{
-
- /* XXXX This function is up to 5% of our runtime in some profiles;
- * we should look into unrolling some of the loops; taking advantage
- * of alignment, using a bigger buffer, and so on. Not till after 0.1.2.x,
- * though. */
+ /* This function alone is up to 5% of our runtime in some profiles; anything
+ * we could do to make it faster would be great.
+ *
+ * Experimenting suggests that unrolling the inner loop into a switch
+ * statement doesn't help. What does seem to help is making the input and
+ * output buffers word aligned, and never crypting anything besides an
+ * integer number of words at a time -- it shaves maybe 4-5% of the per-byte
+ * encryption time measured by bench_aes. We can't do that with the current
+ * Tor protocol, though: Tor really likes to crypt things in 509-byte
+ * chunks.
+ *
+ * If we were really ambitious, we'd force len to be a multiple of the block
+ * size, and shave maybe another 4-5% off.
+ */
int c = cipher->pos;
if (PREDICT_UNLIKELY(!len)) return;