aboutsummaryrefslogtreecommitdiff
path: root/gnu/packages/patches
diff options
context:
space:
mode:
authorRicardo Wurmus <rekado@elephly.net>2017-03-22 14:45:44 +0100
committerRicardo Wurmus <rekado@elephly.net>2017-03-22 14:45:44 +0100
commitdc8a34ed46f58397b5a53c99ec5f718c6d0e4fef (patch)
treee48263a8fde1093337a3b59da551c23d55848179 /gnu/packages/patches
parentd553cd6015d9e99226b5d89266edbc6df4c838b3 (diff)
downloadgnu-guix-dc8a34ed46f58397b5a53c99ec5f718c6d0e4fef.tar
gnu-guix-dc8a34ed46f58397b5a53c99ec5f718c6d0e4fef.tar.gz
gnu: freeimage: Fix build with GCC 5.
* gnu/packages/patches/freeimage-fix-build-with-gcc-5.patch: New file. * gnu/local.mk (dist_patch_DATA): Add it. * gnu/packages/image.scm (freeimage)[source]: Add patch.
Diffstat (limited to 'gnu/packages/patches')
-rw-r--r--gnu/packages/patches/freeimage-fix-build-with-gcc-5.patch1453
1 files changed, 1453 insertions, 0 deletions
diff --git a/gnu/packages/patches/freeimage-fix-build-with-gcc-5.patch b/gnu/packages/patches/freeimage-fix-build-with-gcc-5.patch
new file mode 100644
index 0000000000..2c9f2c3357
--- /dev/null
+++ b/gnu/packages/patches/freeimage-fix-build-with-gcc-5.patch
@@ -0,0 +1,1453 @@
+The original patch was downloaded from here:
+https://chromium-review.googlesource.com/c/297211
+
+The paths, file names, and line endings have been adapted.
+
+From eebaf97f5a1cb713d81d311308d8a48c124e5aef Mon Sep 17 00:00:00 2001
+From: James Zern <jzern@google.com>
+Date: Wed, 02 Sep 2015 23:21:13 -0700
+Subject: [PATCH] dsp/mips: add whitespace around stringizing operator
+
+fixes compile with gcc 5.1
+BUG=259
+
+Change-Id: Ideb39c6290ab8569b1b6cc835bea11c822d0286c
+---
+
+diff --git a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
+index 6590f43..40e4d82 100644
+--- a/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.dec_mips_dsp_r2.c
+@@ -548,10 +548,10 @@
+ // TEMP3 = SRC[D + D1 * BPS]
+ #define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, \
+ A, A1, B, B1, C, C1, D, D1, SRC) \
+- "lbu %["#TEMP0"], "#A"+"#A1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "lbu %["#TEMP1"], "#B"+"#B1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "lbu %["#TEMP2"], "#C"+"#C1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "lbu %["#TEMP3"], "#D"+"#D1"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
++ "lbu %[" #TEMP0 "], " #A "+" #A1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "lbu %[" #TEMP1 "], " #B "+" #B1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "lbu %[" #TEMP2 "], " #C "+" #C1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "lbu %[" #TEMP3 "], " #D "+" #D1 "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
+
+ static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+ int i;
+@@ -623,8 +623,8 @@
+ // DST[A * BPS] = TEMP0
+ // DST[B + C * BPS] = TEMP1
+ #define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST) \
+- "usw %["#TEMP0"], "#A"*"XSTR(BPS)"(%["#DST"]) \n\t" \
+- "usw %["#TEMP1"], "#B"+"#C"*"XSTR(BPS)"(%["#DST"]) \n\t"
++ "usw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #DST "]) \n\t" \
++ "usw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #DST "]) \n\t"
+
+ static void VE4(uint8_t* dst) { // vertical
+ const uint8_t* top = dst - BPS;
+@@ -725,8 +725,8 @@
+ // TEMP0 = SRC[A * BPS]
+ // TEMP1 = SRC[B + C * BPS]
+ #define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC) \
+- "ulw %["#TEMP0"], "#A"*"XSTR(BPS)"(%["#SRC"]) \n\t" \
+- "ulw %["#TEMP1"], "#B"+"#C"*"XSTR(BPS)"(%["#SRC"]) \n\t"
++ "ulw %[" #TEMP0 "], " #A "*"XSTR(BPS)"(%[" #SRC "]) \n\t" \
++ "ulw %[" #TEMP1 "], " #B "+" #C "*"XSTR(BPS)"(%[" #SRC "]) \n\t"
+
+ static void LD4(uint8_t* dst) { // Down-Left
+ int temp0, temp1, temp2, temp3, temp4;
+@@ -873,24 +873,24 @@
+ #define CLIPPING(SIZE) \
+ "preceu.ph.qbl %[temp2], %[temp0] \n\t" \
+ "preceu.ph.qbr %[temp0], %[temp0] \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "preceu.ph.qbl %[temp3], %[temp1] \n\t" \
+ "preceu.ph.qbr %[temp1], %[temp1] \n\t" \
+ ".endif \n\t" \
+ "addu.ph %[temp2], %[temp2], %[dst_1] \n\t" \
+ "addu.ph %[temp0], %[temp0], %[dst_1] \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "addu.ph %[temp3], %[temp3], %[dst_1] \n\t" \
+ "addu.ph %[temp1], %[temp1], %[dst_1] \n\t" \
+ ".endif \n\t" \
+ "shll_s.ph %[temp2], %[temp2], 7 \n\t" \
+ "shll_s.ph %[temp0], %[temp0], 7 \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "shll_s.ph %[temp3], %[temp3], 7 \n\t" \
+ "shll_s.ph %[temp1], %[temp1], 7 \n\t" \
+ ".endif \n\t" \
+ "precrqu_s.qb.ph %[temp0], %[temp2], %[temp0] \n\t" \
+-".if "#SIZE" == 8 \n\t" \
++".if " #SIZE " == 8 \n\t" \
+ "precrqu_s.qb.ph %[temp1], %[temp3], %[temp1] \n\t" \
+ ".endif \n\t"
+
+@@ -899,7 +899,7 @@
+ int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1]; \
+ int temp0, temp1, temp2, temp3; \
+ __asm__ volatile ( \
+- ".if "#SIZE" < 8 \n\t" \
++ ".if " #SIZE " < 8 \n\t" \
+ "ulw %[temp0], 0(%[top]) \n\t" \
+ "subu.ph %[dst_1], %[dst_1], %[top_1] \n\t" \
+ CLIPPING(4) \
+@@ -911,7 +911,7 @@
+ CLIPPING(8) \
+ "usw %[temp0], 0(%[dst]) \n\t" \
+ "usw %[temp1], 4(%[dst]) \n\t" \
+- ".if "#SIZE" == 16 \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
+ "ulw %[temp0], 8(%[top]) \n\t" \
+ "ulw %[temp1], 12(%[top]) \n\t" \
+ CLIPPING(8) \
+diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips32.c b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
+index c5837f1..b50e08b 100644
+--- a/Source/LibWebP/src/dsp/dsp.enc_mips32.c
++++ b/Source/LibWebP/src/dsp/dsp.enc_mips32.c
+@@ -31,26 +31,26 @@
+ // TEMP0..TEMP3 - registers for corresponding tmp elements
+ // TEMP4..TEMP5 - temporary registers
+ #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
+- "lh %[temp16], "#A"(%[temp20]) \n\t" \
+- "lh %[temp18], "#B"(%[temp20]) \n\t" \
+- "lh %[temp17], "#C"(%[temp20]) \n\t" \
+- "lh %[temp19], "#D"(%[temp20]) \n\t" \
+- "addu %["#TEMP4"], %[temp16], %[temp18] \n\t" \
+- "subu %[temp16], %[temp16], %[temp18] \n\t" \
+- "mul %["#TEMP0"], %[temp17], %[kC2] \n\t" \
+- "mul %[temp18], %[temp19], %[kC1] \n\t" \
+- "mul %[temp17], %[temp17], %[kC1] \n\t" \
+- "mul %[temp19], %[temp19], %[kC2] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 16 \n\n" \
+- "sra %[temp18], %[temp18], 16 \n\n" \
+- "sra %[temp17], %[temp17], 16 \n\n" \
+- "sra %[temp19], %[temp19], 16 \n\n" \
+- "subu %["#TEMP2"], %["#TEMP0"], %[temp18] \n\t" \
+- "addu %["#TEMP3"], %[temp17], %[temp19] \n\t" \
+- "addu %["#TEMP0"], %["#TEMP4"], %["#TEMP3"] \n\t" \
+- "addu %["#TEMP1"], %[temp16], %["#TEMP2"] \n\t" \
+- "subu %["#TEMP2"], %[temp16], %["#TEMP2"] \n\t" \
+- "subu %["#TEMP3"], %["#TEMP4"], %["#TEMP3"] \n\t"
++ "lh %[temp16], " #A "(%[temp20]) \n\t" \
++ "lh %[temp18], " #B "(%[temp20]) \n\t" \
++ "lh %[temp17], " #C "(%[temp20]) \n\t" \
++ "lh %[temp19], " #D "(%[temp20]) \n\t" \
++ "addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \
++ "subu %[temp16], %[temp16], %[temp18] \n\t" \
++ "mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \
++ "mul %[temp18], %[temp19], %[kC1] \n\t" \
++ "mul %[temp17], %[temp17], %[kC1] \n\t" \
++ "mul %[temp19], %[temp19], %[kC2] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \
++ "sra %[temp18], %[temp18], 16 \n\n" \
++ "sra %[temp17], %[temp17], 16 \n\n" \
++ "sra %[temp19], %[temp19], 16 \n\n" \
++ "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \
++ "addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \
++ "addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \
++ "addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \
++ "subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \
++ "subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t"
+
+ // macro for one horizontal pass in ITransformOne
+ // MUL and STORE macros inlined
+@@ -58,59 +58,59 @@
+ // temp0..temp15 holds tmp[0]..tmp[15]
+ // A - offset in bytes to load from ref and store to dst buffer
+ // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+-#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
+- "addiu %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
+- "addu %[temp16], %["#TEMP0"], %["#TEMP8"] \n\t" \
+- "subu %[temp17], %["#TEMP0"], %["#TEMP8"] \n\t" \
+- "mul %["#TEMP0"], %["#TEMP4"], %[kC2] \n\t" \
+- "mul %["#TEMP8"], %["#TEMP12"], %[kC1] \n\t" \
+- "mul %["#TEMP4"], %["#TEMP4"], %[kC1] \n\t" \
+- "mul %["#TEMP12"], %["#TEMP12"], %[kC2] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 16 \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
+- "subu %[temp18], %["#TEMP0"], %["#TEMP8"] \n\t" \
+- "addu %[temp19], %["#TEMP4"], %["#TEMP12"] \n\t" \
+- "addu %["#TEMP0"], %[temp16], %[temp19] \n\t" \
+- "addu %["#TEMP4"], %[temp17], %[temp18] \n\t" \
+- "subu %["#TEMP8"], %[temp17], %[temp18] \n\t" \
+- "subu %["#TEMP12"], %[temp16], %[temp19] \n\t" \
+- "lw %[temp20], 0(%[args]) \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 3 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 3 \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 3 \n\t" \
+- "lbu %[temp16], 0+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "lbu %[temp17], 1+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "lbu %[temp18], 2+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "lbu %[temp19], 3+"XSTR(BPS)"*"#A"(%[temp20]) \n\t" \
+- "addu %["#TEMP0"], %[temp16], %["#TEMP0"] \n\t" \
+- "addu %["#TEMP4"], %[temp17], %["#TEMP4"] \n\t" \
+- "addu %["#TEMP8"], %[temp18], %["#TEMP8"] \n\t" \
+- "addu %["#TEMP12"], %[temp19], %["#TEMP12"] \n\t" \
+- "slt %[temp16], %["#TEMP0"], $zero \n\t" \
+- "slt %[temp17], %["#TEMP4"], $zero \n\t" \
+- "slt %[temp18], %["#TEMP8"], $zero \n\t" \
+- "slt %[temp19], %["#TEMP12"], $zero \n\t" \
+- "movn %["#TEMP0"], $zero, %[temp16] \n\t" \
+- "movn %["#TEMP4"], $zero, %[temp17] \n\t" \
+- "movn %["#TEMP8"], $zero, %[temp18] \n\t" \
+- "movn %["#TEMP12"], $zero, %[temp19] \n\t" \
+- "addiu %[temp20], $zero, 255 \n\t" \
+- "slt %[temp16], %["#TEMP0"], %[temp20] \n\t" \
+- "slt %[temp17], %["#TEMP4"], %[temp20] \n\t" \
+- "slt %[temp18], %["#TEMP8"], %[temp20] \n\t" \
+- "slt %[temp19], %["#TEMP12"], %[temp20] \n\t" \
+- "movz %["#TEMP0"], %[temp20], %[temp16] \n\t" \
+- "movz %["#TEMP4"], %[temp20], %[temp17] \n\t" \
+- "lw %[temp16], 8(%[args]) \n\t" \
+- "movz %["#TEMP8"], %[temp20], %[temp18] \n\t" \
+- "movz %["#TEMP12"], %[temp20], %[temp19] \n\t" \
+- "sb %["#TEMP0"], 0+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
+- "sb %["#TEMP4"], 1+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
+- "sb %["#TEMP8"], 2+"XSTR(BPS)"*"#A"(%[temp16]) \n\t" \
+- "sb %["#TEMP12"], 3+"XSTR(BPS)"*"#A"(%[temp16]) \n\t"
++#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12) \
++ "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
++ "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
++ "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
++ "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
++ "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
++ "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
++ "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
++ "subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
++ "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
++ "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
++ "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
++ "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
++ "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
++ "lw %[temp20], 0(%[args]) \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
++ "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "lbu %[temp17], 1+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "lbu %[temp18], 2+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[temp20]) \n\t" \
++ "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
++ "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
++ "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
++ "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
++ "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
++ "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
++ "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
++ "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
++ "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
++ "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
++ "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
++ "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
++ "addiu %[temp20], $zero, 255 \n\t" \
++ "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
++ "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
++ "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
++ "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
++ "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
++ "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
++ "lw %[temp16], 8(%[args]) \n\t" \
++ "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
++ "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
++ "sb %[" #TEMP0 "], 0+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
++ "sb %[" #TEMP4 "], 1+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
++ "sb %[" #TEMP8 "], 2+"XSTR(BPS)"*" #A "(%[temp16]) \n\t" \
++ "sb %[" #TEMP12 "], 3+"XSTR(BPS)"*" #A "(%[temp16]) \n\t"
+
+ // Does one or two inverse transforms.
+ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+@@ -161,9 +161,9 @@
+ // K - offset in bytes (kZigzag[n] * 4)
+ // N - offset in bytes (n * 2)
+ #define QUANTIZE_ONE(J, K, N) \
+- "lh %[temp0], "#J"(%[ppin]) \n\t" \
+- "lhu %[temp1], "#J"(%[ppsharpen]) \n\t" \
+- "lw %[temp2], "#K"(%[ppzthresh]) \n\t" \
++ "lh %[temp0], " #J "(%[ppin]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \
++ "lw %[temp2], " #K "(%[ppzthresh]) \n\t" \
+ "sra %[sign], %[temp0], 15 \n\t" \
+ "xor %[coeff], %[temp0], %[sign] \n\t" \
+ "subu %[coeff], %[coeff], %[sign] \n\t" \
+@@ -172,9 +172,9 @@
+ "addiu %[temp5], $zero, 0 \n\t" \
+ "addiu %[level], $zero, 0 \n\t" \
+ "beqz %[temp4], 2f \n\t" \
+- "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
+- "lw %[temp2], "#K"(%[ppbias]) \n\t" \
+- "lhu %[temp3], "#J"(%[ppq]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
++ "lw %[temp2], " #K "(%[ppbias]) \n\t" \
++ "lhu %[temp3], " #J "(%[ppq]) \n\t" \
+ "mul %[level], %[coeff], %[temp1] \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+ "sra %[level], %[level], 17 \n\t" \
+@@ -184,8 +184,8 @@
+ "subu %[level], %[level], %[sign] \n\t" \
+ "mul %[temp5], %[level], %[temp3] \n\t" \
+ "2: \n\t" \
+- "sh %[temp5], "#J"(%[ppin]) \n\t" \
+- "sh %[level], "#N"(%[pout]) \n\t"
++ "sh %[temp5], " #J "(%[ppin]) \n\t" \
++ "sh %[level], " #N "(%[pout]) \n\t"
+
+ static int QuantizeBlock(int16_t in[16], int16_t out[16],
+ const VP8Matrix* const mtx) {
+@@ -253,39 +253,39 @@
+ // A - offset in bytes to load from a and b buffers
+ // E..H - offsets in bytes to store first results to tmp buffer
+ // E1..H1 - offsets in bytes to store second results to tmp buffer
+-#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
+- "lbu %[temp0], 0+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp1], 1+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp2], 2+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp3], 3+"XSTR(BPS)"*"#A"(%[a]) \n\t" \
+- "lbu %[temp4], 0+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
+- "lbu %[temp5], 1+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
+- "lbu %[temp6], 2+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
+- "lbu %[temp7], 3+"XSTR(BPS)"*"#A"(%[b]) \n\t" \
+- "addu %[temp8], %[temp0], %[temp2] \n\t" \
+- "subu %[temp0], %[temp0], %[temp2] \n\t" \
+- "addu %[temp2], %[temp1], %[temp3] \n\t" \
+- "subu %[temp1], %[temp1], %[temp3] \n\t" \
+- "addu %[temp3], %[temp4], %[temp6] \n\t" \
+- "subu %[temp4], %[temp4], %[temp6] \n\t" \
+- "addu %[temp6], %[temp5], %[temp7] \n\t" \
+- "subu %[temp5], %[temp5], %[temp7] \n\t" \
+- "addu %[temp7], %[temp8], %[temp2] \n\t" \
+- "subu %[temp2], %[temp8], %[temp2] \n\t" \
+- "addu %[temp8], %[temp0], %[temp1] \n\t" \
+- "subu %[temp0], %[temp0], %[temp1] \n\t" \
+- "addu %[temp1], %[temp3], %[temp6] \n\t" \
+- "subu %[temp3], %[temp3], %[temp6] \n\t" \
+- "addu %[temp6], %[temp4], %[temp5] \n\t" \
+- "subu %[temp4], %[temp4], %[temp5] \n\t" \
+- "sw %[temp7], "#E"(%[tmp]) \n\t" \
+- "sw %[temp2], "#H"(%[tmp]) \n\t" \
+- "sw %[temp8], "#F"(%[tmp]) \n\t" \
+- "sw %[temp0], "#G"(%[tmp]) \n\t" \
+- "sw %[temp1], "#E1"(%[tmp]) \n\t" \
+- "sw %[temp3], "#H1"(%[tmp]) \n\t" \
+- "sw %[temp6], "#F1"(%[tmp]) \n\t" \
+- "sw %[temp4], "#G1"(%[tmp]) \n\t"
++#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1) \
++ "lbu %[temp0], 0+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp1], 1+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp2], 2+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp3], 3+"XSTR(BPS)"*" #A "(%[a]) \n\t" \
++ "lbu %[temp4], 0+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
++ "lbu %[temp5], 1+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
++ "lbu %[temp6], 2+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
++ "lbu %[temp7], 3+"XSTR(BPS)"*" #A "(%[b]) \n\t" \
++ "addu %[temp8], %[temp0], %[temp2] \n\t" \
++ "subu %[temp0], %[temp0], %[temp2] \n\t" \
++ "addu %[temp2], %[temp1], %[temp3] \n\t" \
++ "subu %[temp1], %[temp1], %[temp3] \n\t" \
++ "addu %[temp3], %[temp4], %[temp6] \n\t" \
++ "subu %[temp4], %[temp4], %[temp6] \n\t" \
++ "addu %[temp6], %[temp5], %[temp7] \n\t" \
++ "subu %[temp5], %[temp5], %[temp7] \n\t" \
++ "addu %[temp7], %[temp8], %[temp2] \n\t" \
++ "subu %[temp2], %[temp8], %[temp2] \n\t" \
++ "addu %[temp8], %[temp0], %[temp1] \n\t" \
++ "subu %[temp0], %[temp0], %[temp1] \n\t" \
++ "addu %[temp1], %[temp3], %[temp6] \n\t" \
++ "subu %[temp3], %[temp3], %[temp6] \n\t" \
++ "addu %[temp6], %[temp4], %[temp5] \n\t" \
++ "subu %[temp4], %[temp4], %[temp5] \n\t" \
++ "sw %[temp7], " #E "(%[tmp]) \n\t" \
++ "sw %[temp2], " #H "(%[tmp]) \n\t" \
++ "sw %[temp8], " #F "(%[tmp]) \n\t" \
++ "sw %[temp0], " #G "(%[tmp]) \n\t" \
++ "sw %[temp1], " #E1 "(%[tmp]) \n\t" \
++ "sw %[temp3], " #H1 "(%[tmp]) \n\t" \
++ "sw %[temp6], " #F1 "(%[tmp]) \n\t" \
++ "sw %[temp4], " #G1 "(%[tmp]) \n\t"
+
+ // macro for one vertical pass in Disto4x4 (TTransform)
+ // two calls of function TTransform are merged into single one
+@@ -300,10 +300,10 @@
+ // A1..D1 - offsets in bytes to load second results from tmp buffer
+ // E..H - offsets in bytes to load from w buffer
+ #define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \
+- "lw %[temp0], "#A1"(%[tmp]) \n\t" \
+- "lw %[temp1], "#C1"(%[tmp]) \n\t" \
+- "lw %[temp2], "#B1"(%[tmp]) \n\t" \
+- "lw %[temp3], "#D1"(%[tmp]) \n\t" \
++ "lw %[temp0], " #A1 "(%[tmp]) \n\t" \
++ "lw %[temp1], " #C1 "(%[tmp]) \n\t" \
++ "lw %[temp2], " #B1 "(%[tmp]) \n\t" \
++ "lw %[temp3], " #D1 "(%[tmp]) \n\t" \
+ "addu %[temp8], %[temp0], %[temp1] \n\t" \
+ "subu %[temp0], %[temp0], %[temp1] \n\t" \
+ "addu %[temp1], %[temp2], %[temp3] \n\t" \
+@@ -324,18 +324,18 @@
+ "subu %[temp1], %[temp1], %[temp5] \n\t" \
+ "subu %[temp0], %[temp0], %[temp6] \n\t" \
+ "subu %[temp8], %[temp8], %[temp7] \n\t" \
+- "lhu %[temp4], "#E"(%[w]) \n\t" \
+- "lhu %[temp5], "#F"(%[w]) \n\t" \
+- "lhu %[temp6], "#G"(%[w]) \n\t" \
+- "lhu %[temp7], "#H"(%[w]) \n\t" \
++ "lhu %[temp4], " #E "(%[w]) \n\t" \
++ "lhu %[temp5], " #F "(%[w]) \n\t" \
++ "lhu %[temp6], " #G "(%[w]) \n\t" \
++ "lhu %[temp7], " #H "(%[w]) \n\t" \
+ "madd %[temp4], %[temp3] \n\t" \
+ "madd %[temp5], %[temp1] \n\t" \
+ "madd %[temp6], %[temp0] \n\t" \
+ "madd %[temp7], %[temp8] \n\t" \
+- "lw %[temp0], "#A"(%[tmp]) \n\t" \
+- "lw %[temp1], "#C"(%[tmp]) \n\t" \
+- "lw %[temp2], "#B"(%[tmp]) \n\t" \
+- "lw %[temp3], "#D"(%[tmp]) \n\t" \
++ "lw %[temp0], " #A "(%[tmp]) \n\t" \
++ "lw %[temp1], " #C "(%[tmp]) \n\t" \
++ "lw %[temp2], " #B "(%[tmp]) \n\t" \
++ "lw %[temp3], " #D "(%[tmp]) \n\t" \
+ "addu %[temp8], %[temp0], %[temp1] \n\t" \
+ "subu %[temp0], %[temp0], %[temp1] \n\t" \
+ "addu %[temp1], %[temp2], %[temp3] \n\t" \
+@@ -412,71 +412,71 @@
+ // temp0..temp15 holds tmp[0]..tmp[15]
+ // A - offset in bytes to load from src and ref buffers
+ // TEMP0..TEMP3 - registers for corresponding tmp elements
+-#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
+- "lw %["#TEMP1"], 0(%[args]) \n\t" \
+- "lw %["#TEMP2"], 4(%[args]) \n\t" \
+- "lbu %[temp16], 0+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp17], 0+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
+- "lbu %[temp18], 1+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp19], 1+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
+- "subu %[temp20], %[temp16], %[temp17] \n\t" \
+- "lbu %[temp16], 2+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp17], 2+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
+- "subu %["#TEMP0"], %[temp18], %[temp19] \n\t" \
+- "lbu %[temp18], 3+"XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "lbu %[temp19], 3+"XSTR(BPS)"*"#A"(%["#TEMP2"]) \n\t" \
+- "subu %["#TEMP1"], %[temp16], %[temp17] \n\t" \
+- "subu %["#TEMP2"], %[temp18], %[temp19] \n\t" \
+- "addu %["#TEMP3"], %[temp20], %["#TEMP2"] \n\t" \
+- "subu %["#TEMP2"], %[temp20], %["#TEMP2"] \n\t" \
+- "addu %[temp20], %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "subu %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "mul %[temp16], %["#TEMP2"], %[c5352] \n\t" \
+- "mul %[temp17], %["#TEMP2"], %[c2217] \n\t" \
+- "mul %[temp18], %["#TEMP0"], %[c5352] \n\t" \
+- "mul %[temp19], %["#TEMP0"], %[c2217] \n\t" \
+- "addu %["#TEMP1"], %["#TEMP3"], %[temp20] \n\t" \
+- "subu %[temp20], %["#TEMP3"], %[temp20] \n\t" \
+- "sll %["#TEMP0"], %["#TEMP1"], 3 \n\t" \
+- "sll %["#TEMP2"], %[temp20], 3 \n\t" \
+- "addiu %[temp16], %[temp16], 1812 \n\t" \
+- "addiu %[temp17], %[temp17], 937 \n\t" \
+- "addu %[temp16], %[temp16], %[temp19] \n\t" \
+- "subu %[temp17], %[temp17], %[temp18] \n\t" \
+- "sra %["#TEMP1"], %[temp16], 9 \n\t" \
+- "sra %["#TEMP3"], %[temp17], 9 \n\t"
++#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
++ "lw %[" #TEMP1 "], 0(%[args]) \n\t" \
++ "lw %[" #TEMP2 "], 4(%[args]) \n\t" \
++ "lbu %[temp16], 0+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp17], 0+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
++ "lbu %[temp18], 1+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp19], 1+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
++ "subu %[temp20], %[temp16], %[temp17] \n\t" \
++ "lbu %[temp16], 2+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp17], 2+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
++ "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
++ "lbu %[temp18], 3+"XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "lbu %[temp19], 3+"XSTR(BPS)"*" #A "(%[" #TEMP2 "]) \n\t" \
++ "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
++ "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
++ "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
++ "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
++ "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
++ "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
++ "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
++ "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
++ "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
++ "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
++ "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
++ "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
++ "addiu %[temp16], %[temp16], 1812 \n\t" \
++ "addiu %[temp17], %[temp17], 937 \n\t" \
++ "addu %[temp16], %[temp16], %[temp19] \n\t" \
++ "subu %[temp17], %[temp17], %[temp18] \n\t" \
++ "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
++ "sra %[" #TEMP3 "], %[temp17], 9 \n\t"
+
+ // macro for one vertical pass in FTransform
+ // temp0..temp15 holds tmp[0]..tmp[15]
+ // A..D - offsets in bytes to store to out buffer
+ // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+-#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
+- "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
+- "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
+- "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
+- "mul %[temp18], %[temp18], %[c5352] \n\t" \
+- "addiu %[temp16], %[temp16], 7 \n\t" \
+- "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
+- "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
+- "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
+- "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
+- "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
+- "addiu %[temp16], %["#TEMP12"], 1 \n\t" \
+- "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
+- "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
+- "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
+- "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
+- "sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
++#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
++ "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
++ "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
++ "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
++ "mul %[temp18], %[temp18], %[c5352] \n\t" \
++ "addiu %[temp16], %[temp16], 7 \n\t" \
++ "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
++ "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
++ "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
++ "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
++ "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
++ "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
++ "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
++ "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
++ "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
++ "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
++ "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
+
+ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+ int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+@@ -516,14 +516,14 @@
+ #if !defined(WORK_AROUND_GCC)
+
+ #define GET_SSE_INNER(A, B, C, D) \
+- "lbu %[temp0], "#A"(%[a]) \n\t" \
+- "lbu %[temp1], "#A"(%[b]) \n\t" \
+- "lbu %[temp2], "#B"(%[a]) \n\t" \
+- "lbu %[temp3], "#B"(%[b]) \n\t" \
+- "lbu %[temp4], "#C"(%[a]) \n\t" \
+- "lbu %[temp5], "#C"(%[b]) \n\t" \
+- "lbu %[temp6], "#D"(%[a]) \n\t" \
+- "lbu %[temp7], "#D"(%[b]) \n\t" \
++ "lbu %[temp0], " #A "(%[a]) \n\t" \
++ "lbu %[temp1], " #A "(%[b]) \n\t" \
++ "lbu %[temp2], " #B "(%[a]) \n\t" \
++ "lbu %[temp3], " #B "(%[b]) \n\t" \
++ "lbu %[temp4], " #C "(%[a]) \n\t" \
++ "lbu %[temp5], " #C "(%[b]) \n\t" \
++ "lbu %[temp6], " #D "(%[a]) \n\t" \
++ "lbu %[temp7], " #D "(%[b]) \n\t" \
+ "subu %[temp0], %[temp0], %[temp1] \n\t" \
+ "subu %[temp2], %[temp2], %[temp3] \n\t" \
+ "subu %[temp4], %[temp4], %[temp5] \n\t" \
+diff --git a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
+index 56db07c..44f6fd2 100644
+--- a/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.enc_mips_dsp_r2.c
+@@ -27,25 +27,25 @@
+ // I - input (macro doesn't change it)
+ #define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7, \
+ I0, I1, I2, I3, I4, I5, I6, I7) \
+- "addq.ph %["#O0"], %["#I0"], %["#I1"] \n\t" \
+- "subq.ph %["#O1"], %["#I0"], %["#I1"] \n\t" \
+- "addq.ph %["#O2"], %["#I2"], %["#I3"] \n\t" \
+- "subq.ph %["#O3"], %["#I2"], %["#I3"] \n\t" \
+- "addq.ph %["#O4"], %["#I4"], %["#I5"] \n\t" \
+- "subq.ph %["#O5"], %["#I4"], %["#I5"] \n\t" \
+- "addq.ph %["#O6"], %["#I6"], %["#I7"] \n\t" \
+- "subq.ph %["#O7"], %["#I6"], %["#I7"] \n\t"
++ "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
++ "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t" \
++ "addq.ph %[" #O2 "], %[" #I2 "], %[" #I3 "] \n\t" \
++ "subq.ph %[" #O3 "], %[" #I2 "], %[" #I3 "] \n\t" \
++ "addq.ph %[" #O4 "], %[" #I4 "], %[" #I5 "] \n\t" \
++ "subq.ph %[" #O5 "], %[" #I4 "], %[" #I5 "] \n\t" \
++ "addq.ph %[" #O6 "], %[" #I6 "], %[" #I7 "] \n\t" \
++ "subq.ph %[" #O7 "], %[" #I6 "], %[" #I7 "] \n\t"
+
+ // IO - input/output
+ #define ABS_X8(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7) \
+- "absq_s.ph %["#IO0"], %["#IO0"] \n\t" \
+- "absq_s.ph %["#IO1"], %["#IO1"] \n\t" \
+- "absq_s.ph %["#IO2"], %["#IO2"] \n\t" \
+- "absq_s.ph %["#IO3"], %["#IO3"] \n\t" \
+- "absq_s.ph %["#IO4"], %["#IO4"] \n\t" \
+- "absq_s.ph %["#IO5"], %["#IO5"] \n\t" \
+- "absq_s.ph %["#IO6"], %["#IO6"] \n\t" \
+- "absq_s.ph %["#IO7"], %["#IO7"] \n\t"
++ "absq_s.ph %[" #IO0 "], %[" #IO0 "] \n\t" \
++ "absq_s.ph %[" #IO1 "], %[" #IO1 "] \n\t" \
++ "absq_s.ph %[" #IO2 "], %[" #IO2 "] \n\t" \
++ "absq_s.ph %[" #IO3 "], %[" #IO3 "] \n\t" \
++ "absq_s.ph %[" #IO4 "], %[" #IO4 "] \n\t" \
++ "absq_s.ph %[" #IO5 "], %[" #IO5 "] \n\t" \
++ "absq_s.ph %[" #IO6 "], %[" #IO6 "] \n\t" \
++ "absq_s.ph %[" #IO7 "], %[" #IO7 "] \n\t"
+
+ // dpa.w.ph $ac0 temp0 ,temp1
+ // $ac += temp0[31..16] * temp1[31..16] + temp0[15..0] * temp1[15..0]
+@@ -56,15 +56,15 @@
+ #define MUL_HALF(O0, I0, I1, I2, I3, I4, I5, I6, I7, \
+ I8, I9, I10, I11, I12, I13, I14, I15) \
+ "mult $ac0, $zero, $zero \n\t" \
+- "dpa.w.ph $ac0, %["#I2"], %["#I0"] \n\t" \
+- "dpax.w.ph $ac0, %["#I5"], %["#I6"] \n\t" \
+- "dpa.w.ph $ac0, %["#I8"], %["#I9"] \n\t" \
+- "dpax.w.ph $ac0, %["#I11"], %["#I4"] \n\t" \
+- "dpa.w.ph $ac0, %["#I12"], %["#I7"] \n\t" \
+- "dpax.w.ph $ac0, %["#I13"], %["#I1"] \n\t" \
+- "dpa.w.ph $ac0, %["#I14"], %["#I3"] \n\t" \
+- "dpax.w.ph $ac0, %["#I15"], %["#I10"] \n\t" \
+- "mflo %["#O0"], $ac0 \n\t"
++ "dpa.w.ph $ac0, %[" #I2 "], %[" #I0 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I5 "], %[" #I6 "] \n\t" \
++ "dpa.w.ph $ac0, %[" #I8 "], %[" #I9 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I11 "], %[" #I4 "] \n\t" \
++ "dpa.w.ph $ac0, %[" #I12 "], %[" #I7 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I13 "], %[" #I1 "] \n\t" \
++ "dpa.w.ph $ac0, %[" #I14 "], %[" #I3 "] \n\t" \
++ "dpax.w.ph $ac0, %[" #I15 "], %[" #I10 "] \n\t" \
++ "mflo %[" #O0 "], $ac0 \n\t"
+
+ #define OUTPUT_EARLY_CLOBBER_REGS_17() \
+ OUTPUT_EARLY_CLOBBER_REGS_10(), \
+@@ -77,69 +77,69 @@
+ // A - offset in bytes to load from src and ref buffers
+ // TEMP0..TEMP3 - registers for corresponding tmp elements
+ #define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3) \
+- "lw %["#TEMP0"], 0(%[args]) \n\t" \
+- "lw %["#TEMP1"], 4(%[args]) \n\t" \
+- "lw %["#TEMP2"], "XSTR(BPS)"*"#A"(%["#TEMP0"]) \n\t" \
+- "lw %["#TEMP3"], "XSTR(BPS)"*"#A"(%["#TEMP1"]) \n\t" \
+- "preceu.ph.qbl %["#TEMP0"], %["#TEMP2"] \n\t" \
+- "preceu.ph.qbl %["#TEMP1"], %["#TEMP3"] \n\t" \
+- "preceu.ph.qbr %["#TEMP2"], %["#TEMP2"] \n\t" \
+- "preceu.ph.qbr %["#TEMP3"], %["#TEMP3"] \n\t" \
+- "subq.ph %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "subq.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP3"] \n\t" \
+- "rotr %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
+- "addq.ph %["#TEMP1"], %["#TEMP2"], %["#TEMP0"] \n\t" \
+- "subq.ph %["#TEMP3"], %["#TEMP2"], %["#TEMP0"] \n\t" \
+- "seh %["#TEMP0"], %["#TEMP1"] \n\t" \
+- "sra %[temp16], %["#TEMP1"], 16 \n\t" \
+- "seh %[temp19], %["#TEMP3"] \n\t" \
+- "sra %["#TEMP3"], %["#TEMP3"], 16 \n\t" \
+- "subu %["#TEMP2"], %["#TEMP0"], %[temp16] \n\t" \
+- "addu %["#TEMP0"], %["#TEMP0"], %[temp16] \n\t" \
+- "mul %[temp17], %[temp19], %[c2217] \n\t" \
+- "mul %[temp18], %["#TEMP3"], %[c5352] \n\t" \
+- "mul %["#TEMP1"], %[temp19], %[c5352] \n\t" \
+- "mul %[temp16], %["#TEMP3"], %[c2217] \n\t" \
+- "sll %["#TEMP2"], %["#TEMP2"], 3 \n\t" \
+- "sll %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
+- "subu %["#TEMP3"], %[temp17], %[temp18] \n\t" \
+- "addu %["#TEMP1"], %[temp16], %["#TEMP1"] \n\t" \
+- "addiu %["#TEMP3"], %["#TEMP3"], 937 \n\t" \
+- "addiu %["#TEMP1"], %["#TEMP1"], 1812 \n\t" \
+- "sra %["#TEMP3"], %["#TEMP3"], 9 \n\t" \
+- "sra %["#TEMP1"], %["#TEMP1"], 9 \n\t"
++ "lw %[" #TEMP0 "], 0(%[args]) \n\t" \
++ "lw %[" #TEMP1 "], 4(%[args]) \n\t" \
++ "lw %[" #TEMP2 "], "XSTR(BPS)"*" #A "(%[" #TEMP0 "]) \n\t" \
++ "lw %[" #TEMP3 "], "XSTR(BPS)"*" #A "(%[" #TEMP1 "]) \n\t" \
++ "preceu.ph.qbl %[" #TEMP0 "], %[" #TEMP2 "] \n\t" \
++ "preceu.ph.qbl %[" #TEMP1 "], %[" #TEMP3 "] \n\t" \
++ "preceu.ph.qbr %[" #TEMP2 "], %[" #TEMP2 "] \n\t" \
++ "preceu.ph.qbr %[" #TEMP3 "], %[" #TEMP3 "] \n\t" \
++ "subq.ph %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "subq.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP3 "] \n\t" \
++ "rotr %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
++ "addq.ph %[" #TEMP1 "], %[" #TEMP2 "], %[" #TEMP0 "] \n\t" \
++ "subq.ph %[" #TEMP3 "], %[" #TEMP2 "], %[" #TEMP0 "] \n\t" \
++ "seh %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
++ "sra %[temp16], %[" #TEMP1 "], 16 \n\t" \
++ "seh %[temp19], %[" #TEMP3 "] \n\t" \
++ "sra %[" #TEMP3 "], %[" #TEMP3 "], 16 \n\t" \
++ "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp16] \n\t" \
++ "addu %[" #TEMP0 "], %[" #TEMP0 "], %[temp16] \n\t" \
++ "mul %[temp17], %[temp19], %[c2217] \n\t" \
++ "mul %[temp18], %[" #TEMP3 "], %[c5352] \n\t" \
++ "mul %[" #TEMP1 "], %[temp19], %[c5352] \n\t" \
++ "mul %[temp16], %[" #TEMP3 "], %[c2217] \n\t" \
++ "sll %[" #TEMP2 "], %[" #TEMP2 "], 3 \n\t" \
++ "sll %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
++ "subu %[" #TEMP3 "], %[temp17], %[temp18] \n\t" \
++ "addu %[" #TEMP1 "], %[temp16], %[" #TEMP1 "] \n\t" \
++ "addiu %[" #TEMP3 "], %[" #TEMP3 "], 937 \n\t" \
++ "addiu %[" #TEMP1 "], %[" #TEMP1 "], 1812 \n\t" \
++ "sra %[" #TEMP3 "], %[" #TEMP3 "], 9 \n\t" \
++ "sra %[" #TEMP1 "], %[" #TEMP1 "], 9 \n\t"
+
+ // macro for one vertical pass in FTransform
+ // temp0..temp15 holds tmp[0]..tmp[15]
+ // A..D - offsets in bytes to store to out buffer
+ // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+ #define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
+- "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
+- "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
+- "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
+- "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
+- "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
+- "mul %[temp18], %[temp18], %[c5352] \n\t" \
+- "addiu %[temp16], %[temp16], 7 \n\t" \
+- "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
+- "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
+- "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
+- "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
+- "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
+- "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
+- "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
+- "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
+- "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
+- "addiu %[temp16], %["#TEMP12"], 1 \n\t" \
+- "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
+- "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
+- "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
+- "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
+- "sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
++ "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
++ "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
++ "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
++ "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
++ "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
++ "mul %[temp18], %[temp18], %[c5352] \n\t" \
++ "addiu %[temp16], %[temp16], 7 \n\t" \
++ "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
++ "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
++ "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
++ "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
++ "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
++ "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
++ "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
++ "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
++ "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
++ "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
++ "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
++ "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
++ "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
++ "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
++ "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
+
+ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+ const int c2217 = 2217;
+@@ -329,11 +329,11 @@
+ // Intra predictions
+
+ #define FILL_PART(J, SIZE) \
+- "usw %[value], 0+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
+- "usw %[value], 4+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
+- ".if "#SIZE" == 16 \n\t" \
+- "usw %[value], 8+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
+- "usw %[value], 12+"#J"*"XSTR(BPS)"(%[dst]) \n\t" \
++ "usw %[value], 0+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
++ "usw %[value], 4+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
++ "usw %[value], 8+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
++ "usw %[value], 12+" #J "*"XSTR(BPS)"(%[dst]) \n\t" \
+ ".endif \n\t"
+
+ #define FILL_8_OR_16(DST, VALUE, SIZE) do { \
+@@ -348,7 +348,7 @@
+ FILL_PART( 5, SIZE) \
+ FILL_PART( 6, SIZE) \
+ FILL_PART( 7, SIZE) \
+- ".if "#SIZE" == 16 \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
+ FILL_PART( 8, 16) \
+ FILL_PART( 9, 16) \
+ FILL_PART(10, 16) \
+@@ -425,7 +425,7 @@
+ CLIPPING() \
+ "usw %[temp0], 0(%[dst]) \n\t" \
+ "usw %[temp1], 4(%[dst]) \n\t" \
+- ".if "#SIZE" == 16 \n\t" \
++ ".if " #SIZE " == 16 \n\t" \
+ "ulw %[temp0], 8(%[top]) \n\t" \
+ "ulw %[temp1], 12(%[top]) \n\t" \
+ CLIPPING() \
+@@ -1060,8 +1060,8 @@
+ #if !defined(WORK_AROUND_GCC)
+
+ #define GET_SSE_INNER(A) \
+- "lw %[temp0], "#A"(%[a]) \n\t" \
+- "lw %[temp1], "#A"(%[b]) \n\t" \
++ "lw %[temp0], " #A "(%[a]) \n\t" \
++ "lw %[temp1], " #A "(%[b]) \n\t" \
+ "preceu.ph.qbr %[temp2], %[temp0] \n\t" \
+ "preceu.ph.qbl %[temp0], %[temp0] \n\t" \
+ "preceu.ph.qbr %[temp3], %[temp1] \n\t" \
+@@ -1185,28 +1185,28 @@
+ // N - offset in bytes (n * 2)
+ // N1 - offset in bytes ((n + 1) * 2)
+ #define QUANTIZE_ONE(J, K, N, N1) \
+- "ulw %[temp1], "#J"(%[ppin]) \n\t" \
+- "ulw %[temp2], "#J"(%[ppsharpen]) \n\t" \
+- "lhu %[temp3], "#K"(%[ppzthresh]) \n\t" \
+- "lhu %[temp6], "#K"+4(%[ppzthresh]) \n\t" \
++ "ulw %[temp1], " #J "(%[ppin]) \n\t" \
++ "ulw %[temp2], " #J "(%[ppsharpen]) \n\t" \
++ "lhu %[temp3], " #K "(%[ppzthresh]) \n\t" \
++ "lhu %[temp6], " #K "+4(%[ppzthresh]) \n\t" \
+ "absq_s.ph %[temp4], %[temp1] \n\t" \
+ "ins %[temp3], %[temp6], 16, 16 \n\t" \
+ "addu.ph %[coeff], %[temp4], %[temp2] \n\t" \
+ "shra.ph %[sign], %[temp1], 15 \n\t" \
+ "li %[level], 0x10001 \n\t" \
+ "cmp.lt.ph %[temp3], %[coeff] \n\t" \
+- "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
+ "pick.ph %[temp5], %[level], $0 \n\t" \
+- "lw %[temp2], "#K"(%[ppbias]) \n\t" \
++ "lw %[temp2], " #K "(%[ppbias]) \n\t" \
+ "beqz %[temp5], 0f \n\t" \
+- "lhu %[temp3], "#J"(%[ppq]) \n\t" \
++ "lhu %[temp3], " #J "(%[ppq]) \n\t" \
+ "beq %[temp5], %[level], 1f \n\t" \
+ "andi %[temp5], %[temp5], 0x1 \n\t" \
+ "andi %[temp4], %[coeff], 0xffff \n\t" \
+ "beqz %[temp5], 2f \n\t" \
+ "mul %[level], %[temp4], %[temp1] \n\t" \
+- "sh $0, "#J"+2(%[ppin]) \n\t" \
+- "sh $0, "#N1"(%[pout]) \n\t" \
++ "sh $0, " #J "+2(%[ppin]) \n\t" \
++ "sh $0, " #N1 "(%[pout]) \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+ "sra %[level], %[level], 17 \n\t" \
+ "slt %[temp4], %[max_level], %[level] \n\t" \
+@@ -1216,15 +1216,15 @@
+ "subu %[level], %[level], %[temp6] \n\t" \
+ "mul %[temp5], %[level], %[temp3] \n\t" \
+ "or %[ret], %[ret], %[level] \n\t" \
+- "sh %[level], "#N"(%[pout]) \n\t" \
+- "sh %[temp5], "#J"(%[ppin]) \n\t" \
++ "sh %[level], " #N "(%[pout]) \n\t" \
++ "sh %[temp5], " #J "(%[ppin]) \n\t" \
+ "j 3f \n\t" \
+ "2: \n\t" \
+- "lhu %[temp1], "#J"+2(%[ppiq]) \n\t" \
++ "lhu %[temp1], " #J "+2(%[ppiq]) \n\t" \
+ "srl %[temp5], %[coeff], 16 \n\t" \
+ "mul %[level], %[temp5], %[temp1] \n\t" \
+- "lw %[temp2], "#K"+4(%[ppbias]) \n\t" \
+- "lhu %[temp3], "#J"+2(%[ppq]) \n\t" \
++ "lw %[temp2], " #K "+4(%[ppbias]) \n\t" \
++ "lhu %[temp3], " #J "+2(%[ppq]) \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+ "sra %[level], %[level], 17 \n\t" \
+ "srl %[temp6], %[sign], 16 \n\t" \
+@@ -1233,20 +1233,20 @@
+ "xor %[level], %[level], %[temp6] \n\t" \
+ "subu %[level], %[level], %[temp6] \n\t" \
+ "mul %[temp5], %[level], %[temp3] \n\t" \
+- "sh $0, "#J"(%[ppin]) \n\t" \
+- "sh $0, "#N"(%[pout]) \n\t" \
++ "sh $0, " #J "(%[ppin]) \n\t" \
++ "sh $0, " #N "(%[pout]) \n\t" \
+ "or %[ret], %[ret], %[level] \n\t" \
+- "sh %[temp5], "#J"+2(%[ppin]) \n\t" \
+- "sh %[level], "#N1"(%[pout]) \n\t" \
++ "sh %[temp5], " #J "+2(%[ppin]) \n\t" \
++ "sh %[level], " #N1 "(%[pout]) \n\t" \
+ "j 3f \n\t" \
+ "1: \n\t" \
+- "lhu %[temp1], "#J"(%[ppiq]) \n\t" \
+- "lw %[temp2], "#K"(%[ppbias]) \n\t" \
+- "ulw %[temp3], "#J"(%[ppq]) \n\t" \
++ "lhu %[temp1], " #J "(%[ppiq]) \n\t" \
++ "lw %[temp2], " #K "(%[ppbias]) \n\t" \
++ "ulw %[temp3], " #J "(%[ppq]) \n\t" \
+ "andi %[temp5], %[coeff], 0xffff \n\t" \
+ "srl %[temp0], %[coeff], 16 \n\t" \
+- "lhu %[temp6], "#J"+2(%[ppiq]) \n\t" \
+- "lw %[coeff], "#K"+4(%[ppbias]) \n\t" \
++ "lhu %[temp6], " #J "+2(%[ppiq]) \n\t" \
++ "lw %[coeff], " #K "+4(%[ppbias]) \n\t" \
+ "mul %[level], %[temp5], %[temp1] \n\t" \
+ "mul %[temp4], %[temp0], %[temp6] \n\t" \
+ "addu %[level], %[level], %[temp2] \n\t" \
+@@ -1259,15 +1259,15 @@
+ "subu.ph %[level], %[level], %[sign] \n\t" \
+ "mul.ph %[temp3], %[level], %[temp3] \n\t" \
+ "or %[ret], %[ret], %[level] \n\t" \
+- "sh %[level], "#N"(%[pout]) \n\t" \
++ "sh %[level], " #N "(%[pout]) \n\t" \
+ "srl %[level], %[level], 16 \n\t" \
+- "sh %[level], "#N1"(%[pout]) \n\t" \
+- "usw %[temp3], "#J"(%[ppin]) \n\t" \
++ "sh %[level], " #N1 "(%[pout]) \n\t" \
++ "usw %[temp3], " #J "(%[ppin]) \n\t" \
+ "j 3f \n\t" \
+ "0: \n\t" \
+- "sh $0, "#N"(%[pout]) \n\t" \
+- "sh $0, "#N1"(%[pout]) \n\t" \
+- "usw $0, "#J"(%[ppin]) \n\t" \
++ "sh $0, " #N "(%[pout]) \n\t" \
++ "sh $0, " #N1 "(%[pout]) \n\t" \
++ "usw $0, " #J "(%[ppin]) \n\t" \
+ "3: \n\t"
+
+ static int QuantizeBlock(int16_t in[16], int16_t out[16],
+@@ -1326,37 +1326,37 @@
+ // A, B, C, D - offset in bytes to load from in buffer
+ // TEMP0, TEMP1 - registers for corresponding tmp elements
+ #define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1) \
+- "lh %["#TEMP0"], "#A"(%[in]) \n\t" \
+- "lh %["#TEMP1"], "#B"(%[in]) \n\t" \
+- "lh %[temp8], "#C"(%[in]) \n\t" \
+- "lh %[temp9], "#D"(%[in]) \n\t" \
+- "ins %["#TEMP1"], %["#TEMP0"], 16, 16 \n\t" \
++ "lh %[" #TEMP0 "], " #A "(%[in]) \n\t" \
++ "lh %[" #TEMP1 "], " #B "(%[in]) \n\t" \
++ "lh %[temp8], " #C "(%[in]) \n\t" \
++ "lh %[temp9], " #D "(%[in]) \n\t" \
++ "ins %[" #TEMP1 "], %[" #TEMP0 "], 16, 16 \n\t" \
+ "ins %[temp9], %[temp8], 16, 16 \n\t" \
+- "subq.ph %[temp8], %["#TEMP1"], %[temp9] \n\t" \
+- "addq.ph %[temp9], %["#TEMP1"], %[temp9] \n\t" \
+- "precrq.ph.w %["#TEMP0"], %[temp8], %[temp9] \n\t" \
++ "subq.ph %[temp8], %[" #TEMP1 "], %[temp9] \n\t" \
++ "addq.ph %[temp9], %[" #TEMP1 "], %[temp9] \n\t" \
++ "precrq.ph.w %[" #TEMP0 "], %[temp8], %[temp9] \n\t" \
+ "append %[temp8], %[temp9], 16 \n\t" \
+- "subq.ph %["#TEMP1"], %["#TEMP0"], %[temp8] \n\t" \
+- "addq.ph %["#TEMP0"], %["#TEMP0"], %[temp8] \n\t" \
+- "rotr %["#TEMP1"], %["#TEMP1"], 16 \n\t"
++ "subq.ph %[" #TEMP1 "], %[" #TEMP0 "], %[temp8] \n\t" \
++ "addq.ph %[" #TEMP0 "], %[" #TEMP0 "], %[temp8] \n\t" \
++ "rotr %[" #TEMP1 "], %[" #TEMP1 "], 16 \n\t"
+
+ // macro for one vertical pass in FTransformWHT
+ // temp0..temp7 holds tmp[0]..tmp[15]
+ // A, B, C, D - offsets in bytes to store to out buffer
+ // TEMP0, TEMP2, TEMP4 and TEMP6 - registers for corresponding tmp elements
+ #define VERTICAL_PASS_WHT(A, B, C, D, TEMP0, TEMP2, TEMP4, TEMP6) \
+- "addq.ph %[temp8], %["#TEMP0"], %["#TEMP4"] \n\t" \
+- "addq.ph %[temp9], %["#TEMP2"], %["#TEMP6"] \n\t" \
+- "subq.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP6"] \n\t" \
+- "subq.ph %["#TEMP6"], %["#TEMP0"], %["#TEMP4"] \n\t" \
+- "addqh.ph %["#TEMP0"], %[temp8], %[temp9] \n\t" \
+- "subqh.ph %["#TEMP4"], %["#TEMP6"], %["#TEMP2"] \n\t" \
+- "addqh.ph %["#TEMP2"], %["#TEMP2"], %["#TEMP6"] \n\t" \
+- "subqh.ph %["#TEMP6"], %[temp8], %[temp9] \n\t" \
+- "usw %["#TEMP0"], "#A"(%[out]) \n\t" \
+- "usw %["#TEMP2"], "#B"(%[out]) \n\t" \
+- "usw %["#TEMP4"], "#C"(%[out]) \n\t" \
+- "usw %["#TEMP6"], "#D"(%[out]) \n\t"
++ "addq.ph %[temp8], %[" #TEMP0 "], %[" #TEMP4 "] \n\t" \
++ "addq.ph %[temp9], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
++ "subq.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
++ "subq.ph %[" #TEMP6 "], %[" #TEMP0 "], %[" #TEMP4 "] \n\t" \
++ "addqh.ph %[" #TEMP0 "], %[temp8], %[temp9] \n\t" \
++ "subqh.ph %[" #TEMP4 "], %[" #TEMP6 "], %[" #TEMP2 "] \n\t" \
++ "addqh.ph %[" #TEMP2 "], %[" #TEMP2 "], %[" #TEMP6 "] \n\t" \
++ "subqh.ph %[" #TEMP6 "], %[temp8], %[temp9] \n\t" \
++ "usw %[" #TEMP0 "], " #A "(%[out]) \n\t" \
++ "usw %[" #TEMP2 "], " #B "(%[out]) \n\t" \
++ "usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
++ "usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
+
+ static void FTransformWHT(const int16_t* in, int16_t* out) {
+ int temp0, temp1, temp2, temp3, temp4;
+@@ -1385,10 +1385,10 @@
+ // convert 8 coeffs at time
+ // A, B, C, D - offsets in bytes to load from out buffer
+ #define CONVERT_COEFFS_TO_BIN(A, B, C, D) \
+- "ulw %[temp0], "#A"(%[out]) \n\t" \
+- "ulw %[temp1], "#B"(%[out]) \n\t" \
+- "ulw %[temp2], "#C"(%[out]) \n\t" \
+- "ulw %[temp3], "#D"(%[out]) \n\t" \
++ "ulw %[temp0], " #A "(%[out]) \n\t" \
++ "ulw %[temp1], " #B "(%[out]) \n\t" \
++ "ulw %[temp2], " #C "(%[out]) \n\t" \
++ "ulw %[temp3], " #D "(%[out]) \n\t" \
+ "absq_s.ph %[temp0], %[temp0] \n\t" \
+ "absq_s.ph %[temp1], %[temp1] \n\t" \
+ "absq_s.ph %[temp2], %[temp2] \n\t" \
+diff --git a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
+index 66f807d..8134af5 100644
+--- a/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.filters_mips_dsp_r2.c
+@@ -48,7 +48,7 @@
+ "srl %[temp0], %[length], 0x2 \n\t" \
+ "beqz %[temp0], 4f \n\t" \
+ " andi %[temp6], %[length], 0x3 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "lbu %[temp1], -1(%[src]) \n\t" \
+ "1: \n\t" \
+ "lbu %[temp2], 0(%[src]) \n\t" \
+@@ -84,7 +84,7 @@
+ "lbu %[temp1], -1(%[src]) \n\t" \
+ "lbu %[temp2], 0(%[src]) \n\t" \
+ "addiu %[src], %[src], 1 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu %[temp3], %[temp1], %[temp2] \n\t" \
+ "sb %[temp3], -1(%[src]) \n\t" \
+ ".else \n\t" \
+@@ -131,7 +131,7 @@
+ "ulw %[temp3], 4(%[src]) \n\t" \
+ "ulw %[temp4], 4(%[pred]) \n\t" \
+ "addiu %[src], %[src], 8 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu.qb %[temp5], %[temp1], %[temp2] \n\t" \
+ "addu.qb %[temp6], %[temp3], %[temp4] \n\t" \
+ ".else \n\t" \
+@@ -152,7 +152,7 @@
+ "lbu %[temp2], 0(%[pred]) \n\t" \
+ "addiu %[src], %[src], 1 \n\t" \
+ "addiu %[pred], %[pred], 1 \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu %[temp3], %[temp1], %[temp2] \n\t" \
+ ".else \n\t" \
+ "subu %[temp3], %[temp1], %[temp2] \n\t" \
+@@ -177,7 +177,7 @@
+ __asm__ volatile ( \
+ "lbu %[temp1], 0(%[src]) \n\t" \
+ "lbu %[temp2], 0(%[pred]) \n\t" \
+- ".if "#INVERSE" \n\t" \
++ ".if " #INVERSE " \n\t" \
+ "addu %[temp3], %[temp1], %[temp2] \n\t" \
+ ".else \n\t" \
+ "subu %[temp3], %[temp1], %[temp2] \n\t" \
+diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
+index 8ae5958..cdf0e26 100644
+--- a/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
++++ b/Source/LibWebP/src/dsp/dsp.lossless_mips32.c
+
+@@ -278,28 +278,28 @@
+ // literal_ and successive histograms could be unaligned
+ // so we must use ulw and usw
+ #define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \
+- "ulw %[temp0], "#A"(%["#P0"]) \n\t" \
+- "ulw %[temp1], "#B"(%["#P0"]) \n\t" \
+- "ulw %[temp2], "#C"(%["#P0"]) \n\t" \
+- "ulw %[temp3], "#D"(%["#P0"]) \n\t" \
+- "ulw %[temp4], "#A"(%["#P1"]) \n\t" \
+- "ulw %[temp5], "#B"(%["#P1"]) \n\t" \
+- "ulw %[temp6], "#C"(%["#P1"]) \n\t" \
+- "ulw %[temp7], "#D"(%["#P1"]) \n\t" \
++ "ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \
++ "ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \
++ "ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \
++ "ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \
++ "ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \
++ "ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \
++ "ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \
++ "ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \
+ "addu %[temp4], %[temp4], %[temp0] \n\t" \
+ "addu %[temp5], %[temp5], %[temp1] \n\t" \
+ "addu %[temp6], %[temp6], %[temp2] \n\t" \
+ "addu %[temp7], %[temp7], %[temp3] \n\t" \
+- "addiu %["#P0"], %["#P0"], 16 \n\t" \
+- ".if "#E" == 1 \n\t" \
+- "addiu %["#P1"], %["#P1"], 16 \n\t" \
++ "addiu %[" #P0 "], %[" #P0 "], 16 \n\t" \
++ ".if " #E " == 1 \n\t" \
++ "addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \
+ ".endif \n\t" \
+- "usw %[temp4], "#A"(%["#P2"]) \n\t" \
+- "usw %[temp5], "#B"(%["#P2"]) \n\t" \
+- "usw %[temp6], "#C"(%["#P2"]) \n\t" \
+- "usw %[temp7], "#D"(%["#P2"]) \n\t" \
+- "addiu %["#P2"], %["#P2"], 16 \n\t" \
+- "bne %["#P0"], %[LoopEnd], 1b \n\t" \
++ "usw %[temp4], " #A "(%[" #P2 "]) \n\t" \
++ "usw %[temp5], " #B "(%[" #P2 "]) \n\t" \
++ "usw %[temp6], " #C "(%[" #P2 "]) \n\t" \
++ "usw %[temp7], " #D "(%[" #P2 "]) \n\t" \
++ "addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \
++ "bne %[" #P0 "], %[LoopEnd], 1b \n\t" \
+ ".set pop \n\t" \
+
+ #define ASM_END_COMMON_0 \
+diff --git a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
+index ad55f2c..90aed7f 100644
+--- a/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.lossless_mips_dsp_r2.c
+@@ -29,14 +29,14 @@
+ for (x = 0; x < (width >> 2); ++x) { \
+ int tmp1, tmp2, tmp3, tmp4; \
+ __asm__ volatile ( \
+- ".ifc "#TYPE", uint8_t \n\t" \
++ ".ifc " #TYPE ", uint8_t \n\t" \
+ "lbu %[tmp1], 0(%[src]) \n\t" \
+ "lbu %[tmp2], 1(%[src]) \n\t" \
+ "lbu %[tmp3], 2(%[src]) \n\t" \
+ "lbu %[tmp4], 3(%[src]) \n\t" \
+ "addiu %[src], %[src], 4 \n\t" \
+ ".endif \n\t" \
+- ".ifc "#TYPE", uint32_t \n\t" \
++ ".ifc " #TYPE ", uint32_t \n\t" \
+ "lw %[tmp1], 0(%[src]) \n\t" \
+ "lw %[tmp2], 4(%[src]) \n\t" \
+ "lw %[tmp3], 8(%[src]) \n\t" \
+@@ -55,7 +55,7 @@
+ "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
+ "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
+ "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
+- ".ifc "#TYPE", uint8_t \n\t" \
++ ".ifc " #TYPE ", uint8_t \n\t" \
+ "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
+ "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
+ "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
+@@ -66,7 +66,7 @@
+ "sb %[tmp4], 3(%[dst]) \n\t" \
+ "addiu %[dst], %[dst], 4 \n\t" \
+ ".endif \n\t" \
+- ".ifc "#TYPE", uint32_t \n\t" \
++ ".ifc " #TYPE ", uint32_t \n\t" \
+ "sw %[tmp1], 0(%[dst]) \n\t" \
+ "sw %[tmp2], 4(%[dst]) \n\t" \
+ "sw %[tmp3], 8(%[dst]) \n\t" \
+diff --git a/Source/LibWebP/src/dsp/mips_macro.h b/Source/LibWebP/src/dsp/mips_macro.h
+index 4cfb23c..e09d2c4 100644
+--- a/Source/LibWebP/src/dsp/mips_macro.h
++++ b/Source/LibWebP/src/dsp/mips_macro.h
+@@ -25,25 +25,25 @@
+ // I - input (macro doesn't change it)
+ #define ADD_SUB_HALVES(O0, O1, \
+ I0, I1) \
+- "addq.ph %["#O0"], %["#I0"], %["#I1"] \n\t" \
+- "subq.ph %["#O1"], %["#I0"], %["#I1"] \n\t"
++ "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
++ "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t"
+
+ // O - output
+ // I - input (macro doesn't change it)
+ // I[0/1] - offset in bytes
+ #define LOAD_IN_X2(O0, O1, \
+ I0, I1) \
+- "lh %["#O0"], "#I0"(%[in]) \n\t" \
+- "lh %["#O1"], "#I1"(%[in]) \n\t"
++ "lh %[" #O0 "], " #I0 "(%[in]) \n\t" \
++ "lh %[" #O1 "], " #I1 "(%[in]) \n\t"
+
+ // I0 - location
+ // I1..I9 - offsets in bytes
+ #define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, \
+ I0, I1, I2, I3, I4, I5, I6, I7, I8, I9) \
+- "ulw %["#O0"], "#I1"+"XSTR(I9)"*"#I5"(%["#I0"]) \n\t" \
+- "ulw %["#O1"], "#I2"+"XSTR(I9)"*"#I6"(%["#I0"]) \n\t" \
+- "ulw %["#O2"], "#I3"+"XSTR(I9)"*"#I7"(%["#I0"]) \n\t" \
+- "ulw %["#O3"], "#I4"+"XSTR(I9)"*"#I8"(%["#I0"]) \n\t"
++ "ulw %[" #O0 "], " #I1 "+"XSTR(I9)"*" #I5 "(%[" #I0 "]) \n\t" \
++ "ulw %[" #O1 "], " #I2 "+"XSTR(I9)"*" #I6 "(%[" #I0 "]) \n\t" \
++ "ulw %[" #O2 "], " #I3 "+"XSTR(I9)"*" #I7 "(%[" #I0 "]) \n\t" \
++ "ulw %[" #O3 "], " #I4 "+"XSTR(I9)"*" #I8 "(%[" #I0 "]) \n\t"
+
+ // O - output
+ // IO - input/output
+@@ -51,42 +51,42 @@
+ #define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7, \
+ IO0, IO1, IO2, IO3, \
+ I0, I1, I2, I3, I4, I5, I6, I7) \
+- "mul %["#O0"], %["#I0"], %[kC2] \n\t" \
+- "mul %["#O1"], %["#I0"], %[kC1] \n\t" \
+- "mul %["#O2"], %["#I1"], %[kC2] \n\t" \
+- "mul %["#O3"], %["#I1"], %[kC1] \n\t" \
+- "mul %["#O4"], %["#I2"], %[kC2] \n\t" \
+- "mul %["#O5"], %["#I2"], %[kC1] \n\t" \
+- "mul %["#O6"], %["#I3"], %[kC2] \n\t" \
+- "mul %["#O7"], %["#I3"], %[kC1] \n\t" \
+- "sra %["#O0"], %["#O0"], 16 \n\t" \
+- "sra %["#O1"], %["#O1"], 16 \n\t" \
+- "sra %["#O2"], %["#O2"], 16 \n\t" \
+- "sra %["#O3"], %["#O3"], 16 \n\t" \
+- "sra %["#O4"], %["#O4"], 16 \n\t" \
+- "sra %["#O5"], %["#O5"], 16 \n\t" \
+- "sra %["#O6"], %["#O6"], 16 \n\t" \
+- "sra %["#O7"], %["#O7"], 16 \n\t" \
+- "addu %["#IO0"], %["#IO0"], %["#I4"] \n\t" \
+- "addu %["#IO1"], %["#IO1"], %["#I5"] \n\t" \
+- "subu %["#IO2"], %["#IO2"], %["#I6"] \n\t" \
+- "subu %["#IO3"], %["#IO3"], %["#I7"] \n\t"
++ "mul %[" #O0 "], %[" #I0 "], %[kC2] \n\t" \
++ "mul %[" #O1 "], %[" #I0 "], %[kC1] \n\t" \
++ "mul %[" #O2 "], %[" #I1 "], %[kC2] \n\t" \
++ "mul %[" #O3 "], %[" #I1 "], %[kC1] \n\t" \
++ "mul %[" #O4 "], %[" #I2 "], %[kC2] \n\t" \
++ "mul %[" #O5 "], %[" #I2 "], %[kC1] \n\t" \
++ "mul %[" #O6 "], %[" #I3 "], %[kC2] \n\t" \
++ "mul %[" #O7 "], %[" #I3 "], %[kC1] \n\t" \
++ "sra %[" #O0 "], %[" #O0 "], 16 \n\t" \
++ "sra %[" #O1 "], %[" #O1 "], 16 \n\t" \
++ "sra %[" #O2 "], %[" #O2 "], 16 \n\t" \
++ "sra %[" #O3 "], %[" #O3 "], 16 \n\t" \
++ "sra %[" #O4 "], %[" #O4 "], 16 \n\t" \
++ "sra %[" #O5 "], %[" #O5 "], 16 \n\t" \
++ "sra %[" #O6 "], %[" #O6 "], 16 \n\t" \
++ "sra %[" #O7 "], %[" #O7 "], 16 \n\t" \
++ "addu %[" #IO0 "], %[" #IO0 "], %[" #I4 "] \n\t" \
++ "addu %[" #IO1 "], %[" #IO1 "], %[" #I5 "] \n\t" \
++ "subu %[" #IO2 "], %[" #IO2 "], %[" #I6 "] \n\t" \
++ "subu %[" #IO3 "], %[" #IO3 "], %[" #I7 "] \n\t"
+
+ // O - output
+ // I - input (macro doesn't change it)
+ #define INSERT_HALF_X2(O0, O1, \
+ I0, I1) \
+- "ins %["#O0"], %["#I0"], 16, 16 \n\t" \
+- "ins %["#O1"], %["#I1"], 16, 16 \n\t"
++ "ins %[" #O0 "], %[" #I0 "], 16, 16 \n\t" \
++ "ins %[" #O1 "], %[" #I1 "], 16, 16 \n\t"
+
+ // O - output
+ // I - input (macro doesn't change it)
+ #define SRA_16(O0, O1, O2, O3, \
+ I0, I1, I2, I3) \
+- "sra %["#O0"], %["#I0"], 16 \n\t" \
+- "sra %["#O1"], %["#I1"], 16 \n\t" \
+- "sra %["#O2"], %["#I2"], 16 \n\t" \
+- "sra %["#O3"], %["#I3"], 16 \n\t"
++ "sra %[" #O0 "], %[" #I0 "], 16 \n\t" \
++ "sra %[" #O1 "], %[" #I1 "], 16 \n\t" \
++ "sra %[" #O2 "], %[" #I2 "], 16 \n\t" \
++ "sra %[" #O3 "], %[" #I3 "], 16 \n\t"
+
+ // temp0[31..16 | 15..0] = temp8[31..16 | 15..0] + temp12[31..16 | 15..0]
+ // temp1[31..16 | 15..0] = temp8[31..16 | 15..0] - temp12[31..16 | 15..0]
+@@ -96,22 +96,22 @@
+ // I - input (macro doesn't change it)
+ #define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7, \
+ I0, I1, I2, I3, I4, I5, I6, I7) \
+- "addq.ph %["#O0"], %["#I0"], %["#I4"] \n\t" \
+- "subq.ph %["#O1"], %["#I0"], %["#I4"] \n\t" \
+- "addq.ph %["#O2"], %["#I1"], %["#I5"] \n\t" \
+- "subq.ph %["#O3"], %["#I1"], %["#I5"] \n\t" \
+- "addq.ph %["#O4"], %["#I2"], %["#I6"] \n\t" \
+- "subq.ph %["#O5"], %["#I2"], %["#I6"] \n\t" \
+- "addq.ph %["#O6"], %["#I3"], %["#I7"] \n\t" \
+- "subq.ph %["#O7"], %["#I3"], %["#I7"] \n\t" \
+- "shra.ph %["#O0"], %["#O0"], 3 \n\t" \
+- "shra.ph %["#O1"], %["#O1"], 3 \n\t" \
+- "shra.ph %["#O2"], %["#O2"], 3 \n\t" \
+- "shra.ph %["#O3"], %["#O3"], 3 \n\t" \
+- "shra.ph %["#O4"], %["#O4"], 3 \n\t" \
+- "shra.ph %["#O5"], %["#O5"], 3 \n\t" \
+- "shra.ph %["#O6"], %["#O6"], 3 \n\t" \
+- "shra.ph %["#O7"], %["#O7"], 3 \n\t"
++ "addq.ph %[" #O0 "], %[" #I0 "], %[" #I4 "] \n\t" \
++ "subq.ph %[" #O1 "], %[" #I0 "], %[" #I4 "] \n\t" \
++ "addq.ph %[" #O2 "], %[" #I1 "], %[" #I5 "] \n\t" \
++ "subq.ph %[" #O3 "], %[" #I1 "], %[" #I5 "] \n\t" \
++ "addq.ph %[" #O4 "], %[" #I2 "], %[" #I6 "] \n\t" \
++ "subq.ph %[" #O5 "], %[" #I2 "], %[" #I6 "] \n\t" \
++ "addq.ph %[" #O6 "], %[" #I3 "], %[" #I7 "] \n\t" \
++ "subq.ph %[" #O7 "], %[" #I3 "], %[" #I7 "] \n\t" \
++ "shra.ph %[" #O0 "], %[" #O0 "], 3 \n\t" \
++ "shra.ph %[" #O1 "], %[" #O1 "], 3 \n\t" \
++ "shra.ph %[" #O2 "], %[" #O2 "], 3 \n\t" \
++ "shra.ph %[" #O3 "], %[" #O3 "], 3 \n\t" \
++ "shra.ph %[" #O4 "], %[" #O4 "], 3 \n\t" \
++ "shra.ph %[" #O5 "], %[" #O5 "], 3 \n\t" \
++ "shra.ph %[" #O6 "], %[" #O6 "], 3 \n\t" \
++ "shra.ph %[" #O7 "], %[" #O7 "], 3 \n\t"
+
+ // precrq.ph.w temp0, temp8, temp2
+ // temp0 = temp8[31..16] | temp2[31..16]
+@@ -123,14 +123,14 @@
+ #define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3, \
+ IO0, IO1, IO2, IO3, \
+ I0, I1, I2, I3) \
+- "precrq.ph.w %["#O0"], %["#I0"], %["#IO0"] \n\t" \
+- "precrq.ph.w %["#O1"], %["#I1"], %["#IO1"] \n\t" \
+- "ins %["#IO0"], %["#I0"], 16, 16 \n\t" \
+- "ins %["#IO1"], %["#I1"], 16, 16 \n\t" \
+- "precrq.ph.w %["#O2"], %["#I2"], %["#IO2"] \n\t" \
+- "precrq.ph.w %["#O3"], %["#I3"], %["#IO3"] \n\t" \
+- "ins %["#IO2"], %["#I2"], 16, 16 \n\t" \
+- "ins %["#IO3"], %["#I3"], 16, 16 \n\t"
++ "precrq.ph.w %[" #O0 "], %[" #I0 "], %[" #IO0 "] \n\t" \
++ "precrq.ph.w %[" #O1 "], %[" #I1 "], %[" #IO1 "] \n\t" \
++ "ins %[" #IO0 "], %[" #I0 "], 16, 16 \n\t" \
++ "ins %[" #IO1 "], %[" #I1 "], 16, 16 \n\t" \
++ "precrq.ph.w %[" #O2 "], %[" #I2 "], %[" #IO2 "] \n\t" \
++ "precrq.ph.w %[" #O3 "], %[" #I3 "], %[" #IO3 "] \n\t" \
++ "ins %[" #IO2 "], %[" #I2 "], 16, 16 \n\t" \
++ "ins %[" #IO3 "], %[" #I3 "], 16, 16 \n\t"
+
+ // preceu.ph.qbr temp0, temp8
+ // temp0 = 0 | 0 | temp8[23..16] | temp8[7..0]
+@@ -140,14 +140,14 @@
+ // I - input (macro doesn't change it)
+ #define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7, \
+ I0, I1, I2, I3) \
+- "preceu.ph.qbr %["#O0"], %["#I0"] \n\t" \
+- "preceu.ph.qbl %["#O1"], %["#I0"] \n\t" \
+- "preceu.ph.qbr %["#O2"], %["#I1"] \n\t" \
+- "preceu.ph.qbl %["#O3"], %["#I1"] \n\t" \
+- "preceu.ph.qbr %["#O4"], %["#I2"] \n\t" \
+- "preceu.ph.qbl %["#O5"], %["#I2"] \n\t" \
+- "preceu.ph.qbr %["#O6"], %["#I3"] \n\t" \
+- "preceu.ph.qbl %["#O7"], %["#I3"] \n\t"
++ "preceu.ph.qbr %[" #O0 "], %[" #I0 "] \n\t" \
++ "preceu.ph.qbl %[" #O1 "], %[" #I0 "] \n\t" \
++ "preceu.ph.qbr %[" #O2 "], %[" #I1 "] \n\t" \
++ "preceu.ph.qbl %[" #O3 "], %[" #I1 "] \n\t" \
++ "preceu.ph.qbr %[" #O4 "], %[" #I2 "] \n\t" \
++ "preceu.ph.qbl %[" #O5 "], %[" #I2 "] \n\t" \
++ "preceu.ph.qbr %[" #O6 "], %[" #I3 "] \n\t" \
++ "preceu.ph.qbl %[" #O7 "], %[" #I3 "] \n\t"
+
+ // temp0[31..16 | 15..0] = temp0[31..16 | 15..0] + temp8[31..16 | 15..0]
+ // temp0[31..16 | 15..0] = temp0[31..16 <<(s) 7 | 15..0 <<(s) 7]
+@@ -160,30 +160,30 @@
+ #define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7, \
+ I0, I1, I2, I3, I4, I5, I6, I7, \
+ I8, I9, I10, I11, I12, I13) \
+- "addq.ph %["#IO0"], %["#IO0"], %["#I0"] \n\t" \
+- "addq.ph %["#IO1"], %["#IO1"], %["#I1"] \n\t" \
+- "addq.ph %["#IO2"], %["#IO2"], %["#I2"] \n\t" \
+- "addq.ph %["#IO3"], %["#IO3"], %["#I3"] \n\t" \
+- "addq.ph %["#IO4"], %["#IO4"], %["#I4"] \n\t" \
+- "addq.ph %["#IO5"], %["#IO5"], %["#I5"] \n\t" \
+- "addq.ph %["#IO6"], %["#IO6"], %["#I6"] \n\t" \
+- "addq.ph %["#IO7"], %["#IO7"], %["#I7"] \n\t" \
+- "shll_s.ph %["#IO0"], %["#IO0"], 7 \n\t" \
+- "shll_s.ph %["#IO1"], %["#IO1"], 7 \n\t" \
+- "shll_s.ph %["#IO2"], %["#IO2"], 7 \n\t" \
+- "shll_s.ph %["#IO3"], %["#IO3"], 7 \n\t" \
+- "shll_s.ph %["#IO4"], %["#IO4"], 7 \n\t" \
+- "shll_s.ph %["#IO5"], %["#IO5"], 7 \n\t" \
+- "shll_s.ph %["#IO6"], %["#IO6"], 7 \n\t" \
+- "shll_s.ph %["#IO7"], %["#IO7"], 7 \n\t" \
+- "precrqu_s.qb.ph %["#IO0"], %["#IO1"], %["#IO0"] \n\t" \
+- "precrqu_s.qb.ph %["#IO2"], %["#IO3"], %["#IO2"] \n\t" \
+- "precrqu_s.qb.ph %["#IO4"], %["#IO5"], %["#IO4"] \n\t" \
+- "precrqu_s.qb.ph %["#IO6"], %["#IO7"], %["#IO6"] \n\t" \
+- "usw %["#IO0"], "XSTR(I13)"*"#I9"(%["#I8"]) \n\t" \
+- "usw %["#IO2"], "XSTR(I13)"*"#I10"(%["#I8"]) \n\t" \
+- "usw %["#IO4"], "XSTR(I13)"*"#I11"(%["#I8"]) \n\t" \
+- "usw %["#IO6"], "XSTR(I13)"*"#I12"(%["#I8"]) \n\t"
++ "addq.ph %[" #IO0 "], %[" #IO0 "], %[" #I0 "] \n\t" \
++ "addq.ph %[" #IO1 "], %[" #IO1 "], %[" #I1 "] \n\t" \
++ "addq.ph %[" #IO2 "], %[" #IO2 "], %[" #I2 "] \n\t" \
++ "addq.ph %[" #IO3 "], %[" #IO3 "], %[" #I3 "] \n\t" \
++ "addq.ph %[" #IO4 "], %[" #IO4 "], %[" #I4 "] \n\t" \
++ "addq.ph %[" #IO5 "], %[" #IO5 "], %[" #I5 "] \n\t" \
++ "addq.ph %[" #IO6 "], %[" #IO6 "], %[" #I6 "] \n\t" \
++ "addq.ph %[" #IO7 "], %[" #IO7 "], %[" #I7 "] \n\t" \
++ "shll_s.ph %[" #IO0 "], %[" #IO0 "], 7 \n\t" \
++ "shll_s.ph %[" #IO1 "], %[" #IO1 "], 7 \n\t" \
++ "shll_s.ph %[" #IO2 "], %[" #IO2 "], 7 \n\t" \
++ "shll_s.ph %[" #IO3 "], %[" #IO3 "], 7 \n\t" \
++ "shll_s.ph %[" #IO4 "], %[" #IO4 "], 7 \n\t" \
++ "shll_s.ph %[" #IO5 "], %[" #IO5 "], 7 \n\t" \
++ "shll_s.ph %[" #IO6 "], %[" #IO6 "], 7 \n\t" \
++ "shll_s.ph %[" #IO7 "], %[" #IO7 "], 7 \n\t" \
++ "precrqu_s.qb.ph %[" #IO0 "], %[" #IO1 "], %[" #IO0 "] \n\t" \
++ "precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t" \
++ "precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t" \
++ "precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t" \
++ "usw %[" #IO0 "], "XSTR(I13)"*" #I9 "(%[" #I8 "]) \n\t" \
++ "usw %[" #IO2 "], "XSTR(I13)"*" #I10 "(%[" #I8 "]) \n\t" \
++ "usw %[" #IO4 "], "XSTR(I13)"*" #I11 "(%[" #I8 "]) \n\t" \
++ "usw %[" #IO6 "], "XSTR(I13)"*" #I12 "(%[" #I8 "]) \n\t"
+
+ #define OUTPUT_EARLY_CLOBBER_REGS_10() \
+ : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \
+diff --git a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
+index 9c9665f..46f207b 100644
+--- a/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.upsampling_mips_dsp_r2.c
+@@ -34,15 +34,15 @@
+ G = G - t2 + kGCst; \
+ B = B + kBCst; \
+ __asm__ volatile ( \
+- "shll_s.w %["#R"], %["#R"], 9 \n\t" \
+- "shll_s.w %["#G"], %["#G"], 9 \n\t" \
+- "shll_s.w %["#B"], %["#B"], 9 \n\t" \
+- "precrqu_s.qb.ph %["#R"], %["#R"], $zero \n\t" \
+- "precrqu_s.qb.ph %["#G"], %["#G"], $zero \n\t" \
+- "precrqu_s.qb.ph %["#B"], %["#B"], $zero \n\t" \
+- "srl %["#R"], %["#R"], 24 \n\t" \
+- "srl %["#G"], %["#G"], 24 \n\t" \
+- "srl %["#B"], %["#B"], 24 \n\t" \
++ "shll_s.w %[" #R "], %[" #R "], 9 \n\t" \
++ "shll_s.w %[" #G "], %[" #G "], 9 \n\t" \
++ "shll_s.w %[" #B "], %[" #B "], 9 \n\t" \
++ "precrqu_s.qb.ph %[" #R "], %[" #R "], $zero \n\t" \
++ "precrqu_s.qb.ph %[" #G "], %[" #G "], $zero \n\t" \
++ "precrqu_s.qb.ph %[" #B "], %[" #B "], $zero \n\t" \
++ "srl %[" #R "], %[" #R "], 24 \n\t" \
++ "srl %[" #G "], %[" #G "], 24 \n\t" \
++ "srl %[" #B "], %[" #B "], 24 \n\t" \
+ : [R]"+r"(R), [G]"+r"(G), [B]"+r"(B) \
+ : \
+ ); \
+diff --git a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
+index 43f02cc..45a2200 100644
+--- a/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
++++ b/Source/LibWebP/src/dsp/dsp.yuv_mips_dsp_r2.c
+@@ -39,12 +39,12 @@
+ "addu %[temp5], %[temp0], %[temp1] \n\t" \
+ "subu %[temp6], %[temp0], %[temp2] \n\t" \
+ "addu %[temp7], %[temp0], %[temp4] \n\t" \
+-".if "#K" \n\t" \
++".if " #K " \n\t" \
+ "lbu %[temp0], 1(%[y]) \n\t" \
+ ".endif \n\t" \
+ "shll_s.w %[temp5], %[temp5], 9 \n\t" \
+ "shll_s.w %[temp6], %[temp6], 9 \n\t" \
+-".if "#K" \n\t" \
++".if " #K " \n\t" \
+ "mul %[temp0], %[t_con_5], %[temp0] \n\t" \
+ ".endif \n\t" \
+ "shll_s.w %[temp7], %[temp7], 9 \n\t" \
+@@ -54,9 +54,9 @@
+ "srl %[temp5], %[temp5], 24 \n\t" \
+ "srl %[temp6], %[temp6], 24 \n\t" \
+ "srl %[temp7], %[temp7], 24 \n\t" \
+- "sb %[temp5], "#R"(%[dst]) \n\t" \
+- "sb %[temp6], "#G"(%[dst]) \n\t" \
+- "sb %[temp7], "#B"(%[dst]) \n\t" \
++ "sb %[temp5], " #R "(%[dst]) \n\t" \
++ "sb %[temp6], " #G "(%[dst]) \n\t" \
++ "sb %[temp7], " #B "(%[dst]) \n\t" \
+
+ #define ASM_CLOBBER_LIST() \
+ : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \