// Compiler-specific warning suppression for this file.
// NOTE(review): the matching #endif lines are not visible in this view.
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100) // MSVC C4100: unreferenced formal parameter
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
16 #ifndef CRYPTOPP_IMPORTS
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
// MAYBE_CONST / MAYBE_UNCONST_CAST are used together when the const input arrays are
// re-bound to local pointers, e.g. `MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);`.
48 #if (__SUNPRO_CC >= 0x5130)
50 # define MAYBE_UNCONST_CAST const_cast<word*> // SunCC >= 0x5130: the cast strips const from the input pointer
// NOTE(review): the two definitions below presumably belong to the #else branch; the
// intervening lines are not visible in this view.
52 # define MAYBE_CONST const
53 # define MAYBE_UNCONST_CAST // expands to nothing, so MAYBE_UNCONST_CAST(AA) is just (AA)
// On x32, or when CRYPTOPP_DISABLE_INTEL_ASM is defined, withdraw the x86/x32/x64 assembly
// feature macros and force the SSE2/SSSE3 assembly flags to 0.
58 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
59 # undef CRYPTOPP_X86_ASM_AVAILABLE
60 # undef CRYPTOPP_X32_ASM_AVAILABLE
61 # undef CRYPTOPP_X64_ASM_AVAILABLE
62 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
// The SSE2 integer routines are enabled only when SSE2 assembly is available AND the target is 32-bit x86.
// NOTE(review): whether this definition sits inside or after the #if above is not visible here.
67 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
72 bool AssignIntToInteger(
const std::type_info &valueType,
void *pInteger,
const void *pInt)
74 if (valueType !=
typeid(
Integer))
76 *
reinterpret_cast<Integer *
>(pInteger) = *
reinterpret_cast<const int *
>(pInt);
80 inline static int Compare(
const word *A,
const word *B,
size_t N)
91 inline static int Increment(word *A,
size_t N, word B=1)
98 for (
unsigned i=1; i<N; i++)
104 inline static int Decrement(word *A,
size_t N, word B=1)
111 for (
unsigned i=1; i<N; i++)
117 static void TwosComplement(word *A,
size_t N)
120 for (
unsigned i=0; i<N; i++)
124 static word AtomicInverseModPower2(word A)
130 for (
unsigned i=3; i<WORD_BITS; i*=2)
139 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
// Two-word ("double word") values represented as a pair of word variables x0 (low) and x1 (high).
// Note that each statement-like macro carries its own trailing semicolon.
140 #define Declare2Words(x) word x##0, x##1;
141 #define AssignWord(a, b) a##0 = b; a##1 = 0; // a = b, zero-extended into the high word
142 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); // a = b + c; (a##0 < c) detects the unsigned wrap-around carry
143 #define LowWord(a) a##0
144 #define HighWord(a) a##1
// Per-compiler implementations of the word x word -> two-word multiply and related helpers.
// NOTE(review): the opening conditional of this chain and several #else/#endif lines are
// not visible in this view.

// Intrinsic form: _umul128 returns the low word and stores the high word through &p1.
146 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
147 #ifndef __INTEL_COMPILER
// Shifts the three-word value (d##1 : d##0 : c) left by one bit, using __shiftleft128 for the upper two words.
148 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
150 #elif defined(__DECCXX)
// DEC C++: low word from an ordinary multiply, high word from the umulh instruction.
151 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
152 #elif defined(__x86_64__)
153 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
// Sun compilers older than 0x5100: the multiplier is forced into a register ("r").
155 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
// mulq multiplies rax (a) by operand %3 (b); the low word is returned in rax (p0) and the high word in rdx (p1).
// NOTE(review): the "g" constraint also admits an immediate operand, which mulq cannot encode;
// "rm" looks like the tighter constraint here and in MulAcc -- confirm before changing.
157 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
// Adds the product a*b into the three-word accumulator (d##1 : d##0 : c).
// Relies on variables named p0 and p1 being in scope at the expansion site (they receive rax/rdx).
158 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
// Doubles the three-word value (d##1 : d##0 : c) with an add/adc chain.
159 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
// (a##1 : a##0) += b
160 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
// (a##1 : a##0) += (b##1 : b##0)
161 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
// c += e##0; then e = d + e##1 + carry. The outputs e##0/e##1 are tied ("1", "2") to registers preloaded with d##0/d##1.
162 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
// Portable helpers for the split (x##0 / x##1) two-word representation.
// Two-word product p = a*b, delegating to MultiplyWordsLoHi on the halves of p.
165 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
// Doubles the three-word value (d##1 : d##0 : c); the top bit of each lower word moves into the word above.
167 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
// (a##1 : a##0) += (b##1 : b##0); the carry out of the low word is detected by the compare a##0 < b##0.
170 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
// u##1 holds the carry in and receives the carry out; u##0 receives a + b + carry.
172 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
// u##1 holds the borrow in and receives the borrow out; u##0 receives a - b - borrow.
173 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
174 #define GetCarry(u) u##1
175 #define GetBorrow(u) u##1
// Same macro family for the case where a two-word value is a single dword variable.
// NOTE(review): this is presumably the #else branch of the split-halves definitions; the
// #else/#endif lines are not visible in this view.
177 #define Declare2Words(x) dword x;
178 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
179 #define MultiplyWords(p, a, b) p = __emulu(a, b); // MSVC intrinsic form of the widening multiply
181 #define MultiplyWords(p, a, b) p = (dword)a*b; // the cast widens a, so the multiplication is carried out in dword
183 #define AssignWord(a, b) a = b;
184 #define Add2WordsBy1(a, b, c) a = b + c;
185 #define Acc2WordsBy2(a, b) a += b;
186 #define LowWord(a) word(a)
187 #define HighWord(a) word(a>>WORD_BITS)
// Doubles the value formed by dword d above word c; the top bit of c is shifted into d.
188 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
// u carries state between calls: its high word is the carry in, and afterwards the carry out.
189 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
// The borrow in is taken from bit (WORD_BITS*2-1) of u (see GetBorrow).
190 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
191 #define GetCarry(u) HighWord(u)
// NOTE(review): this is the top bit of u only if dword is exactly 2*WORD_BITS wide -- confirm.
192 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
// Generic forms built from the primitives above.
// NOTE(review): these are presumably fallbacks for when no asm version was defined; any
// guarding #ifndef lines are not visible in this view.
// MulAcc: p = a*b + c; c = low word of p; d += high word of p. Uses a variable p from the expansion site.
195 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
// In-place form of Add2WordsBy1: a += b.
198 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
// e += c; c = low word of e; e = d + high word of e.
201 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
// DWord constructors. The member that is initialized depends on the representation:
// m_whole when CRYPTOPP_NATIVE_DWORD_AVAILABLE is defined, m_halfs in the other variant.
// NOTE(review): the #else/#endif lines separating the variants are not visible in this view.
207 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
208 DWord() : m_whole() { } // value-initializes the native double word
210 DWord() : m_halfs() { } // value-initializes the low/high pair
213 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
// Builds a DWord from a single word; explicit, so a word never converts implicitly.
214 explicit DWord(word low) : m_whole(low) { }
216 explicit DWord(word low) : m_halfs()
222 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
223 DWord(word low, word high) : m_whole()
225 DWord(word low, word high) : m_halfs()
228 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
229 # if defined(IS_LITTLE_ENDIAN)
230 const word t[2] = {low,high};
231 memcpy(&m_whole, &t,
sizeof(m_whole));
233 const word t[2] = {high,low};
234 memcpy(&m_whole, &t,
sizeof(m_whole));
242 static DWord Multiply(word a, word b)
245 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
246 r.m_whole = (dword)a * b;
247 #elif defined(MultiplyWordsLoHi)
248 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
255 static DWord MultiplyAndAdd(word a, word b, word c)
257 DWord r = Multiply(a, b);
261 DWord & operator+=(word a)
263 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
264 m_whole = m_whole + a;
267 m_halfs.high += (m_halfs.low < a);
272 DWord operator+(word a)
275 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
276 r.m_whole = m_whole + a;
278 r.m_halfs.low = m_halfs.low + a;
279 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
287 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
288 r.m_whole = m_whole - a.m_whole;
290 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
291 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
296 DWord operator-(word a)
299 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
300 r.m_whole = m_whole - a;
302 r.m_halfs.low = m_halfs.low - a;
303 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
// Quotient and remainder of this double word by a single word; both yield a single word.
// Declarations only: the bodies are defined out of line as DWord::operator/ and DWord::operator%.
309 word operator/(word divisor);
311 word operator%(word a);
313 bool operator!()
const
315 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
318 return !m_halfs.high && !m_halfs.low;
// Returns the low word of the split (m_halfs) representation.
324 word GetLowHalf()
const {
return m_halfs.low;}
// Returns the high word of the split (m_halfs) representation.
325 word GetHighHalf()
const {
return m_halfs.high;}
// Returns 0 - high word, i.e. the high word negated modulo the word size.
// DivideThreeWordsByTwo subtracts this value to propagate a borrow from the previous step.
326 word GetHighHalfAsBorrow()
const {
return 0-m_halfs.high;}
334 #ifdef IS_LITTLE_ENDIAN
344 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
// Word constructors.
356 #if defined(__COVERITY__)
// When __COVERITY__ is defined the default constructor simply zero-initializes.
357 Word() : m_whole(0) {}
// This variant zero-initializes and then overwrites every byte of m_whole with 0xaa.
// NOTE(review): presumably a poison pattern that makes use of a default-constructed Word
// conspicuous -- confirm; the condition guarding this variant is not visible in this view.
360 Word() : m_whole(0) {memset(&m_whole, 0xaa,
sizeof(m_whole));}
// Converting constructor from a full word (not explicit).
365 Word(word value) : m_whole(value) {}
// Assembles a word from two half words: low fills the bottom WORD_BITS/2 bits, high the bits above.
366 Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
368 static Word Multiply(hword a, hword b)
371 r.m_whole = (word)a * b;
378 r.m_whole = m_whole - a.m_whole;
382 Word operator-(hword a)
385 r.m_whole = m_whole - a;
390 hword operator/(hword divisor)
392 return hword(m_whole / divisor);
395 bool operator!()
const
// Returns the stored word unchanged.
400 word GetWhole()
const {
return m_whole;}
// Returns m_whole converted to hword (the low half, given hword is the narrower type).
401 hword GetLowHalf()
const {
return hword(m_whole);}
// Returns the upper WORD_BITS/2 bits of m_whole as an hword.
402 hword GetHighHalf()
const {
return hword(m_whole>>(WORD_BITS/2));}
// Returns 0 - (high half), converted to hword; the half-word counterpart of
// DWord::GetHighHalfAsBorrow, for use as a borrow term in a subtraction.
403 hword GetHighHalfAsBorrow()
const {
return 0-hword(m_whole>>(WORD_BITS/2));}
410 template <
class S,
class D>
411 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
413 CRYPTOPP_UNUSED(dummy);
422 S Q;
bool pre = (S(B1+1) == 0);
424 Q = D(A[1], A[2]) / S(B1+1);
428 Q = D(A[0], A[1]) / B0;
431 D p = D::Multiply(B0, Q);
432 D u = (D) A[0] - p.GetLowHalf();
433 A[0] = u.GetLowHalf();
434 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
435 A[1] = u.GetLowHalf();
436 A[2] += u.GetHighHalf();
439 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
442 A[0] = u.GetLowHalf();
443 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
444 A[1] = u.GetLowHalf();
445 A[2] += u.GetHighHalf();
454 template <
class S,
class D>
455 inline D DivideFourWordsByTwo(S *T,
const D &Al,
const D &Ah,
const D &B)
463 T[0] = Al.GetLowHalf();
464 T[1] = Al.GetHighHalf();
465 T[2] = Ah.GetLowHalf();
466 T[3] = Ah.GetHighHalf();
467 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
468 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
469 return D(Q[0], Q[1]);
473 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
478 inline word DWord::operator/(word a)
480 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
481 return word(m_whole / a);
484 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
488 inline word DWord::operator%(word a)
490 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
491 return word(m_whole % a);
493 if (a < (word(1) << (WORD_BITS/2)))
496 word r = m_halfs.high % h;
497 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
498 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
503 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
504 return Word(r[0], r[1]).GetWhole();
512 #if defined(__GNUC__)
513 #define AddPrologue \
515 __asm__ __volatile__ \
518 #define AddEpilogue \
521 : "d" (C), "a" (A), "D" (B), "c" (N) \
522 : "%esi", "memory", "cc" \
525 #define MulPrologue \
526 __asm__ __volatile__ \
531 #define MulEpilogue \
535 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
536 : "%esi", "memory", "cc" \
538 #define SquPrologue MulPrologue
539 #define SquEpilogue \
543 : "d" (s_maskLow16), "c" (C), "a" (A) \
544 : "%esi", "%edi", "memory", "cc" \
546 #define TopPrologue MulPrologue
547 #define TopEpilogue \
551 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
555 #define AddPrologue \
558 __asm mov eax, [esp+12] \
559 __asm mov edi, [esp+16]
560 #define AddEpilogue \
// Push/pop ebx around the __asm-syntax routines.
// NOTE(review): presumably needed because the Squ/Mul/Top prologues load s_maskLow16 into ebx -- confirm.
565 #define SaveEBX __asm push ebx
566 #define RestoreEBX __asm pop ebx
571 #define SquPrologue \
575 AS2( lea ebx, s_maskLow16)
576 #define MulPrologue \
581 AS2( lea ebx, s_maskLow16)
582 #define TopPrologue \
588 AS2( lea ebx, s_maskLow16)
// In this (__asm-syntax) variant all three epilogues only restore ebx.
589 #define SquEpilogue RestoreEBX
590 #define MulEpilogue RestoreEBX
591 #define TopEpilogue RestoreEBX
594 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
// Prototypes only in this configuration.
// NOTE(review): the definitions are presumably supplied by a separately assembled MASM module -- confirm.
// Going by the other Baseline_Add/Baseline_Sub variants in this file: C receives A + B over
// N words and the return value is the final carry.
596 int Baseline_Add(
size_t N, word *C,
const word *A,
const word *B);
// Same contract with subtraction: C receives A - B and the return value is the final borrow.
597 int Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B);
599 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
600 int Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
608 AS2( mov %0,[%3+8*%1])
609 AS2( add %0,[%4+8*%1])
610 AS2( mov [%2+8*%1],%0)
612 AS2( mov %0,[%3+8*%1+8])
613 AS2( adc %0,[%4+8*%1+8])
614 AS2( mov [%2+8*%1+8],%0)
617 AS2( mov %0,[%3+8*%1])
618 AS2( adc %0,[%4+8*%1])
619 AS2( mov [%2+8*%1],%0)
625 :
"=&r" (result),
"+c" (N)
626 :
"r" (C+N),
"r" (A+N),
"r" (B+N)
632 int Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B)
640 AS2( mov %0,[%3+8*%1])
641 AS2( sub %0,[%4+8*%1])
642 AS2( mov [%2+8*%1],%0)
644 AS2( mov %0,[%3+8*%1+8])
645 AS2( sbb %0,[%4+8*%1+8])
646 AS2( mov [%2+8*%1+8],%0)
649 AS2( mov %0,[%3+8*%1])
650 AS2( sbb %0,[%4+8*%1])
651 AS2( mov [%2+8*%1],%0)
657 :
"=&r" (result),
"+c" (N)
658 :
"r" (C+N),
"r" (A+N),
"r" (B+N)
663 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
664 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
669 AS2( lea eax, [eax+4*ecx])
670 AS2( lea edi, [edi+4*ecx])
671 AS2( lea edx, [edx+4*ecx])
681 AS2( mov esi,[eax+4*ecx])
682 AS2( adc esi,[edi+4*ecx])
683 AS2( mov [edx+4*ecx],esi)
684 AS2( mov esi,[eax+4*ecx+4])
685 AS2( adc esi,[edi+4*ecx+4])
686 AS2( mov [edx+4*ecx+4],esi)
688 AS2( mov esi,[eax+4*ecx+8])
689 AS2( adc esi,[edi+4*ecx+8])
690 AS2( mov [edx+4*ecx+8],esi)
691 AS2( mov esi,[eax+4*ecx+12])
692 AS2( adc esi,[edi+4*ecx+12])
693 AS2( mov [edx+4*ecx+12],esi)
695 AS2( lea ecx,[ecx+4])
705 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, word *C, const word *A, const word *B)
710 AS2( lea eax, [eax+4*ecx])
711 AS2( lea edi, [edi+4*ecx])
712 AS2( lea edx, [edx+4*ecx])
722 AS2( mov esi,[eax+4*ecx])
723 AS2( sbb esi,[edi+4*ecx])
724 AS2( mov [edx+4*ecx],esi)
725 AS2( mov esi,[eax+4*ecx+4])
726 AS2( sbb esi,[edi+4*ecx+4])
727 AS2( mov [edx+4*ecx+4],esi)
729 AS2( mov esi,[eax+4*ecx+8])
730 AS2( sbb esi,[edi+4*ecx+8])
731 AS2( mov [edx+4*ecx+8],esi)
732 AS2( mov esi,[eax+4*ecx+12])
733 AS2( sbb esi,[edi+4*ecx+12])
734 AS2( mov [edx+4*ecx+12],esi)
736 AS2( lea ecx,[ecx+4])
746 #if CRYPTOPP_INTEGER_SSE2
747 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL SSE2_Add(
size_t N, word *C,
const word *A,
const word *B)
752 AS2( lea eax, [eax+4*ecx])
753 AS2( lea edi, [edi+4*ecx])
754 AS2( lea edx, [edx+4*ecx])
765 AS2( movd mm0, DWORD PTR [eax+4*ecx])
766 AS2( movd mm1, DWORD PTR [edi+4*ecx])
769 AS2( movd DWORD PTR [edx+4*ecx], mm2)
772 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
773 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
776 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
780 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
781 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
784 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
787 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
788 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
791 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
803 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL SSE2_Sub(
size_t N, word *C, const word *A, const word *B)
808 AS2( lea eax, [eax+4*ecx])
809 AS2( lea edi, [edi+4*ecx])
810 AS2( lea edx, [edx+4*ecx])
821 AS2( movd mm0, DWORD PTR [eax+4*ecx])
822 AS2( movd mm1, DWORD PTR [edi+4*ecx])
825 AS2( movd DWORD PTR [edx+4*ecx], mm0)
828 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
829 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
832 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
836 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
837 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
840 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
843 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
844 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
847 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
859 #endif // #if CRYPTOPP_INTEGER_SSE2
861 int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
867 for (
size_t i=0; i<N; i+=2)
869 AddWithCarry(u, A[i], B[i]);
871 AddWithCarry(u, A[i+1], B[i+1]);
874 return int(GetCarry(u));
877 int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B)
883 for (
size_t i=0; i<N; i+=2)
885 SubtractWithBorrow(u, A[i], B[i]);
887 SubtractWithBorrow(u, A[i+1], B[i+1]);
890 return int(GetBorrow(u));
894 static word LinearMultiply(word *C,
const word *AA, word B,
size_t N)
897 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
900 for(
unsigned i=0; i<N; i++)
903 MultiplyWords(p, A[i], B);
904 Acc2WordsBy1(p, carry);
911 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
915 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
920 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
921 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
922 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
923 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
924 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
929 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
930 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
931 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
932 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
933 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
934 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
935 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
936 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
937 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
938 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
939 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
940 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
941 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
946 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
947 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
948 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
949 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
950 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
951 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
952 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
953 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
954 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
955 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
956 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
957 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
958 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
959 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
960 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
961 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
962 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
963 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
964 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
965 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
966 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
967 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
968 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
969 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
970 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
971 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
972 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
973 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
974 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
983 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
984 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
985 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
986 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
991 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
992 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
993 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
994 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
995 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
996 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
997 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
998 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
999 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1000 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1001 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1002 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1007 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1008 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1009 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1010 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1011 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1012 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1013 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1014 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1015 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1016 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1017 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1018 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1019 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1020 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1021 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1022 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1023 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1024 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1025 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1026 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1027 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1028 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1029 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1030 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1031 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1032 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1033 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1034 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1039 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1044 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1045 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1046 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1051 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1052 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1053 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1054 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1055 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1056 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1057 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1062 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1063 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1064 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1065 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1066 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1067 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1068 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1069 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1070 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1071 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1072 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1073 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1074 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1075 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1076 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1082 #define Mul_Begin(n) \
1086 MultiplyWords(p, A[0], B[0]) \
1087 AssignWord(c, LowWord(p)) \
1088 AssignWord(d, HighWord(p))
/* Mul_Acc(i, j): p = A[i]*B[j]; the low word of p is added into c and the high word into d
   (both accumulated as two-word values). Expects p, c, d, A and B at the expansion site. */
1090 #define Mul_Acc(i, j) \
1091 MultiplyWords(p, A[i], B[j]) \
1092 Acc2WordsBy1(c, LowWord(p)) \
1093 Acc2WordsBy1(d, HighWord(p))
/* Mul_SaveAcc(k, i, j): finishes one result column and starts the next.
   Stores the low word of c in R[k], carries the rest forward (c = d + high word of c), then
   seeds the new column with p = A[i]*B[j] (d = high word of p, c += low word of p). */
1095 #define Mul_SaveAcc(k, i, j) \
1096 R[k] = LowWord(c); \
1097 Add2WordsBy1(c, d, HighWord(c)) \
1098 MultiplyWords(p, A[i], B[j]) \
1099 AssignWord(d, HighWord(p)) \
1100 Acc2WordsBy1(c, LowWord(p))
/* Mul_End(n): writes the last three result words of an n-word by n-word product.
   R[2n-3] takes the low word of c; the remaining carry plus the final product
   A[n-1]*B[n-1] is summed in d, whose low and high words become R[2n-2] and R[2n-1]. */
1102 #define Mul_End(n) \
1103 R[2*n-3] = LowWord(c); \
1104 Acc2WordsBy1(d, HighWord(c)) \
1105 MultiplyWords(p, A[n-1], B[n-1])\
1106 Acc2WordsBy2(d, p) \
1107 R[2*n-2] = LowWord(d); \
1108 R[2*n-1] = HighWord(d);
1110 #define Bot_SaveAcc(k, i, j) \
1111 R[k] = LowWord(c); \
1112 word e = LowWord(d) + HighWord(c); \
1115 #define Bot_Acc(i, j) \
1118 #define Bot_End(n) \
1121 #define Mul_Begin(n) \
1125 MultiplyWords(p, A[0], B[0]) \
1127 AssignWord(d, HighWord(p))
/* Mul_Acc(i, j), alternate definition: delegates the multiply-accumulate of A[i]*B[j] to MulAcc.
   NOTE(review): presumably selected by a preprocessor branch not visible in this view. */
1129 #define Mul_Acc(i, j) \
1130 MulAcc(c, d, A[i], B[j])
1132 #define Mul_SaveAcc(k, i, j) \
1135 AssignWord(d, HighWord(d)) \
1136 MulAcc(c, d, A[i], B[j])
1138 #define Mul_End(k, i) \
1140 MultiplyWords(p, A[i], B[i]) \
1141 Acc2WordsBy2(p, d) \
1142 R[k+1] = LowWord(p); \
1143 R[k+2] = HighWord(p);
1145 #define Bot_SaveAcc(k, i, j) \
1150 #define Bot_Acc(i, j) \
1153 #define Bot_End(n) \
1157 #define Squ_Begin(n) \
1162 MultiplyWords(p, A[0], A[0]) \
1163 R[0] = LowWord(p); \
1164 AssignWord(e, HighWord(p)) \
1165 MultiplyWords(p, A[0], A[1]) \
1167 AssignWord(d, HighWord(p)) \
1170 #define Squ_NonDiag \
1173 #define Squ_SaveAcc(k, i, j) \
1174 Acc3WordsBy2(c, d, e) \
1176 MultiplyWords(p, A[i], A[j]) \
1178 AssignWord(d, HighWord(p)) \
1180 #define Squ_Acc(i, j) \
1181 MulAcc(c, d, A[i], A[j]) /* squaring: accumulates the cross product A[i]*A[j] into the c/d accumulator via MulAcc */
1183 #define Squ_Diag(i) \
1185 MulAcc(c, d, A[i], A[i])
1187 #define Squ_End(n) \
1188 Acc3WordsBy2(c, d, e) \
1190 MultiplyWords(p, A[n-1], A[n-1])\
1191 Acc2WordsBy2(p, e) \
1192 R[2*n-2] = LowWord(p); \
1193 R[2*n-1] = HighWord(p);
1196 void Baseline_Multiply2(word *R,
const word *AA,
const word *BB)
1199 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1200 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1205 void Baseline_Multiply4(word *R,
const word *AA,
const word *BB)
1208 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1209 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1214 void Baseline_Multiply8(word *R,
const word *AA,
const word *BB)
1217 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1218 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1223 void Baseline_Square2(word *R,
const word *AA)
1226 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1231 void Baseline_Square4(word *R,
const word *AA)
1234 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1239 void Baseline_Square8(word *R,
const word *AA)
1242 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1247 void Baseline_MultiplyBottom2(word *R,
const word *AA,
const word *BB)
1250 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1251 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1256 void Baseline_MultiplyBottom4(word *R,
const word *AA,
const word *BB)
1259 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1260 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1265 void Baseline_MultiplyBottom8(word *R,
const word *AA,
const word *BB)
1268 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1269 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1274 #define Top_Begin(n) \
1278 MultiplyWords(p, A[0], B[n-2]);\
1279 AssignWord(d, HighWord(p));
/* Top_Acc(i, j): p = A[i]*B[j]; only the high word of p is accumulated into d
   (the low word of the product is not used). */
1281 #define Top_Acc(i, j) \
1282 MultiplyWords(p, A[i], B[j]);\
1283 Acc2WordsBy1(d, HighWord(p));
1285 #define Top_SaveAcc0(i, j) \
1287 AssignWord(d, HighWord(d)) \
1288 MulAcc(c, d, A[i], B[j])
1290 #define Top_SaveAcc1(i, j) \
1292 Acc2WordsBy1(d, c); \
1294 AssignWord(d, HighWord(d)) \
1295 MulAcc(c, d, A[i], B[j])
1297 void Baseline_MultiplyTop2(word *R,
const word *A,
const word *B, word L)
1301 Baseline_Multiply2(T, A, B);
1306 void Baseline_MultiplyTop4(word *R,
const word *AA,
const word *BB, word L)
1309 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1310 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1313 Top_Acc(1, 1) Top_Acc(2, 0) \
1314 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1315 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1316 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1320 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1323 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1324 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1327 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1328 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1329 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1330 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1331 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1332 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1333 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1334 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1338 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
// Baseline_Multiply16: multiply kernel with the PMul signature (R, AA, BB);
// installed as s_pMul[4] when CRYPTOPP_INTEGER_SSE2 is off.
// AA/BB are rebound to the local names A/B used by the multiply macros.
1339 void Baseline_Multiply16(word *R,
const word *AA,
const word *BB)
1342 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1343 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
// Baseline_Square16: squaring kernel with the PSqu signature (R, AA); installed
// as s_pSqu[4] when CRYPTOPP_INTEGER_SSE2 is off. AA is rebound to the local name A.
1348 void Baseline_Square16(word *R,
const word *AA)
1351 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
// Baseline_MultiplyBottom16: "multiply bottom" kernel with the PMul signature;
// installed as s_pBot[4] when CRYPTOPP_INTEGER_SSE2 is off.
// AA/BB are rebound to the local names A/B used by the multiply macros.
1356 void Baseline_MultiplyBottom16(word *R,
const word *AA,
const word *BB)
1359 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1360 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
// Baseline_MultiplyTop16: "multiply top" kernel with the PMulTop signature;
// installed as s_pTop[4] when CRYPTOPP_INTEGER_SSE2 is off. AA/BB are rebound to
// A/B for the Top_*/Mul_* macros. Each schedule row lists the (i, j) pairs of one
// column (constant i+j, from 14 up to 29); the first argument of Mul_SaveAcc
// counts up 0..12 by row.
1365 void Baseline_MultiplyTop16(word *R,
const word *AA,
const word *BB, word L)
1368 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1369 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1372 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1373 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1374 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1375 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1376 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1377 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1378 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1379 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1380 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1381 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1382 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1383 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1384 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1385 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1386 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1387 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1394 #if CRYPTOPP_INTEGER_SSE2
// Constant of four 32-bit lanes, each 0x0000ffff, declared with 16-byte alignment.
// NOTE(review): presumably the mask the SSE2 macros load through [ebx] to keep the
// low 16 bits of each lane -- confirm where ebx is set up.
1396 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
// SSE2_FinalSave(k): shifts each 64-bit lane of xmm5 left by 16 bits, adds the
// result into xmm4 (64-bit add) and stores the low 64 bits of xmm4 at [ecx+8*k].
1414 #define SSE2_FinalSave(k) \
1415 AS2( psllq xmm5, 16) \
1416 AS2( paddq xmm4, xmm5) \
1417 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
// SSE2_SaveShift(k): column write-out step. For xmm6 and xmm7 the high 64-bit half
// is added onto the low half (movq / punpckhqdq / paddd); both sums are byte-shifted
// left by 4 and added into xmm4 and xmm5 respectively. The low 32 bits of xmm4 are
// stored at [ecx+8*k]; (low qword of xmm4 >> 16) + low qword of xmm5 is stored as
// 64 bits at [ecx+8*k+2]. Finally the upper halves of xmm4/xmm5 are moved down and
// the stored sum, shifted right a further 48 bits, is added into xmm4.
1419 #define SSE2_SaveShift(k) \
1420 AS2( movq xmm0, xmm6) \
1421 AS2( punpckhqdq xmm6, xmm0) \
1422 AS2( movq xmm1, xmm7) \
1423 AS2( punpckhqdq xmm7, xmm1) \
1424 AS2( paddd xmm6, xmm0) \
1425 AS2( pslldq xmm6, 4) \
1426 AS2( paddd xmm7, xmm1) \
1427 AS2( paddd xmm4, xmm6) \
1428 AS2( pslldq xmm7, 4) \
1429 AS2( movq xmm6, xmm4) \
1430 AS2( paddd xmm5, xmm7) \
1431 AS2( movq xmm7, xmm5) \
1432 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1433 AS2( psrlq xmm6, 16) \
1434 AS2( paddq xmm6, xmm7) \
1435 AS2( punpckhqdq xmm4, xmm0) \
1436 AS2( punpckhqdq xmm5, xmm0) \
1437 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1438 AS2( psrlq xmm6, 3*16) \
1439 AS2( paddd xmm4, xmm6) \
// Squ_SSE2_SaveShift(k): squaring variant of the column write-out. xmm6/xmm7 are
// folded and added into xmm4/xmm5 as in SSE2_SaveShift; the low 32 bits of xmm4 go
// to [ecx+8*k] and a 64-bit sum to [ecx+8*k+2]. Unlike SSE2_SaveShift, the state
// carried to the next column is spilled to the stack: the high half of xmm5 at
// [esp+12] and the low 64 bits of the updated xmm4 at [esp+4].
1441 #define Squ_SSE2_SaveShift(k) \
1442 AS2( movq xmm0, xmm6) \
1443 AS2( punpckhqdq xmm6, xmm0) \
1444 AS2( movq xmm1, xmm7) \
1445 AS2( punpckhqdq xmm7, xmm1) \
1446 AS2( paddd xmm6, xmm0) \
1447 AS2( pslldq xmm6, 4) \
1448 AS2( paddd xmm7, xmm1) \
1449 AS2( paddd xmm4, xmm6) \
1450 AS2( pslldq xmm7, 4) \
1451 AS2( movhlps xmm6, xmm4) \
1452 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1453 AS2( paddd xmm5, xmm7) \
1454 AS2( movhps QWORD PTR [esp+12], xmm5)\
1455 AS2( psrlq xmm4, 16) \
1456 AS2( paddq xmm4, xmm5) \
1457 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1458 AS2( psrlq xmm4, 3*16) \
1459 AS2( paddd xmm4, xmm6) \
1460 AS2( movq QWORD PTR [esp+4], xmm4)\
// SSE2_FirstMultiply(i): first multiply of a column; it initialises the four
// accumulators instead of adding to them. It multiplies [esi+i*16] by [edi-i*16]
// and by [edx-i*16] with pmuludq, then splits each product using the mask loaded
// from [ebx]: masked parts go to xmm4 / xmm6, and the products shifted right by 16
// (per 32-bit lane) go to xmm5 / xmm7.
1462 #define SSE2_FirstMultiply(i) \
1463 AS2( movdqa xmm7, [esi+(i)*16])\
1464 AS2( movdqa xmm5, [edi-(i)*16])\
1465 AS2( pmuludq xmm5, xmm7) \
1466 AS2( movdqa xmm4, [ebx])\
1467 AS2( movdqa xmm6, xmm4) \
1468 AS2( pand xmm4, xmm5) \
1469 AS2( psrld xmm5, 16) \
1470 AS2( pmuludq xmm7, [edx-(i)*16])\
1471 AS2( pand xmm6, xmm7) \
1472 AS2( psrld xmm7, 16)
// Squ_Begin(n): prologue of the SSE2 squaring kernels. Visible steps: align esp down
// to 16 bytes, point edi at esp-32*n and reserve 32*n+16 bytes of stack; then, with
// edx as a byte offset starting at 0, shuffle 16 bytes from [eax+edx] in two dword
// orders (pshufd 3,1,2,0 and 2,0,3,1) and store each shuffle, plus a copy shifted
// right by 32 bits per qword, at [edi+2*edx] and [edi+16*n+2*edx]. edx is compared
// with 8*n (presumably the loop bound), then edx is pointed at the second table
// and the first column is started with SSE2_FirstMultiply(0).
1474 #define Squ_Begin(n) \
1477 AS2( and esp, 0xfffffff0)\
1478 AS2( lea edi, [esp-32*n])\
1479 AS2( sub esp, 32*n+16)\
1481 AS2( mov esi, edi) \
1482 AS2( xor edx, edx) \
1484 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1485 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1486 AS2( movdqa [edi+2*edx], xmm0) \
1487 AS2( psrlq xmm0, 32) \
1488 AS2( movdqa [edi+2*edx+16], xmm0) \
1489 AS2( movdqa [edi+16*n+2*edx], xmm1) \
1490 AS2( psrlq xmm1, 32) \
1491 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1493 AS2( cmp edx, 8*(n)) \
1495 AS2( lea edx, [edi+16*n])\
1496 SSE2_FirstMultiply(0) \
// Squ_Acc(i): multiply-accumulate step for squaring. Multiplies [esi+i*16] by
// [edi-i*16] and by [edx-i*16]; using the mask from [ebx], the masked part of each
// product is added into xmm4 / xmm6 and the product shifted right by 16 bits into
// xmm5 / xmm7 (32-bit lane adds).
1498 #define Squ_Acc(i) \
1500 AS2( movdqa xmm1, [esi+(i)*16]) \
1501 AS2( movdqa xmm0, [edi-(i)*16]) \
1502 AS2( movdqa xmm2, [ebx]) \
1503 AS2( pmuludq xmm0, xmm1) \
1504 AS2( pmuludq xmm1, [edx-(i)*16]) \
1505 AS2( movdqa xmm3, xmm2) \
1506 AS2( pand xmm2, xmm0) \
1507 AS2( psrld xmm0, 16) \
1508 AS2( paddd xmm4, xmm2) \
1509 AS2( paddd xmm5, xmm0) \
1510 AS2( pand xmm3, xmm1) \
1511 AS2( psrld xmm1, 16) \
1512 AS2( paddd xmm6, xmm3) \
1513 AS2( paddd xmm7, xmm1) \
// Squ_Acc2..Squ_Acc8: all expand to ASC(call, LSqu##i), i.e. a call to the label
// LSqu<i>; the numeric suffix of the macro name does not change the expansion.
// NOTE(review): the LSqu<i> labels are presumably emitted by Squ_Acc(i) -- confirm.
1516 #define Squ_Acc2(i) ASC(call, LSqu##i)
1517 #define Squ_Acc3(i) Squ_Acc2(i)
1518 #define Squ_Acc4(i) Squ_Acc2(i)
1519 #define Squ_Acc5(i) Squ_Acc2(i)
1520 #define Squ_Acc6(i) Squ_Acc2(i)
1521 #define Squ_Acc7(i) Squ_Acc2(i)
1522 #define Squ_Acc8(i) Squ_Acc2(i)
// SSE2_End(E, n): shared tail of the SSE2 kernels. Writes out column 2n-3 with
// SSE2_SaveShift, performs one more multiply step on [esi+16], [edi] and [edx]
// (adding into xmm4/xmm5 and re-initialising xmm6/xmm7), writes out column 2n-2,
// then stores the last column, 2n-1, with SSE2_FinalSave.
// NOTE(review): E presumably names the epilogue emitted after these steps -- confirm.
1524 #define SSE2_End(E, n) \
1525 SSE2_SaveShift(2*(n)-3) \
1526 AS2( movdqa xmm7, [esi+16]) \
1527 AS2( movdqa xmm0, [edi]) \
1528 AS2( pmuludq xmm0, xmm7) \
1529 AS2( movdqa xmm2, [ebx]) \
1530 AS2( pmuludq xmm7, [edx]) \
1531 AS2( movdqa xmm6, xmm2) \
1532 AS2( pand xmm2, xmm0) \
1533 AS2( psrld xmm0, 16) \
1534 AS2( paddd xmm4, xmm2) \
1535 AS2( paddd xmm5, xmm0) \
1536 AS2( pand xmm6, xmm7) \
1537 AS2( psrld xmm7, 16) \
1538 SSE2_SaveShift(2*(n)-2) \
1539 SSE2_FinalSave(2*(n)-1) \
// Kernel-specific wrappers around SSE2_End: each passes its own epilogue name
// (SquEpilogue, MulEpilogue, TopEpilogue) together with the size parameter n.
1543 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1544 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1545 #define Top_End(n) SSE2_End(TopEpilogue, n)
// Squ_Column1(k, i): squaring column that includes a diagonal term. After writing
// out column k (Squ_SSE2_SaveShift) and restarting with SSE2_FirstMultiply(1), the
// accumulated cross products are doubled (paddd reg, reg). The products of [esi]
// with itself and with the qword at [esi+8] are split with the [ebx] mask; the
// latter is added into xmm6/xmm7 before those are doubled, the self-product is
// added into xmm4/xmm5 undoubled. Finally the state spilled at [esp+4] and
// [esp+12] by Squ_SSE2_SaveShift is reloaded and added into xmm4 / xmm5.
1547 #define Squ_Column1(k, i) \
1548 Squ_SSE2_SaveShift(k) \
1550 SSE2_FirstMultiply(1)\
1552 AS2( paddd xmm4, xmm4) \
1553 AS2( paddd xmm5, xmm5) \
1554 AS2( movdqa xmm3, [esi]) \
1555 AS2( movq xmm1, QWORD PTR [esi+8]) \
1556 AS2( pmuludq xmm1, xmm3) \
1557 AS2( pmuludq xmm3, xmm3) \
1558 AS2( movdqa xmm0, [ebx])\
1559 AS2( movdqa xmm2, xmm0) \
1560 AS2( pand xmm0, xmm1) \
1561 AS2( psrld xmm1, 16) \
1562 AS2( paddd xmm6, xmm0) \
1563 AS2( paddd xmm7, xmm1) \
1564 AS2( pand xmm2, xmm3) \
1565 AS2( psrld xmm3, 16) \
1566 AS2( paddd xmm6, xmm6) \
1567 AS2( paddd xmm7, xmm7) \
1568 AS2( paddd xmm4, xmm2) \
1569 AS2( paddd xmm5, xmm3) \
1570 AS2( movq xmm0, QWORD PTR [esp+4])\
1571 AS2( movq xmm1, QWORD PTR [esp+12])\
1572 AS2( paddd xmm4, xmm0)\
1573 AS2( paddd xmm5, xmm1)\
// Squ_Column0(k, i): squaring column without a separate diagonal step. After
// writing out column k and restarting with SSE2_FirstMultiply(1), all four
// accumulators are doubled (paddd reg, reg); then the state spilled at [esp+4] and
// [esp+12] by Squ_SSE2_SaveShift is reloaded and added into xmm4 / xmm5.
1575 #define Squ_Column0(k, i) \
1576 Squ_SSE2_SaveShift(k) \
1579 SSE2_FirstMultiply(1)\
1581 AS2( paddd xmm6, xmm6) \
1582 AS2( paddd xmm7, xmm7) \
1583 AS2( paddd xmm4, xmm4) \
1584 AS2( paddd xmm5, xmm5) \
1585 AS2( movq xmm0, QWORD PTR [esp+4])\
1586 AS2( movq xmm1, QWORD PTR [esp+12])\
1587 AS2( paddd xmm4, xmm0)\
1588 AS2( paddd xmm5, xmm1)\
// SSE2_MulAdd45: one multiply step on [esi], [edi] and [edx]. The [esi]*[edi]
// product is split with the [ebx] mask and ADDED into xmm4 (masked part) and xmm5
// (part shifted right by 16); the [esi]*[edx] product is split the same way but
// OVERWRITES xmm6 and xmm7.
1590 #define SSE2_MulAdd45 \
1591 AS2( movdqa xmm7, [esi]) \
1592 AS2( movdqa xmm0, [edi]) \
1593 AS2( pmuludq xmm0, xmm7) \
1594 AS2( movdqa xmm2, [ebx]) \
1595 AS2( pmuludq xmm7, [edx]) \
1596 AS2( movdqa xmm6, xmm2) \
1597 AS2( pand xmm2, xmm0) \
1598 AS2( psrld xmm0, 16) \
1599 AS2( paddd xmm4, xmm2) \
1600 AS2( paddd xmm5, xmm0) \
1601 AS2( pand xmm6, xmm7) \
1602 AS2( psrld xmm7, 16)
// Mul_Begin(n): prologue of the SSE2 multiply kernels. Visible steps: align esp
// down to 16 bytes and reserve 48*n+16 bytes; with edx as a byte offset starting at
// 0, shuffle 16 bytes from [eax+edx] in two dword orders and 16 bytes from
// [edi+edx] in one order, storing each shuffle plus a copy shifted right by 32 bits
// per qword into three stack tables based at esp+20, esp+20+16*n and esp+20+32*n.
// edx is compared with 8*n (presumably the loop bound); afterwards edi, edx and
// esi are pointed at the three tables and SSE2_FirstMultiply(0) starts column 0.
1604 #define Mul_Begin(n) \
1607 AS2( and esp, 0xfffffff0)\
1608 AS2( sub esp, 48*n+16)\
1610 AS2( xor edx, edx) \
1612 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1613 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1614 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1615 AS2( movdqa [esp+20+2*edx], xmm0) \
1616 AS2( psrlq xmm0, 32) \
1617 AS2( movdqa [esp+20+2*edx+16], xmm0) \
1618 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1619 AS2( psrlq xmm1, 32) \
1620 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1621 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1622 AS2( psrlq xmm2, 32) \
1623 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1625 AS2( cmp edx, 8*(n)) \
1627 AS2( lea edi, [esp+20])\
1628 AS2( lea edx, [esp+20+16*n])\
1629 AS2( lea esi, [esp+20+32*n])\
1630 SSE2_FirstMultiply(0) \
// Mul_Acc(i): multiply-accumulate step of the SSE2 multiply kernels. With integer
// arithmetic the offset expression i/2*(1-(i-2*(i/2))*2)*16 equals +(i/2)*16 for
// even i and -(i/2)*16 for odd i. The operand at [esi+off] is multiplied by
// [edi-off] and by [edx-off]; using the [ebx] mask, the masked part of each product
// is added into xmm4 / xmm6 and the product shifted right by 16 into xmm5 / xmm7.
1632 #define Mul_Acc(i) \
1634 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1635 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1636 AS2( movdqa xmm2, [ebx]) \
1637 AS2( pmuludq xmm0, xmm1) \
1638 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1639 AS2( movdqa xmm3, xmm2) \
1640 AS2( pand xmm2, xmm0) \
1641 AS2( psrld xmm0, 16) \
1642 AS2( paddd xmm4, xmm2) \
1643 AS2( paddd xmm5, xmm0) \
1644 AS2( pand xmm3, xmm1) \
1645 AS2( psrld xmm1, 16) \
1646 AS2( paddd xmm6, xmm3) \
1647 AS2( paddd xmm7, xmm1) \
// Mul_Acc2..Mul_Acc16: all expand to ASC(call, LMul##i), i.e. a call to the label
// LMul<i>; the numeric suffix of the macro name does not change the expansion.
// NOTE(review): the LMul<i> labels are presumably emitted by Mul_Acc(i) -- confirm.
1650 #define Mul_Acc2(i) ASC(call, LMul##i)
1651 #define Mul_Acc3(i) Mul_Acc2(i)
1652 #define Mul_Acc4(i) Mul_Acc2(i)
1653 #define Mul_Acc5(i) Mul_Acc2(i)
1654 #define Mul_Acc6(i) Mul_Acc2(i)
1655 #define Mul_Acc7(i) Mul_Acc2(i)
1656 #define Mul_Acc8(i) Mul_Acc2(i)
1657 #define Mul_Acc9(i) Mul_Acc2(i)
1658 #define Mul_Acc10(i) Mul_Acc2(i)
1659 #define Mul_Acc11(i) Mul_Acc2(i)
1660 #define Mul_Acc12(i) Mul_Acc2(i)
1661 #define Mul_Acc13(i) Mul_Acc2(i)
1662 #define Mul_Acc14(i) Mul_Acc2(i)
1663 #define Mul_Acc15(i) Mul_Acc2(i)
1664 #define Mul_Acc16(i) Mul_Acc2(i)
1666 #define Mul_Column1(k, i) \
1672 #define Mul_Column0(k, i) \
// Bot_Acc(i): accumulate step for the last column of a "bottom half" product. It
// uses the same +/-(i/2)*16 offset expression as Mul_Acc, but the products are not
// split into 16-bit parts: [esi+off]*[edi-off] is added to xmm4 with a 64-bit add
// and [esi+off]*[edx-off] to xmm6 with a 32-bit lane add.
1679 #define Bot_Acc(i) \
1680 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1681 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1682 AS2( pmuludq xmm0, xmm1) \
1683 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1684 AS2( paddq xmm4, xmm0) \
1685 AS2( paddd xmm6, xmm1)
// Bot_SaveAcc(k): transition into the final "bottom" column. Visible steps: the
// product [esi]*[edi] is added to xmm4 (64-bit add), xmm5 shifted left by 16 bits is
// folded into xmm4, and xmm6 is re-initialised with the product [esi]*[edx].
// NOTE(review): k does not appear in the visible statements -- confirm its use.
1687 #define Bot_SaveAcc(k) \
1691 AS2( movdqa xmm6, [esi]) \
1692 AS2( movdqa xmm0, [edi]) \
1693 AS2( pmuludq xmm0, xmm6) \
1694 AS2( paddq xmm4, xmm0) \
1695 AS2( psllq xmm5, 16) \
1696 AS2( paddq xmm4, xmm5) \
1697 AS2( pmuludq xmm6, [edx])
// Bot_End(n): finishes a "bottom half" product. The high half of xmm6 is added onto
// its low half, the sum is shifted left by 32 bits and added into xmm4, and the low
// 64 bits of xmm4 are stored at [ecx+8*(n-1)].
1699 #define Bot_End(n) \
1700 AS2( movhlps xmm7, xmm6) \
1701 AS2( paddd xmm6, xmm7) \
1702 AS2( psllq xmm6, 32) \
1703 AS2( paddd xmm4, xmm6) \
1704 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
// Top_Begin(n) (SSE2 version): prologue of the SSE2 "multiply top" kernels. It
// builds the same three shuffled stack tables as Mul_Begin (based at esp+20,
// esp+20+16*n and esp+20+32*n), then copies esi into eax, points edi/edx/esi at
// offset 16*(n/2-1) inside the respective tables and clears xmm4 and xmm5.
// NOTE(review): esi presumably holds the L argument on entry -- confirm.
1708 #define Top_Begin(n) \
1711 AS2( and esp, 0xfffffff0)\
1712 AS2( sub esp, 48*n+16)\
1714 AS2( xor edx, edx) \
1716 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1717 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1718 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1719 AS2( movdqa [esp+20+2*edx], xmm0) \
1720 AS2( psrlq xmm0, 32) \
1721 AS2( movdqa [esp+20+2*edx+16], xmm0) \
1722 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1723 AS2( psrlq xmm1, 32) \
1724 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1725 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1726 AS2( psrlq xmm2, 32) \
1727 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1729 AS2( cmp edx, 8*(n)) \
1731 AS2( mov eax, esi) \
1732 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1733 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1734 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1735 AS2( pxor xmm4, xmm4)\
1736 AS2( pxor xmm5, xmm5)
// Top_Acc(i) (SSE2 version): loads the qword at [esi+off+8], multiplies it by
// [edx-off] (off being +/-(i/2)*16 as in Mul_Acc), keeps only the top 16 bits of
// the 64-bit product (shift right by 48) and adds them into xmm5.
1738 #define Top_Acc(i) \
1739 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1740 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1741 AS2( psrlq xmm0, 48) \
1742 AS2( paddd xmm5, xmm0)\
// Top_Column0(i): first column step of the SSE2 "multiply top" kernels; the visible
// instruction shifts each 64-bit lane of xmm5 left by 32 bits.
1744 #define Top_Column0(i) \
1745 AS2( psllq xmm5, 32) \
// Top_Column1(i): the visible instructions compute a one-bit correction: the dword
// at [ecx+4] shifted right by 16 is compared (signed, pcmpgtd) against eax, the
// all-ones/zero result is reduced to 1/0 with a 31-bit shift, and added into xmm4.
// NOTE(review): eax presumably carries the L argument saved by Top_Begin -- confirm.
1751 #define Top_Column1(i) \
1757 AS2( movd xmm0, eax)\
1758 AS2( movd xmm1, [ecx+4])\
1759 AS2( psrld xmm1, 16)\
1760 AS2( pcmpgtd xmm1, xmm0)\
1761 AS2( psrld xmm1, 31)\
1762 AS2( paddd xmm4, xmm1)\
1764 void SSE2_Square4(word *C,
const word *A)
1771 void SSE2_Square8(word *C, const word *A)
1787 void SSE2_Square16(word *C, const word *A)
1792 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1811 void SSE2_Square32(word *C, const word *A)
1815 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1849 void SSE2_Multiply4(word *C, const word *A, const word *B)
1861 void SSE2_Multiply8(word *C, const word *A, const word *B)
1866 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1877 void SSE2_Multiply16(word *C, const word *A, const word *B)
1882 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1901 void SSE2_Multiply32(word *C, const word *A, const word *B)
1905 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1939 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1942 Bot_SaveAcc(0) Bot_Acc(2)
1946 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1951 Mul_Acc(3) Mul_Acc(2)
1956 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1960 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1965 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1974 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1978 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1983 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2000 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2004 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2007 Top_Acc(3) Top_Acc(2) Top_Acc(1)
2010 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2019 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2022 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2025 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2038 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2041 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2044 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2065 #endif // #if CRYPTOPP_INTEGER_SSE2
// Function-pointer types for the word-array kernels that are selected at run time:
//   PAdd    - CRYPTOPP_FASTCALL kernel taking (N, C, A, B) and returning int;
//             used for both s_pAdd and s_pSub
//   PMul    - kernel taking (C, A, B); used for the s_pMul and s_pBot tables
//   PSqu    - kernel taking (C, A); used for the s_pSqu table
//   PMulTop - kernel taking (C, A, B, L); used for the s_pTop table
2069 typedef int (CRYPTOPP_FASTCALL * PAdd)(
size_t N, word *C,
const word *A,
const word *B);
2070 typedef void (* PMul)(word *C,
const word *A,
const word *B);
2071 typedef void (* PSqu)(word *C,
const word *A);
2072 typedef void (* PMulTop)(word *C,
const word *A,
const word *B, word L);
// Run-time dispatch state for the word-array kernels.
2074 #if CRYPTOPP_INTEGER_SSE2
// Add/subtract kernels start out as the baseline implementations.
2075 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
// Largest operand size (in words) handed directly to a kernel; mutable here because
// SetFunctionPointers may raise it to 32.
2076 static size_t s_recursionLimit = 8;
// NOTE(review): this constant definition presumably belongs to the opposite
// preprocessor branch of the one above -- confirm.
2078 static const size_t s_recursionLimit = 16;
// Kernel tables; callers index them with N/4, so slots 0,1,2,4,8 serve
// operand sizes 2,4,8,16,32 words.
2081 static PMul s_pMul[9], s_pBot[9];
2082 static PSqu s_pSqu[9];
2083 static PMulTop s_pTop[9];
// SetFunctionPointers: fills the s_pMul / s_pBot / s_pSqu / s_pTop dispatch tables.
// Slot k serves operands of 4*k words (slot 0 serves 2 words). Baseline kernels
// are always installed for the smallest sizes; SSE2 builds can additionally
// install the SSE2_* kernels up to 32 words and raise s_recursionLimit to 32.
// NOTE(review): the run-time condition guarding the SSE2 assignments (presumably a
// CPU feature check) should be confirmed.
2085 static void SetFunctionPointers()
// Always-baseline entries.
2087 s_pMul[0] = &Baseline_Multiply2;
2088 s_pBot[0] = &Baseline_MultiplyBottom2;
2089 s_pSqu[0] = &Baseline_Square2;
2090 s_pTop[0] = &Baseline_MultiplyTop2;
2091 s_pTop[1] = &Baseline_MultiplyTop4;
2093 #if CRYPTOPP_INTEGER_SSE2
2096 #if _MSC_VER != 1200 || !(CRYPTOPP_DEBUG)
// With the 32-word SSE2 kernels available, recursion can stop at 32 words.
2104 s_recursionLimit = 32;
2106 s_pMul[1] = &SSE2_Multiply4;
2107 s_pMul[2] = &SSE2_Multiply8;
2108 s_pMul[4] = &SSE2_Multiply16;
2109 s_pMul[8] = &SSE2_Multiply32;
2111 s_pBot[1] = &SSE2_MultiplyBottom4;
2112 s_pBot[2] = &SSE2_MultiplyBottom8;
2113 s_pBot[4] = &SSE2_MultiplyBottom16;
2114 s_pBot[8] = &SSE2_MultiplyBottom32;
2116 s_pSqu[1] = &SSE2_Square4;
2117 s_pSqu[2] = &SSE2_Square8;
2118 s_pSqu[4] = &SSE2_Square16;
2119 s_pSqu[8] = &SSE2_Square32;
2121 s_pTop[2] = &SSE2_MultiplyTop8;
2122 s_pTop[4] = &SSE2_MultiplyTop16;
2123 s_pTop[8] = &SSE2_MultiplyTop32;
// Baseline entries for 4 and 8 words.
2128 s_pMul[1] = &Baseline_Multiply4;
2129 s_pMul[2] = &Baseline_Multiply8;
2131 s_pBot[1] = &Baseline_MultiplyBottom4;
2132 s_pBot[2] = &Baseline_MultiplyBottom8;
2134 s_pSqu[1] = &Baseline_Square4;
2135 s_pSqu[2] = &Baseline_Square8;
2137 s_pTop[2] = &Baseline_MultiplyTop8;
// The 16-word baseline kernels only exist when the SSE2 path is compiled out.
2139 #if !CRYPTOPP_INTEGER_SSE2
2140 s_pMul[4] = &Baseline_Multiply16;
2141 s_pBot[4] = &Baseline_MultiplyBottom16;
2142 s_pSqu[4] = &Baseline_Square16;
2143 s_pTop[4] = &Baseline_MultiplyTop16;
// Add: adds the N-word arrays A and B into C and returns the kernel's int result
// (presumably the carry out). Note the argument order differs from the kernels,
// which take N first. SSE2 builds dispatch through the s_pAdd pointer; other
// builds call Baseline_Add directly.
2148 inline int Add(word *C,
const word *A,
const word *B,
size_t N)
2150 #if CRYPTOPP_INTEGER_SSE2
2151 return s_pAdd(N, C, A, B);
2153 return Baseline_Add(N, C, A, B);
// Subtract: subtracts the N-word array B from A into C and returns the kernel's int
// result (presumably the borrow out). SSE2 builds dispatch through the s_pSub
// pointer; other builds call Baseline_Sub directly.
2157 inline int Subtract(word *C,
const word *A,
const word *B,
size_t N)
2159 #if CRYPTOPP_INTEGER_SSE2
2160 return s_pSub(N, C, A, B);
2162 return Baseline_Sub(N, C, A, B);
// RecursiveMultiply: R = A * B for N-word operands, using T as scratch space.
// Operands no larger than s_recursionLimit go straight to the kernel s_pMul[N/4].
// Larger operands are split into halves of N2 = N/2 words and handled with three
// half-size products (A1*B1, A0*B0 and a product of half differences), combined
// with the carry bookkeeping in c2 / c3.
// NOTE(review): A0/A1/B0/B1/R0..R3/T0/T2 are presumably macros naming the half- and
// quarter-offsets of A, B, R and T -- confirm against their definitions.
2189 void RecursiveMultiply(word *R, word *T,
const word *A,
const word *B,
size_t N)
2193 if (N <= s_recursionLimit)
2194 s_pMul[N/4](R, A, B);
2197 const size_t N2 = N/2;
// AN2 selects which half of A is the minuend, so that R0 = (A+AN2) - (A+(N2^AN2));
// this assumes Compare returns a positive value when its first argument is larger.
2199 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2200 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
// Same selection for B, with the difference stored in R1.
2202 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2203 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
// Three half-size products: high halves into R2, differences into T0, low halves into R0.
2205 RecursiveMultiply(R2, T2, A1, B1, N2);
2206 RecursiveMultiply(T0, T2, R0, R1, N2);
2207 RecursiveMultiply(R0, T2, A0, B0, N2);
// Combine the partial products; c2 and c3 collect carries for R2 and R3.
2211 int c2 = Add(R2, R2, R1, N2);
2213 c2 += Add(R1, R2, R0, N2);
2214 c3 += Add(R2, R2, R3, N2);
// The difference product T0 is either subtracted or added
// (presumably depending on whether AN2 and BN2 agree -- confirm).
2217 c3 -= Subtract(R1, R1, T0, N);
2219 c3 += Add(R1, R1, T0, N);
2221 c3 += Increment(R2, N2, c2);
2223 Increment(R3, N2, c3);
// RecursiveSquare: R = A * A for an N-word operand, using T as scratch space.
// Above s_recursionLimit the operand is split into halves of N2 = N/2 words: both
// halves are squared recursively (into R0 and R2), the cross product A0*A1 is
// formed in T0 and added into R1 twice, and the two carries are propagated into R3.
// NOTE(review): A0/A1/R0..R3/T0/T2 are presumably offset macros -- confirm.
2231 void RecursiveSquare(word *R, word *T,
const word *A,
size_t N)
2235 if (N <= s_recursionLimit)
2239 const size_t N2 = N/2;
2241 RecursiveSquare(R0, T2, A0, N2);
2242 RecursiveSquare(R2, T2, A1, N2);
2243 RecursiveMultiply(T0, T2, A0, A1, N2);
// Adding T0 twice accounts for the doubled cross term.
2245 int carry = Add(R1, R1, T0, N);
2246 carry += Add(R1, R1, T0, N);
2247 Increment(R3, N2, carry);
// RecursiveMultiplyBottom: "bottom half" multiply of two N-word operands, using T
// as scratch space. Operands no larger than s_recursionLimit go to the kernel
// s_pBot[N/4]. Otherwise the full product of the low halves is written to R, and
// the bottom halves of the two cross products (A1*B0 and A0*B1) are each computed
// into T0 and added into R1 over N2 words; carries out of those adds are discarded.
// NOTE(review): A0/A1/B0/B1/R1/T0/T1 are presumably offset macros -- confirm.
2256 void RecursiveMultiplyBottom(word *R, word *T,
const word *A,
const word *B,
size_t N)
2260 if (N <= s_recursionLimit)
2261 s_pBot[N/4](R, A, B);
2264 const size_t N2 = N/2;
2266 RecursiveMultiply(R, T, A0, B0, N2);
2267 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2268 Add(R1, R1, T0, N2);
2269 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2270 Add(R1, R1, T0, N2);
// MultiplyTop: "top half" multiply of the N-word operands A and B into R, using T as
// scratch space and the caller-supplied array L.
// NOTE(review): L presumably holds the already-known lower half of the product --
// confirm at the call sites.
// Operands no larger than s_recursionLimit go to the kernel s_pTop[N/4], which
// receives only the single word L[N-1]. Larger operands use the same half-difference
// scheme as RecursiveMultiply, with L used to reconstruct the carries into the top half.
// NOTE(review): A0/A1/B0/B1/R0/R1/T0..T2 are presumably offset macros -- confirm.
2280 void MultiplyTop(word *R, word *T,
const word *L,
const word *A,
const word *B,
size_t N)
2284 if (N <= s_recursionLimit)
2285 s_pTop[N/4](R, A, B, L[N-1]);
2288 const size_t N2 = N/2;
// Half differences, larger minus smaller (assuming Compare's sign convention).
2290 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2291 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2293 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2294 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
// Difference product into T0, product of the high halves into R0.
2296 RecursiveMultiply(T0, T2, R0, R1, N2);
2297 RecursiveMultiply(R0, T2, A1, B1, N2);
// T2 = (upper half of L) - (lower half of L); c2 starts as the borrow.
2302 int c2 = Subtract(T2, L+N2, L, N2);
// Two alternative combinations follow, one adding and one subtracting the
// difference product (presumably chosen by comparing AN2 and BN2 -- confirm).
2306 c2 -= Add(T2, T2, T0, N2);
2307 t = (Compare(T2, R0, N2) == -1);
2308 c3 = t - Subtract(T2, T2, T1, N2);
2312 c2 += Subtract(T2, T2, T0, N2);
2313 t = (Compare(T2, R0, N2) == -1);
2314 c3 = t + Add(T2, T2, T1, N2);
// Apply the signed carry c2 to T2, then assemble the result in R0/R1.
2319 c3 += Increment(T2, N2, c2);
2321 c3 -= Decrement(T2, N2, -c2);
2322 c3 += Add(R0, T2, R1, N2);
2325 Increment(R1, N2, c3);
// Multiply: thin entry point that forwards unchanged to RecursiveMultiply.
2329 inline void Multiply(word *R, word *T,
const word *A,
const word *B,
size_t N)
2331 RecursiveMultiply(R, T, A, B, N);
// Square: thin entry point that forwards unchanged to RecursiveSquare.
2334 inline void Square(word *R, word *T,
const word *A,
size_t N)
2336 RecursiveSquare(R, T, A, N);
// MultiplyBottom: thin entry point that forwards unchanged to RecursiveMultiplyBottom.
2339 inline void MultiplyBottom(word *R, word *T,
const word *A,
const word *B,
size_t N)
2341 RecursiveMultiplyBottom(R, T, A, B, N);
// AsymmetricMultiply: multiplies A (NA words) by B (NB words) into R, using T as
// scratch space. The visible statements show three kinds of path, presumably
// selected by the relative sizes of NA and NB (confirm the conditions):
//   - a direct Multiply over NA words;
//   - shortcuts for a very short A: a LinearMultiply by the single word A[0], a
//     zero fill of NB+2 words, or a plain copy of B with two zero words appended;
//   - a general path that multiplies A by successive NA-word slices of B, writing
//     alternate partial products into R and into the scratch area at T+NA, and then
//     adds the scratch products into R at offset NA, propagating a final carry.
2349 void AsymmetricMultiply(word *R, word *T,
const word *A,
size_t NA,
const word *B,
size_t NB)
2356 Multiply(R, T, A, B, NA);
2378 R[NB] = LinearMultiply(R, B, A[0], NB);
2382 SetWords(R, 0, NB+2);
2385 CopyWords(R, B, NB);
2386 R[NB] = R[NB+1] = 0;
// Slice schedule, first variant: slice 0 goes to R (its upper half is saved to
// T+2*NA), later even slices go to scratch and odd slices to R.
2394 Multiply(R, T, A, B, NA);
2395 CopyWords(T+2*NA, R+NA, NA);
2397 for (i=2*NA; i<NB; i+=2*NA)
2398 Multiply(T+NA+i, T, A, B+i, NA);
2399 for (i=NA; i<NB; i+=2*NA)
2400 Multiply(R+i, T, A, B+i, NA);
// Slice schedule, second variant: even slices go to R and odd slices to scratch.
2404 for (i=0; i<NB; i+=2*NA)
2405 Multiply(R+i, T, A, B+i, NA);
2406 for (i=NA; i<NB; i+=2*NA)
2407 Multiply(T+NA+i, T, A, B+i, NA);
// Merge the scratch partial products and carry into the top NA words.
2410 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2411 Increment(R+NB, NA);
// RecursiveInverseModPower2: computes into R an inverse of the N-word value A modulo
// a power of two (N words wide), using T as scratch space.
// Recursive case: invert the low half first, then extend the result to full width
// with one MultiplyTop, two MultiplyBottom calls, an Add and a two's complement.
// Base case (two words): start from the single-word inverse of A[0] and refine it
// once as R = T * (2 - T*A), truncated to two words.
// NOTE(review): A0/A1/R0/R1/T0/T1 are presumably offset macros -- confirm.
2418 void RecursiveInverseModPower2(word *R, word *T,
const word *A,
size_t N)
2424 const size_t N2 = N/2;
2425 RecursiveInverseModPower2(R0, T0, A0, N2);
2427 SetWords(T0+1, 0, N2-1);
2428 MultiplyTop(R1, T1, T0, R0, A0, N2);
2429 MultiplyBottom(T0, T1, R0, A1, N2);
2430 Add(T0, R1, T0, N2);
2431 TwosComplement(T0, N2);
2432 MultiplyBottom(R1, T1, R0, T0, N2);
// Base case: single-word inverse, then one refinement step.
2436 T[0] = AtomicInverseModPower2(A[0]);
// T+2 = low two words of T*A; negate and add 2 to get (2 - T*A).
2438 s_pBot[0](T+2, T, A);
2439 TwosComplement(T+2, 2);
2440 Increment(T+2, 2, 2);
2441 s_pBot[0](R, T, T+2);
// MontgomeryReduce: reduces the 2N-word value X with respect to the N-word modulus M
// and writes the N-word result to R; U supplies a precomputed value derived from M
// and T is scratch space. Three implementations are visible, presumably selected
// by preprocessor conditions (confirm):
//   1. block form: MultiplyBottom / MultiplyTop / Subtract, followed by a
//      branch-free selection between T and T+N driven by the borrow;
//   2. word-serial form using the double-word macros (this form modifies X in place);
//   3. the same word-serial form written with 64-bit MMX intrinsics (__m64).
2451 void MontgomeryReduce(word *R, word *T, word *X,
const word *M,
const word *U,
size_t N)
2454 MultiplyBottom(R, T, X, U, N);
2455 MultiplyTop(T, T+N, X, R, M, N);
2456 word borrow = Subtract(T, X+N, T, N);
// T+N always receives T+M; the copy below picks T when borrow is 0 and T+N when
// borrow is 1, because (0-borrow) & N is then 0 or N.
2458 word carry = Add(T+N, T, M, N);
2460 CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2461 CopyWords(R, T + ((0-borrow) & N), N);
// Word-serial form: u is the negated low word of U.
2463 const word u = 0-U[0];
2465 for (
size_t i=0; i<N; i++)
2467 const word t = u * X[i];
// The inner loop handles two words of M per iteration.
2469 for (
size_t j=0; j<N; j+=2)
2471 MultiplyWords(p, t, M[j]);
2472 Acc2WordsBy1(p, X[i+j]);
2474 X[i+j] = LowWord(p);
2476 MultiplyWords(p, t, M[j+1]);
2477 Acc2WordsBy1(p, X[i+j+1]);
2479 X[i+j+1] = LowWord(p);
// If propagating the carry c overflows the upper half, subtract M until a borrow occurs.
2483 if (Increment(X+N+i, N-i, c))
2484 while (!Subtract(X+N, X+N, M, N)) {}
2487 memcpy(R, X+N, N*WORD_SIZE);
// MMX form of the same loop; c holds the running 64-bit carry.
2489 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2490 for (
size_t i=0; i<N; i++)
2492 __m64 t = _mm_cvtsi32_si64(X[i]);
2493 t = _mm_mul_su32(t, u);
2494 __m64 c = _mm_setzero_si64();
2495 for (
size_t j=0; j<N; j+=2)
2497 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2498 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2499 c = _mm_add_si64(c, p);
2500 X[i+j] = _mm_cvtsi64_si32(c);
2501 c = _mm_srli_si64(c, 32);
2502 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2503 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2504 c = _mm_add_si64(c, p);
2505 X[i+j+1] = _mm_cvtsi64_si32(c);
2506 c = _mm_srli_si64(c, 32);
2509 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2510 while (!Subtract(X+N, X+N, M, N)) {}
2513 memcpy(R, X+N, N*WORD_SIZE);
// HalfMontgomeryReduce: reduction of X with respect to the N-word modulus M that works
// on half-size (N2 = N/2 word) pieces; U and V supply precomputed values and T is
// scratch space. The visible sequence is a fixed chain of half-size
// Multiply / MultiplyBottom / MultiplyTop calls whose signed carries are tracked
// in c2 and c3 and applied to R1, ending with a subtraction of M from R.
// NOTE(review): T0..T3, X0/X2/X3, M0/M1, V0/V1, R0/R1 are presumably offset macros,
// and the conditions guarding the final carry fix-ups should be confirmed.
2525 void HalfMontgomeryReduce(word *R, word *T,
const word *X,
const word *M,
const word *U,
const word *V,
size_t N)
2539 const size_t N2 = N/2;
2540 Multiply(T0, T2, V0, X3, N2);
2541 int c2 = Add(T0, T0, X0, N);
2542 MultiplyBottom(T3, T2, T0, U, N2);
2543 MultiplyTop(T2, R, T0, T3, M0, N2);
2544 c2 -= Subtract(T2, T1, T2, N2);
2545 Multiply(T0, R, T3, M1, N2);
2546 c2 -= Subtract(T0, T2, T0, N2);
2547 int c3 = -(int)Subtract(T1, X2, T1, N2);
2548 Multiply(R0, T2, V1, X3, N2);
2549 c3 += Add(R, R, T, N);
// Apply the signed carry c2 to the upper half of R.
2552 c3 += Increment(R1, N2);
2554 c3 -= Decrement(R1, N2, -c2);
2558 Subtract(R, R, M, N);
// AtomicDivide: divides the four-word value A[0..3] by the two-word value B[0..1]
// using DivideFourWordsByTwo and stores the two-word quotient in Q (low half in
// Q[0], high half in Q[1]). The trailing assertion checks that the upper two
// words of T are zero and that the lower two, as a two-word value, are less than B.
// NOTE(review): T is presumably a local four-word buffer that receives the
// remainder -- confirm.
2652 static inline void AtomicDivide(word *Q,
const word *A,
const word *B)
2655 DWord q = DivideFourWordsByTwo<word, DWord>(T,
DWord(A[0], A[1]),
DWord(A[2], A[3]),
DWord(B[0], B[1]));
2656 Q[0] = q.GetLowHalf();
2657 Q[1] = q.GetHighHalf();
2663 CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
// CorrectQuotientEstimate: given a two-word quotient estimate Q for dividing R by the
// N-word divisor B, subtracts Q*B from the (N+2)-word R and then repairs the
// estimate: while the remainder is still at least B (R[N] non-zero or R >= B over
// N words), B is subtracted once more and the two-word Q is incremented with carry.
// T is scratch space for the product.
2673 static void CorrectQuotientEstimate(word *R, word *T, word *Q,
const word *B,
size_t N)
2677 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2679 word borrow = Subtract(R, R, T, N+2);
2681 CRYPTOPP_UNUSED(borrow);
2683 while (R[N] || Compare(R, B, N) >= 0)
2685 R[N] -= Subtract(R, R, B, N);
// Two-word increment: Q[1] receives the carry when Q[0] wraps to zero.
2686 Q[1] += (++Q[0]==0);
// Divide: long division of A (NA words) by B (NB words), producing the quotient in Q
// and the remainder in R, with T as scratch space.
// Visible steps: copy B into the scratch area TB and A into TA, shifting both left
// by the same amount (a whole word if B's top word is zero, plus the number of
// bits needed to fill TB's top word); handle the top quotient words by repeated
// subtraction when the overflow words of TA are small; then step through TA two
// words at a time, estimating each two-word quotient digit with AtomicDivide
// against BT (the top two divisor words plus one) and repairing it with
// CorrectQuotientEstimate; finally copy the remainder out and shift it back.
// NOTE(review): TA and BT are presumably scratch pointers declared alongside
// TB/TP -- confirm.
2697 void Divide(word *R, word *Q, word *T,
const word *A,
size_t NA,
const word *B,
size_t NB)
2705 word *
const TB=T+NA+2;
2706 word *
const TP=T+NA+2+NB;
// Normalise the divisor copy.
2709 unsigned shiftWords = (B[NB-1]==0);
2710 TB[0] = TB[NB-1] = 0;
2711 CopyWords(TB+shiftWords, B, NB-shiftWords);
2712 unsigned shiftBits = WORD_BITS -
BitPrecision(TB[NB-1]);
2714 ShiftWordsLeftByBits(TB, NB, shiftBits);
// Apply the same shift to the dividend copy, which has two extra words of headroom.
2717 TA[0] = TA[NA] = TA[NA+1] = 0;
2718 CopyWords(TA+shiftWords, A, NA);
2719 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2721 if (TA[NA+1]==0 && TA[NA] <= 1)
2723 Q[NA-NB+1] = Q[NA-NB] = 0;
2724 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2726 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
// BT = top two words of the normalised divisor, plus one (with carry into BT[1]).
2737 BT[0] = TB[NB-2] + 1;
2738 BT[1] = TB[NB-1] + (BT[0]==0);
// Main loop: one two-word quotient digit per iteration, from the top down.
2741 for (
size_t i=NA-2; i>=NB; i-=2)
2743 AtomicDivide(Q+i-NB, TA+i-2, BT);
2744 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
// Undo the normalisation on the remainder.
2748 CopyWords(R, TA+shiftWords, NB);
2749 ShiftWordsRightByBits(R, NB, shiftBits);
// EvenWordCount: scans X from the top in pairs of words while both words of the
// top pair are zero.
// NOTE(review): presumably N is reduced by two per zero pair and the remaining
// (even) count is returned; N is assumed to be even -- confirm.
2752 static inline size_t EvenWordCount(
const word *X,
size_t N)
2754 while (N && X[N-2]==0 && X[N-1]==0)
// AlmostInverse: iterative shift-and-subtract inversion of A (NA words) with respect
// to the N-word modulus M, writing an N-word result to R and returning an
// unsigned count. T provides 3*N words of scratch, which are zeroed first.
// The working values f, g (length fgLen) and b, c (length bcLen) are shifted,
// compared and subtracted/added in a loop; the lengths are tracked in steps of two words.
// NOTE(review): f, g, b, c are presumably pointers into T, and the returned count
// is presumably the accumulated shift amount -- confirm.
2765 unsigned int AlmostInverse(word *R, word *T,
const word *A,
size_t NA,
const word *M,
size_t N)
2773 size_t bcLen=2, fgLen=EvenWordCount(M, N);
2777 SetWords(T, 0, 3*N);
2779 CopyWords(f, A, NA);
// f has no non-zero words at all.
2787 if (EvenWordCount(f, fgLen)==0)
// Whole-word shift: f right by one word, c left by one word (growing bcLen if needed).
2793 ShiftWordsRightByWords(f, fgLen, 1);
2794 bcLen += 2 * (c[bcLen-1] != 0);
2796 ShiftWordsLeftByWords(c, bcLen, 1);
// Termination test: low word is 1 and every other word of f is zero.
2806 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2809 Subtract(R, M, b, N);
// Bit shift by i: f right, c left; t receives the bits shifted out of c.
2815 ShiftWordsRightByBits(f, fgLen, i);
2816 t = ShiftWordsLeftByBits(c, bcLen, i);
2818 bcLen += 2 * (t!=0);
2821 bool swap = Compare(f, g, fgLen)==-1;
// Shrink fgLen by two when the top word pair of f is zero.
2826 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2828 Subtract(f, f, g, fgLen);
2829 t = Add(b, b, c, bcLen);
// DivideByPower2Mod: divides A by a power of two modulo the N-word modulus M, writing
// the result to R. The visible statements show two halving steps: a plain right
// shift by one bit, and a variant that first adds M, shifts right by one bit and
// re-inserts the carry of the addition as the top bit of R[N-1].
// NOTE(review): presumably the steps are applied k times, with the plain shift
// used when R is even -- confirm.
2840 void DivideByPower2Mod(word *R,
const word *A,
size_t k,
const word *M,
size_t N)
2847 ShiftWordsRightByBits(R, N, 1);
2850 word carry = Add(R, R, M, N);
2851 ShiftWordsRightByBits(R, N, 1);
2852 R[N-1] += carry<<(WORD_BITS-1);
// MultiplyByPower2Mod: multiplies A by a power of two modulo the N-word modulus M,
// writing the result to R. The visible doubling step shifts R left by one bit and
// subtracts M once if a bit was shifted out or the result is not below M.
// NOTE(review): presumably the step is repeated k times -- confirm.
2861 void MultiplyByPower2Mod(word *R,
const word *A,
size_t k,
const word *M,
size_t N)
2866 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2867 Subtract(R, R, M, N);
// InitializeInteger constructor: one-time setup for the integer code. It acts only
// while g_pAssignIntToInteger is still null: it fills the kernel dispatch tables
// (SetFunctionPointers) and publishes AssignIntToInteger through the global hook.
2872 InitializeInteger::InitializeInteger()
2874 if (!g_pAssignIntToInteger)
2876 SetFunctionPointers();
2877 g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
// Lookup table mapping a word count of 0..8 to the rounded-up register size
// (2, 4 or 8 words).
2881 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
// RoundupSize: rounds a word count up to a supported register size; the visible
// return is the table lookup.
// NOTE(review): n must be within the table (0..8) on this path; larger values are
// presumably handled by other branches -- confirm.
2883 static inline size_t RoundupSize(
size_t n)
2886 return RoundupSizeTable[n];
2898 : reg(2), sign(POSITIVE)
2900 reg[0] = reg[1] = 0;
2904 : reg(RoundupSize(t.WordCount())), sign(t.sign)
2906 CopyWords(reg, t.reg, reg.
size());
2912 reg[0] = word(value);
2926 reg[0] = word(value);
2942 unsigned long value = (
unsigned long)reg[0];
2946 return (
signed long)value >= 0;
2948 return -(
signed long)value < 0;
2955 unsigned long value = (
unsigned long)reg[0];
2957 return sign==
POSITIVE ? value : -(
signed long)value;
2967 encodedInteger.
Get(block, block.
size());
2974 Decode(encodedInteger, byteCount, s);
2984 #if (CRYPTOPP_MSC_VERSION >= 1400)
2985 std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2986 stdext::make_checked_array_iterator(block.
begin(), block.
size()));
2988 std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.
begin());
2994 Decode(encodedInteger, byteCount, s);
3009 if (!
Randomize(rng, min, max, rnType, equiv, mod))
3053 bool Integer::operator!()
const
3062 if (reg.
size() != t.reg.
size() || t.reg[t.reg.
size()/2] == 0)
3064 CopyWords(reg, t.reg, reg.
size());
3074 if (n/WORD_BITS < reg.
size())
3075 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3085 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3089 if (n/WORD_BITS < reg.
size())
3090 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3098 if (n/WORD_SIZE < reg.
size())
3099 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3107 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3108 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3115 for (
unsigned int j=0; j<n; j++)
3116 v |= lword(
GetBit(i+j)) << j;
3120 Integer Integer::operator-()
const
3127 Integer Integer::AbsoluteValue()
const
3137 std::swap(sign, a.sign);
3141 : reg(RoundupSize(length)), sign(POSITIVE)
3144 SetWords(reg+1, 0, reg.
size()-1);
3152 int radix, sign = 1;
3155 unsigned int length;
3156 for (length = 0; str[length] != 0; length++) {}
3163 switch (str[length-1])
3185 str += 1, length -= 1;
3188 if (length > 2 && str[0] ==
'0' && (str[1] ==
'x' || str[1] ==
'X'))
3191 str += 2, length -= 2;
3196 for (
unsigned int i=0; i<length; i++)
3198 int digit, ch =
static_cast<int>(str[i]);
3202 if (ch >=
'0' && ch <=
'9')
3204 else if (ch >=
'a' && ch <=
'f')
3205 digit = ch -
'a' + 10;
3206 else if (ch >=
'A' && ch <=
'F')
3207 digit = ch -
'A' + 10;
3221 unsigned int nh = 0, nl = 0, nc = 0;
3224 for (
unsigned int i=0; i<length; i++)
3226 int digit, ch =
static_cast<int>(str[i]);
3228 if (ch >=
'0' && ch <=
'9')
3230 else if (ch >=
'a' && ch <=
'f')
3231 digit = ch -
'a' + 10;
3232 else if (ch >=
'A' && ch <=
'F')
3233 digit = ch -
'A' + 10;
3246 v += position * (nh << 4 | nl);
3247 nc = 0, position <<= 8;
3257 for (
int i=
static_cast<int>(length)-1; i>=0; i--)
3259 int digit, ch =
static_cast<int>(str[i]);
3261 if (ch >=
'0' && ch <=
'9')
3263 else if (ch >=
'a' && ch <=
'f')
3264 digit = ch -
'a' + 10;
3265 else if (ch >=
'A' && ch <=
'F')
3266 digit = ch -
'A' + 10;
3285 : reg(2), sign(POSITIVE)
3287 *
this = StringToInteger(str,order);
3291 : reg(2), sign(POSITIVE)
3293 *
this = StringToInteger(str,order);
3298 return (
unsigned int)CountWords(reg, reg.
size());
3305 return (wordCount-1)*WORD_SIZE +
BytePrecision(reg[wordCount-1]);
3314 return (wordCount-1)*WORD_BITS +
BitPrecision(reg[wordCount-1]);
3322 Decode(store, inputLen, s);
3333 while (inputLen>0 && (sign==
POSITIVE ? b==0 : b==0xff))
3341 const size_t size = RoundupSize(
BytesToWords(inputLen));
3345 for (
size_t i=inputLen; i > 0; i--)
3348 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3353 for (
size_t i=inputLen; i<reg.
size()*WORD_SIZE; i++)
3354 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3355 TwosComplement(reg, reg.
size());
3364 const bool pre = (signedness ==
UNSIGNED);
3378 Encode(sink, outputLen, signedness);
3385 for (
size_t i=outputLen; i > 0; i--)
3412 if (!dec.IsDefiniteLength() || dec.
MaxRetrievable() < dec.RemainingLength())
3428 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3442 word16 bitCount = word16(
BitCount());
3446 return 2 + byteCount;
3465 const size_t nbytes = nbits/8 + 1;
3469 buf[0] = (byte)
Crop(buf[0], nbits % 8);
3479 const unsigned int nbits = range.
BitCount();
3485 while (*
this > range);
3492 return GenerateRandomNoThrow(rng,
MakeParameters(
"Min", min)(
"Max", max)(
"RandomNumberType", rnType)(
"EquivalentTo", equiv)(
"Mod", mod));
// KDF2_RNG constructor: starts the counter at 0 and sizes m_counterAndSeed to hold
// seedSize + 4 bytes; the seed is copied in after the first 4 bytes.
// NOTE(review): the leading 4 bytes are presumably reserved for the encoded
// counter -- confirm where they are written.
3498 KDF2_RNG(
const byte *seed,
size_t seedSize)
3499 : m_counter(0), m_counterAndSeed(seedSize + 4)
3501 memcpy(m_counterAndSeed + 4, seed, seedSize);
3535 throw InvalidArgument(
"Integer: invalid EquivalentTo and/or Mod argument");
3541 if (params.
GetValue(Name::Seed(), seed))
3554 bq.
Get(finalSeed, finalSeed.size());
3555 kdf2Rng.reset(
new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3566 Integer min1 = min + (equiv-min)%mod;
3587 if (
FirstPrime(first, max, equiv, mod, pSelector))
3591 if (!
FirstPrime(first, max, equiv, mod, pSelector))
3599 if (
FirstPrime(*
this,
STDMIN(*
this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
// Stream extraction for Integer. Characters are gathered into the buffer str, which
// is grown by 16 whenever it is full, for as long as the stream is good and
// the character is one of: '-', 'x', a decimal digit, a hex digit in either case,
// 'h'/'H', 'o'/'O', ',' or '.'. The last gathered position is then overwritten
// with a terminating '\0'.
// NOTE(review): the conversion of the collected text into `a` is presumably done by
// the Integer string constructor -- confirm.
3609 std::istream& operator>>(std::istream& in,
Integer &a)
3612 unsigned int length = 0;
3621 if (length >= str.
size())
3622 str.
Grow(length + 16);
3624 while (in && (c==
'-' || c==
'x' || (c>=
'0' && c<=
'9') || (c>=
'a' && c<=
'f') || (c>=
'A' && c<=
'F') || c==
'h' || c==
'H' || c==
'o' || c==
'O' || c==
',' || c==
'.'));
3628 str[length-1] =
'\0';
// Stream insertion for Integer. The output base is chosen from the stream's
// basefield flags (with explicit cases for oct and hex); digits come from an upper-
// or lower-case table according to the stream's uppercase flag; a suffix is written
// last. When CRYPTOPP_USE_STD_SHOWBASE is defined the stream's showbase flag is
// also consulted.
// NOTE(review): the suffix is presumably the base marker ('h', 'o' or '.') -- confirm.
3634 std::ostream& operator<<(std::ostream& out,
const Integer &a)
3637 const long f = out.flags() & std::ios::basefield;
3642 case std::ios::oct :
3647 case std::ios::hex :
3669 static const char upper[]=
"0123456789ABCDEF";
3670 static const char lower[]=
"0123456789abcdef";
3672 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3691 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3692 if (out.flags() & std::ios_base::showbase)
3697 return out << suffix;
3701 Integer& Integer::operator++()
3705 if (Increment(reg, reg.
size()))
3708 reg[reg.
size()/2]=1;
3713 word borrow = Decrement(reg, reg.
size());
3715 CRYPTOPP_UNUSED(borrow);
3723 Integer& Integer::operator--()
3727 if (Increment(reg, reg.
size()))
3730 reg[reg.
size()/2]=1;
3735 if (Decrement(reg, reg.
size()))
3745 int carry;
const bool pre = (a.reg.size() == b.reg.size());
3746 if (!pre && a.reg.size() > b.reg.size())
3748 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3749 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3750 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3754 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3758 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3759 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3760 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3766 sum.reg[sum.reg.
size()/2] = 1;
3773 unsigned aSize = a.WordCount();
3775 unsigned bSize = b.WordCount();
3782 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3783 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3784 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3788 else if (aSize == bSize)
3790 if (Compare(a.reg, b.reg, aSize) >= 0)
3792 Subtract(diff.reg, a.reg, b.reg, aSize);
3797 Subtract(diff.reg, b.reg, a.reg, aSize);
3803 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3804 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3805 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
// Returns the larger of two values, using only operator< for the comparison.
//   a, b - the values to compare; both must stay alive while the result is used,
//          because the result is a reference to one of them, not a copy.
// When neither value is less than the other, the first argument (a) is returned.
template <class T> inline const T& STDMAX2(const T& a, const T& b)
{
	return a < b ? b : a;
}
3819 Integer sum((word)0, STDMAX2(reg.
size(), b.reg.size()));
3822 if (b.NotNegative())
3823 PositiveAdd(sum, *
this, b);
3825 PositiveSubtract(sum, *
this, b);
3829 if (b.NotNegative())
3830 PositiveSubtract(sum, b, *
this);
3833 PositiveAdd(sum, *
this, b);
3846 PositiveAdd(*
this, *
this, t);
3848 PositiveSubtract(*
this, *
this, t);
3853 PositiveSubtract(*
this, t, *
this);
3856 PositiveAdd(*
this, *
this, t);
3865 Integer diff((word)0, STDMAX2(reg.
size(), b.reg.size()));
3868 if (b.NotNegative())
3869 PositiveSubtract(diff, *
this, b);
3871 PositiveAdd(diff, *
this, b);
3875 if (b.NotNegative())
3877 PositiveAdd(diff, *
this, b);
3881 PositiveSubtract(diff, b, *
this);
3892 PositiveSubtract(*
this, *
this, t);
3894 PositiveAdd(*
this, *
this, t);
3900 PositiveAdd(*
this, *
this, t);
3904 PositiveSubtract(*
this, t, *
this);
3909 Integer& Integer::operator<<=(
size_t n)
3912 const size_t shiftWords = n / WORD_BITS;
3913 const unsigned int shiftBits = (
unsigned int)(n % WORD_BITS);
3916 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3917 ShiftWordsLeftByBits(reg+shiftWords, wordCount+
BitsToWords(shiftBits), shiftBits);
3921 Integer& Integer::operator>>=(
size_t n)
3924 const size_t shiftWords = n / WORD_BITS;
3925 const unsigned int shiftBits = (
unsigned int)(n % WORD_BITS);
3927 ShiftWordsRightByWords(reg, wordCount, shiftWords);
3928 if (wordCount > shiftWords)
3929 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3937 size_t aSize = RoundupSize(a.WordCount());
3938 size_t bSize = RoundupSize(b.WordCount());
3940 product.reg.
CleanNew(RoundupSize(aSize+bSize));
3944 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3949 PositiveMultiply(product, a, b);
3951 if (a.NotNegative() != b.NotNegative())
3958 Multiply(product, *
this, b);
3987 unsigned aSize = a.WordCount();
3988 unsigned bSize = b.WordCount();
4004 remainder.reg.
CleanNew(RoundupSize(bSize));
4006 quotient.reg.
CleanNew(RoundupSize(aSize-bSize+2));
4010 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4015 PositiveDivide(remainder, quotient, dividend, divisor);
4023 remainder = divisor.AbsoluteValue() - remainder;
4037 if (wordCount <= a.WordCount())
4039 r.reg.
resize(RoundupSize(wordCount));
4040 CopyWords(r.reg, a.reg, wordCount);
4041 SetWords(r.reg+wordCount, 0, r.reg.
size()-wordCount);
4042 if (n % WORD_BITS != 0)
4043 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4047 r.reg.
resize(RoundupSize(a.WordCount()));
4048 CopyWords(r.reg, a.reg, r.reg.
size());
4052 if (a.IsNegative() && r.
NotZero())
4078 if ((divisor & (divisor-1)) == 0)
4081 remainder = dividend.reg[0] & (divisor-1);
4086 quotient.reg.
CleanNew(RoundupSize(i));
4090 quotient.reg[i] =
DWord(dividend.reg[i], remainder) / divisor;
4091 remainder =
DWord(dividend.reg[i], remainder) % divisor;
4102 remainder = divisor - remainder;
4107 Integer Integer::DividedBy(word b)
const
4124 if ((divisor & (divisor-1)) != 0)
4133 remainder =
DWord(reg[i], remainder) % divisor;
4140 remainder = sum % divisor;
4145 remainder = reg[0] & (divisor-1);
4149 remainder = divisor - remainder;
4157 sign =
Sign(1-sign);
4160 int Integer::PositiveCompare(
const Integer& t)
const
4166 return size > tSize ? 1 : -1;
4168 return CryptoPP::Compare(reg, t.reg, size);
4176 return PositiveCompare(t);
4185 return -PositiveCompare(t);
4201 y = (x + *
this/x) >> 1;
4215 return (
WordCount() == 1) && (reg[0] == 1);
4231 return mr.Exponentiate(x, e);
4254 return !u ?
Zero() : (m*(*
this-u)+1)/(*this);
4259 unsigned k = AlmostInverse(r.reg, T, reg, reg.
size(), m.reg, m.reg.
size());
4260 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.
size());
4266 word g0 = mod, g1 = *
this % mod;
4267 word v0 = 0, v1 = 1;
4295 if (oid != ASN1::prime_field())
4305 ASN1::prime_field().DEREncode(seq);
4322 if (a.reg.size()==m_modulus.reg.
size())
4324 CryptoPP::DivideByPower2Mod(m_result.reg.
begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4328 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4333 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4335 if (CryptoPP::Add(m_result.reg.
begin(), a.reg, b.reg, a.reg.size())
4336 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4338 CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
4345 if (m_result1 >= m_modulus)
4346 m_result1 -= m_modulus;
4353 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4355 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4356 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4358 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4373 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4375 if (CryptoPP::Subtract(m_result.reg.
begin(), a.reg, b.reg, a.reg.size()))
4376 CryptoPP::Add(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
4383 m_result1 += m_modulus;
4390 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4392 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4393 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4410 CopyWords(m_result.reg.
begin(), m_modulus.reg, m_modulus.reg.
size());
4411 if (CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, a.reg, a.reg.size()))
4412 Decrement(m_result.reg.
begin()+a.reg.size(), m_modulus.reg.
size()-a.reg.size());
4419 if (m_modulus.
IsOdd())
4430 if (m_modulus.
IsOdd())
4434 for (
unsigned int i=0; i<exponentsCount; i++)
4443 m_u((word)0, m_modulus.reg.size()),
4444 m_workspace(5*m_modulus.reg.size())
4446 if (!m_modulus.IsOdd())
4447 throw InvalidArgument(
"MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4449 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.
size());
4454 word *
const T = m_workspace.
begin();
4455 word *
const R = m_result.reg.begin();
4456 const size_t N = m_modulus.reg.size();
4459 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4460 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4461 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4467 word *
const T = m_workspace.
begin();
4468 word *
const R = m_result.reg.begin();
4469 const size_t N = m_modulus.reg.size();
4472 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4473 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4474 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4480 word *
const T = m_workspace.
begin();
4481 word *
const R = m_result.reg.begin();
4482 const size_t N = m_modulus.reg.size();
4485 CopyWords(T, a.reg, a.reg.size());
4486 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4487 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4494 word *
const T = m_workspace.
begin();
4495 word *
const R = m_result.reg.begin();
4496 const size_t N = m_modulus.reg.size();
4499 CopyWords(T, a.reg, a.reg.size());
4500 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4501 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4502 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4507 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4509 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4516 template <> CRYPTOPP_DLL
4520 static const unsigned int BIT_32 = (1U << 31);
4521 const bool UPPER = !!(base & BIT_32);
4522 static const unsigned int BIT_31 = (1U << 30);
4523 const bool BASE = !!(base & BIT_31);
4525 const char CH = UPPER ?
'A' :
'a';
4526 base &= ~(BIT_32|BIT_31);
4532 bool negative =
false, zero =
false;
4550 s[i++]=char((digit < 10 ?
'0' : (CH - 10)) + digit);
4555 result.reserve(i+2);
4570 else if (base == 16)
4582 template <> CRYPTOPP_DLL
4586 static const unsigned int HIGH_BIT = (1U << 31);
4587 const char CH = !!(base & HIGH_BIT) ?
'A' :
'a';
4597 word64 digit = value % base;
4598 result = char((digit < 10 ?
'0' : (CH - 10)) + digit) + result;