105 const std::vector<Fr*>& root_table)
107 auto scratch_space_ptr = get_scratch_space<Fr>(domain.
size);
108 auto scratch_space = scratch_space_ptr.get();
110 const size_t num_polys = coeffs.size();
112 const size_t poly_size = domain.
size / num_polys;
114 const size_t poly_mask = poly_size - 1;
115 const size_t log2_poly_size = (size_t)numeric::get_msb(poly_size);
120 for (size_t i = (j * domain.thread_size); i < ((j + 1) * domain.thread_size); i += 2) {
121 uint32_t next_index_1 = (uint32_t)reverse_bits((uint32_t)i + 2, (uint32_t)domain.log2_size);
122 uint32_t next_index_2 = (uint32_t)reverse_bits((uint32_t)i + 3, (uint32_t)domain.log2_size);
123 __builtin_prefetch(&coeffs[next_index_1]);
124 __builtin_prefetch(&coeffs[next_index_2]);
126 uint32_t swap_index_1 = (uint32_t)reverse_bits((uint32_t)i, (uint32_t)domain.log2_size);
127 uint32_t swap_index_2 = (uint32_t)reverse_bits((uint32_t)i + 1, (uint32_t)domain.log2_size);
129 size_t poly_idx_1 = swap_index_1 >> log2_poly_size;
130 size_t elem_idx_1 = swap_index_1 & poly_mask;
131 size_t poly_idx_2 = swap_index_2 >> log2_poly_size;
132 size_t elem_idx_2 = swap_index_2 & poly_mask;
134 Fr::__copy(coeffs[poly_idx_1][elem_idx_1], temp_1);
135 Fr::__copy(coeffs[poly_idx_2][elem_idx_2], temp_2);
136 scratch_space[i + 1] = temp_1 - temp_2;
137 scratch_space[i] = temp_1 + temp_2;
143 if (domain.size <= 2) {
144 coeffs[0][0] = scratch_space[0];
145 coeffs[0][1] = scratch_space[1];
149 for (
size_t m = 2; m < (domain.size); m <<= 1) {
150 parallel_for(domain.num_threads, [&](
size_t j) {
160 const size_t start = j * (domain.thread_size >> 1);
161 const size_t end = (j + 1) * (domain.thread_size >> 1);
181 const size_t block_mask = m - 1;
191 const size_t index_mask = ~block_mask;
196 const Fr* round_roots = root_table[static_cast<size_t>(numeric::get_msb(m)) - 1];
201 if (m != (domain.size >> 1)) {
202 for (size_t i = start; i < end; ++i) {
203 size_t k1 = (i & index_mask) << 1;
204 size_t j1 = i & block_mask;
205 temp = round_roots[j1] * scratch_space[k1 + j1 + m];
206 scratch_space[k1 + j1 + m] = scratch_space[k1 + j1] - temp;
207 scratch_space[k1 + j1] += temp;
210 for (size_t i = start; i < end; ++i) {
211 size_t k1 = (i & index_mask) << 1;
212 size_t j1 = i & block_mask;
214 size_t poly_idx_1 = (k1 + j1) >> log2_poly_size;
215 size_t elem_idx_1 = (k1 + j1) & poly_mask;
216 size_t poly_idx_2 = (k1 + j1 + m) >> log2_poly_size;
217 size_t elem_idx_2 = (k1 + j1 + m) & poly_mask;
219 temp = round_roots[j1] * scratch_space[k1 + j1 + m];
220 coeffs[poly_idx_2][elem_idx_2] = scratch_space[k1 + j1] - temp;
221 coeffs[poly_idx_1][elem_idx_1] = scratch_space[k1 + j1] + temp;
236 for (size_t i = (j * domain.thread_size); i < ((j + 1) * domain.thread_size); i += 2) {
237 uint32_t next_index_1 = (uint32_t)reverse_bits((uint32_t)i + 2, (uint32_t)domain.log2_size);
238 uint32_t next_index_2 = (uint32_t)reverse_bits((uint32_t)i + 3, (uint32_t)domain.log2_size);
239 __builtin_prefetch(&coeffs[next_index_1]);
240 __builtin_prefetch(&coeffs[next_index_2]);
242 uint32_t swap_index_1 = (uint32_t)reverse_bits((uint32_t)i, (uint32_t)domain.log2_size);
243 uint32_t swap_index_2 = (uint32_t)reverse_bits((uint32_t)i + 1, (uint32_t)domain.log2_size);
245 Fr::__copy(coeffs[swap_index_1], temp_1);
246 Fr::__copy(coeffs[swap_index_2], temp_2);
247 target[i + 1] = temp_1 - temp_2;
248 target[i] = temp_1 + temp_2;
254 if (domain.size <= 2) {
255 coeffs[0] = target[0];
256 coeffs[1] = target[1];
260 for (
size_t m = 2; m < (domain.size); m <<= 1) {
261 parallel_for(domain.num_threads, [&](
size_t j) {
271 const size_t start = j * (domain.thread_size >> 1);
272 const size_t end = (j + 1) * (domain.thread_size >> 1);
292 const size_t block_mask = m - 1;
302 const size_t index_mask = ~block_mask;
307 const Fr* round_roots = root_table[static_cast<size_t>(numeric::get_msb(m)) - 1];
312 for (size_t i = start; i < end; ++i) {
313 size_t k1 = (i & index_mask) << 1;
314 size_t j1 = i & block_mask;
315 temp = round_roots[j1] * target[k1 + j1 + m];
316 target[k1 + j1 + m] = target[k1 + j1] - temp;
317 target[k1 + j1] += temp;
420 const size_t domain_extension)
422 const size_t log2_domain_extension =
static_cast<size_t>(numeric::get_msb(domain_extension));
427 auto scratch_space_ptr = get_scratch_space<Fr>(domain.
size * domain_extension);
428 auto scratch_space = scratch_space_ptr.get();
433 std::vector<Fr> coset_generators(domain_extension);
435 for (
size_t i = 1; i < domain_extension; ++i) {
436 coset_generators[i] = coset_generators[i - 1] * primitive_root;
438 for (
size_t i = domain_extension - 1; i < domain_extension; --i) {
442 for (
size_t i = 0; i < domain_extension; ++i) {
444 scratch_space + (i * domain.
size),
450 if (domain_extension == 4) {
452 const size_t start = j * domain.thread_size;
453 const size_t end = (j + 1) * domain.thread_size;
454 for (size_t i = start; i < end; ++i) {
455 Fr::__copy(scratch_space[i], coeffs[(i << 2UL)]);
456 Fr::__copy(scratch_space[i + (1UL << domain.log2_size)], coeffs[(i << 2UL) + 1UL]);
457 Fr::__copy(scratch_space[i + (2UL << domain.log2_size)], coeffs[(i << 2UL) + 2UL]);
458 Fr::__copy(scratch_space[i + (3UL << domain.log2_size)], coeffs[(i << 2UL) + 3UL]);
462 for (
size_t i = 0; i < domain.
size; ++i) {
463 for (
size_t j = 0; j < domain_extension; ++j) {
464 Fr::__copy(scratch_space[i + (j << domain.
log2_size)], coeffs[(i << log2_domain_extension) + j]);
468 for (
size_t i = 0; i < domain.size; ++i) {
469 for (
size_t j = 0; j < domain_extension; ++j) {
470 Fr::__copy(scratch_space[i + (j << domain.log2_size)], coeffs[(i << log2_domain_extension) + j]);
693 Fr numerator_polynomial[n + 1];
694 polynomial_arithmetic::compute_linear_polynomial_product(evaluation_points, numerator_polynomial, n);
696 Fr roots_and_denominators[2 * n];
698 for (
size_t i = 0; i < n; ++i) {
699 roots_and_denominators[i] = -evaluation_points[i];
700 temp_src[i] = src[i];
703 roots_and_denominators[n + i] = 1;
704 for (
size_t j = 0; j < n; ++j) {
708 roots_and_denominators[n + i] *= (evaluation_points[i] - evaluation_points[j]);
718 bool interpolation_domain_contains_zero =
false;
721 if (numerator_polynomial[0] ==
Fr(0)) {
722 for (
size_t i = 0; i < n; ++i) {
723 if (evaluation_points[i] ==
Fr(0)) {
725 interpolation_domain_contains_zero =
true;
731 if (!interpolation_domain_contains_zero) {
732 for (
size_t i = 0; i < n; ++i) {
734 z = roots_and_denominators[i];
736 multiplier = temp_src[i] * roots_and_denominators[n + i];
737 temp_dest[0] = multiplier * numerator_polynomial[0];
739 dest[0] += temp_dest[0];
740 for (
size_t j = 1; j < n; ++j) {
741 temp_dest[j] = multiplier * numerator_polynomial[j] - temp_dest[j - 1];
743 dest[j] += temp_dest[j];
747 for (
size_t i = 0; i < n; ++i) {
753 z = roots_and_denominators[i];
755 multiplier = temp_src[i] * roots_and_denominators[n + i];
757 temp_dest[1] = multiplier * numerator_polynomial[1];
761 dest[1] += temp_dest[1];
763 for (
size_t j = 2; j < n; ++j) {
764 temp_dest[j] = multiplier * numerator_polynomial[j] - temp_dest[j - 1];
766 dest[j] += temp_dest[j];
770 for (
size_t i = 0; i < n; ++i) {
771 dest[i] += temp_src[idx_zero] * roots_and_denominators[n + idx_zero] * numerator_polynomial[i + 1];