Barretenberg: src/barretenberg/stdlib/hash/keccak/keccak.cpp Source File

// === AUDIT STATUS ===

// internal:    { status: not started, auditors: [], date: YYYY-MM-DD }

// external_1:  { status: not started, auditors: [], date: YYYY-MM-DD }

// external_2:  { status: not started, auditors: [], date: YYYY-MM-DD }

// =====================


#include "keccak.hpp"

#include "barretenberg/common/assert.hpp"

#include "barretenberg/common/constexpr_utils.hpp"

#include "barretenberg/numeric/bitop/sparse_form.hpp"

#include "barretenberg/stdlib/primitives/logic/logic.hpp"

#include "barretenberg/stdlib/primitives/plookup/plookup.hpp"

#include "barretenberg/stdlib_circuit_builders/plookup_tables/keccak/keccak_rho.hpp"

#include "barretenberg/stdlib_circuit_builders/plookup_tables/keccak/keccak_theta.hpp"

namespace bb::stdlib {


using namespace bb::plookup;


// Normalize a base-11 limb and left-rotate it by ROTATIONS[lane_index] digits, using a single
// plookup multi-table read (KECCAK_NORMALIZE_AND_ROTATE + lane_index).
//
// The limb value is split at 11^{right_bits} into a `left` (most significant) and `right`
// (least significant) slice. Witness columns are built for both slices, the builder creates
// the lookup gates, and the two normalized slices are recombined in swapped order.
//
// @tparam lane_index  index into ROTATIONS; also selects the multi-table id
// @param  limb        lane to normalize and rotate; its witness index is used as the lookup key
// @param  msb         output: overwritten with the last C3 accumulator witness
// @return right_output alone when there is no left slice, otherwise
//         left_output + right_output * 11^{ROTATIONS[lane_index]}
template <typename Builder>

template <size_t lane_index>


field_t<Builder> keccak<Builder>::normalize_and_rotate(const field_ct& limb, field_ct& msb)

{

    // left_bits = number of most-significant base-11 digits that wrap around to the low end of the output

    constexpr size_t left_bits = ROTATIONS[lane_index];


    // right_bits = the number of digits that don't wrap (they are shifted up by left_bits digits)

    constexpr size_t right_bits = 64 - ROTATIONS[lane_index];


    // TODO read from same source as plookup table code

    constexpr size_t max_bits_per_table = plookup::keccak_tables::Rho<>::MAXIMUM_MULTITABLE_BITS;


    // compute the number of lookups required for our left and right bit slices
    // (ceiling division of the slice width by max_bits_per_table)

    constexpr size_t num_left_tables = left_bits / max_bits_per_table + (left_bits % max_bits_per_table > 0 ? 1 : 0);

    constexpr size_t num_right_tables = right_bits / max_bits_per_table + (right_bits % max_bits_per_table > 0 ? 1 : 0);


    // get the numerical value of the left and right bit slices

    // (lookup table input values derived from left / right)

    uint256_t input = limb.get_value();

    constexpr uint256_t slice_divisor = BASE.pow(right_bits);

    const auto [left, right] = input.divmod(slice_divisor);


    // compute the normalized values for the left and right bit slices

    // (lookup table output values derived from left_normalized / right_normalized)

    uint256_t left_normalized = normalize_sparse(left);

    uint256_t right_normalized = normalize_sparse(right);


    plookup::ReadData<bb::fr> lookup;


    // compute plookup witness values for a given slice

    // (same lambda can be used to compute witnesses for left and right slices)
    // N.B. the lambda mutates the captured `input` as well as its `normalized` argument:
    // each iteration pushes the current values and then divides both by 11^{bit_slice}.

    auto compute_lookup_witnesses_for_limb = [&]<size_t limb_bits, size_t num_lookups>(uint256_t& normalized) {

        // (use a constexpr loop to make some pow and div operations compile-time)

        bb::constexpr_for<0, num_lookups, 1>([&]<size_t i> {

            constexpr size_t num_bits_processed = i * max_bits_per_table;


            // How many bits can this slice contain?

            // We want to implicitly range-constrain `normalized < 11^{limb_bits}`,

            // which means potentially using a lookup table that is not of size 11^{max_bits_per_table}

            // for the most-significant slice

            constexpr size_t bit_slice = (num_bits_processed + max_bits_per_table > limb_bits)

                                             ? limb_bits % max_bits_per_table

                                             : max_bits_per_table;


            // current column values are tracked via 'input' and 'normalized'

            lookup[ColumnIdx::C1].push_back(input);

            lookup[ColumnIdx::C2].push_back(normalized);


            constexpr uint64_t divisor = numeric::pow64(static_cast<uint64_t>(BASE), bit_slice);

            constexpr uint64_t msb_divisor = divisor / static_cast<uint64_t>(BASE);


            // compute the value of the most significant bit of this slice and store in C3

            const auto [normalized_quotient, normalized_slice] = normalized.divmod(divisor);


            // 256-bit divisions are expensive! cast to u64s when we don't need the extra bits

            const uint64_t normalized_msb = (static_cast<uint64_t>(normalized_slice) / msb_divisor);

            lookup[ColumnIdx::C3].push_back(normalized_msb);


            // We need to provide a key/value object for this lookup in order for the Builder

            // to compute the plookup sorted list commitment

            const auto [input_quotient, input_slice] = input.divmod(divisor);

            lookup.lookup_entries.push_back(

                { { static_cast<uint64_t>(input_slice), 0 }, { normalized_slice, normalized_msb } });


            // reduce the input and output by 11^{bit_slice}

            input = input_quotient;

            normalized = normalized_quotient;

        });

    };


    // template lambda syntax is a little funky.

    // Need to explicitly write `.template operator()` (instead of just `()`).

    // Otherwise compiler cannot distinguish between `>` symbol referring to closing the template parameter list,

    // OR `>` being a greater-than operator :/
    // Order matters: the right (least significant) slice is consumed from `input` first, so that
    // only the left slice remains in `input` when the second call runs.

    compute_lookup_witnesses_for_limb.template operator()<right_bits, num_right_tables>(right_normalized);

    compute_lookup_witnesses_for_limb.template operator()<left_bits, num_left_tables>(left_normalized);


    // Call builder method to create plookup constraints.

    // The MultiTable table index can be derived from `lane_idx`

    // Each lane_idx has a different rotation amount, which changes sizes of left/right slices

    // and therefore the selector constants required

    const auto accumulator_witnesses = limb.context->create_gates_from_plookup_accumulators(

        (plookup::MultiTableId)((size_t)KECCAK_NORMALIZE_AND_ROTATE + lane_index), lookup, limb.get_witness_index());


    // extract the most significant bit of the normalized output from the final lookup entry in column C3

    msb = field_ct::from_witness_index(limb.get_context(),

                                       accumulator_witnesses[ColumnIdx::C3][num_left_tables + num_right_tables - 1]);


    // Extract the witness that maps to the normalized right slice
    // (index 0 is the first C2 entry, pushed by the right-slice call above)

    const field_t<Builder> right_output =

        field_t<Builder>::from_witness_index(limb.get_context(), accumulator_witnesses[ColumnIdx::C2][0]);


    if (num_left_tables == 0) {

        // if the left slice size is 0 bits (i.e. no rotation), return `right_output`

        return right_output;

    } else {

        // Extract the normalized left slice
        // (the first C2 entry pushed by the left-slice call sits right after the right-slice entries)

        const field_t<Builder> left_output = field_t<Builder>::from_witness_index(

            limb.get_context(), accumulator_witnesses[ColumnIdx::C2][num_right_tables]);


        // Stitch the right/left slices together to create our rotated output:
        // the left slice moves to the low end, the right slice is shifted up by the rotation amount

        constexpr uint256_t shift = BASE.pow(ROTATIONS[lane_index]);

        return (left_output + right_output * shift);

    }

}


// Recompute the twisted representation of every lane.
//
// For each lane index k in [0, NUM_KECCAK_LANES):
//   twisted_state[k] = normalize(state[k] * 11 + state_msb[k])
//
// @param internal  keccak state; `state` and `state_msb` are read, `twisted_state` is overwritten
template <typename Builder> void keccak<Builder>::compute_twisted_state(keccak_state& internal)
{
    const auto& lanes = internal.state;
    const auto& lane_msbs = internal.state_msb;
    auto& twisted = internal.twisted_state;

    for (size_t lane = 0; lane < NUM_KECCAK_LANES; ++lane) {
        // shift the lane up by one base-11 digit, then place the stored msb in the lowest digit
        const auto shifted_lane = lanes[lane] * 11;
        twisted[lane] = (shifted_lane + lane_msbs[lane]).normalize();
    }
}


// THETA round.
//
// Works on the twisted lanes (internal.twisted_state) and adds a per-column term D[i] to every
// lane of internal.state:
//   1. C[i] = sum of the five twisted lanes of column i.
//   2. D[i] = C[(i + 4) % 5] + C[(i + 1) % 5] * BASE.
//   3. D[i] is split into three witnesses (hi, mid, lo) with
//          D[i] == hi * 11^65 + mid * 11 + lo,
//      hi and lo are range-constrained, and mid is mapped through the KECCAK_THETA_OUTPUT lookup.
//      The lookup output replaces D[i].
//   4. state[j * 5 + i] += D[i] for every row j.
//
// @param internal  keccak state; `twisted_state` is read, `state` is updated in place,
//                  `context` is used to create witnesses and range constraints
template <typename Builder> void keccak<Builder>::theta(keccak_state& internal)
{
    std::array<field_ct, 5> C;
    std::array<field_ct, 5> D;

    auto& state = internal.state;
    const auto& twisted_state = internal.twisted_state;

    // C[i] = column sum of the twisted lanes
    for (size_t i = 0; i < 5; ++i) {
        C[i] = field_ct::accumulate({ twisted_state[i],
                                      twisted_state[5 + i],
                                      twisted_state[10 + i],
                                      twisted_state[15 + i],
                                      twisted_state[20 + i] });
    }

    // D[i] combines the previous column with the next column shifted up by one base-11 digit
    for (size_t i = 0; i < 5; ++i) {
        const auto non_shifted_equivalent = (C[(i + 4) % 5]);
        const auto shifted_equivalent = C[(i + 1) % 5] * BASE;
        D[i] = (non_shifted_equivalent + shifted_equivalent);
    }

    static constexpr uint256_t divisor = BASE.pow(64);
    static constexpr uint256_t multiplicand = BASE.pow(65);
    for (size_t i = 0; i < 5; ++i) {
        // Native decomposition D = hi * 11^65 + mid * 11 + lo:
        // lo is the lowest base-11 digit, mid the next 64 digits, hi whatever remains above.
        uint256_t D_native = D[i].get_value();
        const auto [D_quotient, lo_native] = D_native.divmod(BASE);
        const uint256_t hi_native = D_quotient / divisor;
        const uint256_t mid_native = D_quotient - hi_native * divisor;

        field_ct hi(witness_ct(internal.context, hi_native));
        field_ct mid(witness_ct(internal.context, mid_native));
        field_ct lo(witness_ct(internal.context, lo_native));

        // assert equal should cost 1 gate (multipliers are all constants)
        D[i].assert_equal((hi * multiplicand).add_two(mid * 11, lo));
        internal.context->create_new_range_constraint(hi.get_witness_index(), static_cast<uint64_t>(BASE));
        internal.context->create_new_range_constraint(lo.get_witness_index(), static_cast<uint64_t>(BASE));

        if constexpr (64 % plookup::keccak_tables::Theta::TABLE_BITS == 0) {
            // The table slice width cleanly divides 64: the lookup alone bounds mid, so no
            // additional range constraint is added here.
            //
            // N.B. we could optimize out 5 gates per round here but it's very fiddly...
            // In previous section, D[i] = X + Y (non shifted equiv and shifted equiv)
            // We also want to validate D[i] == hi' + mid' + lo (where hi', mid' are hi, mid scaled by constants)
            // We *could* create a big addition gate to validate the previous logic w. following structure:
            // | w1 | w2  | w3 | w4 |
            // | -- | --- | -- | -- |
            // | hi | mid | lo | X  |
            // | P0 | P1  | P2 | Y  |
            // To save a gate, we would need to place the wires for the first KECCAK_THETA_OUTPUT plookup gate
            // at P0, P1, P2. This is fiddly builder logic that is circuit-width-dependent
            // (this would save 120 gates per hash block... not worth making the code less readable for that)
            D[i] = plookup_read<Builder>::read_from_1_to_2_table(KECCAK_THETA_OUTPUT, mid);
        } else {
            // The number of bits in the KECCAK_THETA_OUTPUT table does NOT cleanly divide 64:
            // we need an additional range constraint to ensure that mid < 11^64.
            //
            // The lookup key must be `mid` (the decomposed 64-digit value), exactly as in the
            // branch above; looking up the undecomposed D[i] would leave hi/mid/lo unused.
            const auto accumulators = plookup_read<Builder>::get_lookup_accumulators(KECCAK_THETA_OUTPUT, mid);
            D[i] = accumulators[ColumnIdx::C2][0];

            // Ensure input to lookup is < 11^64,
            // by validating most significant input slice is < 11^{64 mod slice_bits}
            const field_ct most_significant_slice = accumulators[ColumnIdx::C1][accumulators[ColumnIdx::C1].size() - 1];

            // N.B. cheaper to validate ((11^{64 mod slice_bits} - 1) - slice < 2^14) as this
            // prevents an extra range table from being created.
            // The `- 1` makes the bound strict: range-checking (maximum - slice) would also
            // accept slice == maximum, i.e. an input >= 11^64.
            constexpr uint256_t maximum = BASE.pow(64 % plookup::keccak_tables::Theta::TABLE_BITS);
            const field_ct target = -most_significant_slice + (maximum - 1);
            BB_ASSERT_GT((uint256_t(1) << Builder::DEFAULT_PLOOKUP_RANGE_BITNUM) - 1, maximum);
            target.create_range_constraint(Builder::DEFAULT_PLOOKUP_RANGE_BITNUM,
                                           "input to KECCAK_THETA_OUTPUT too large!");
        }
    }

    // compute state[j * 5 + i] XOR D[i] in base-11 representation
    for (size_t i = 0; i < 5; ++i) {
        for (size_t j = 0; j < 5; ++j) {
            state[j * 5 + i] = state[j * 5 + i] + D[i];
        }
    }
}


// RHO round.
//
// Replaces every lane with normalize_and_rotate<lane>(lane). The lane index is a template
// argument, so the loop over all NUM_KECCAK_LANES lanes is unrolled at compile time.
// normalize_and_rotate also overwrites the matching state_msb entry.
//
// @param internal  keccak state; `state` and `state_msb` are updated in place
template <typename Builder> void keccak<Builder>::rho(keccak_state& internal)
{
    constexpr_for<0, NUM_KECCAK_LANES, 1>([&]<size_t lane>() {
        auto& lane_value = internal.state[lane];
        auto& lane_msb = internal.state_msb[lane];
        lane_value = normalize_and_rotate<lane>(lane_value, lane_msb);
    });
}


// PI round.
//
// Permutes the lanes: the lane at (x, y), stored at index 5 * y + x, moves to
// (u, v) = (y, (2 * x + 3 * y) % 5), stored at index 5 * v + u.
// Only lane positions change; no arithmetic is applied to the lane values here.
//
// @param internal  keccak state; `state` is permuted in place
template <typename Builder> void keccak<Builder>::pi(keccak_state& internal)
{
    // Snapshot the lanes first so that reads are not affected by the writes below.
    const std::array<field_ct, NUM_KECCAK_LANES> snapshot = internal.state;

    for (size_t source = 0; source < NUM_KECCAK_LANES; ++source) {
        const size_t x = source % 5;
        const size_t y = source / 5;

        const size_t dest_x = y; // (0 * x + 1 * y) % 5, and y is already < 5
        const size_t dest_y = (2 * x + 3 * y) % 5;

        internal.state[dest_y * 5 + dest_x] = snapshot[source];
    }
}


// CHI round.
//
// Processes the state one row (five lanes) at a time:
//   1. for each lane x of the row, form 2 * A + CHI_OFFSET - B + C, where A, B, C are the lanes
//      at x, x + 1 and x + 2 (mod 5) of the same row;
//   2. map each of these values through the KECCAK_CHI_OUTPUT lookup, storing the first C2
//      accumulator as the new lane and the last C3 accumulator as the new lane msb.
// All five combinations of a row are computed before any lane of that row is overwritten.
//
// @param internal  keccak state; `state` and `state_msb` are updated in place
template <typename Builder> void keccak<Builder>::chi(keccak_state& internal)
{
    // (cost = 12 * 25 = 300?)
    auto& lanes = internal.state;
    auto& lane_msbs = internal.state_msb;

    for (size_t row = 0; row < 5; ++row) {
        const size_t row_offset = row * 5;

        // Phase 1: build the un-normalized outputs from the (still unmodified) row
        std::array<field_ct, 5> unnormalized;
        for (size_t col = 0; col < 5; ++col) {
            const auto& a = lanes[row_offset + col];
            const auto& b = lanes[row_offset + ((col + 1) % 5)];
            const auto& c = lanes[row_offset + ((col + 2) % 5)];

            // vv should cost 1 gate
            unnormalized[col] = (a + a + CHI_OFFSET).add_two(-b, c);
        }

        // Phase 2: normalize via lookup and write the row back
        for (size_t col = 0; col < 5; ++col) {
            const auto accumulators =
                plookup_read<Builder>::get_lookup_accumulators(KECCAK_CHI_OUTPUT, unnormalized[col]);
            const auto& msb_column = accumulators[ColumnIdx::C3];

            lanes[row_offset + col] = accumulators[ColumnIdx::C2][0];
            lane_msbs[row_offset + col] = msb_column[msb_column.size() - 1];
        }
    }
}


// IOTA round.
//
// Adds the sparse round constant SPARSE_RC[round] to lane 0, then normalizes the lane with
// normalize_and_rotate<0>, which also refreshes state_msb[0]. Unless this is the final round,
// the twisted state is recomputed afterwards.
//
// @param internal  keccak state; lane 0, its msb and (usually) `twisted_state` are updated
// @param round     index of the current round, used to select the round constant
template <typename Builder> void keccak<Builder>::iota(keccak_state& internal, size_t round)
{
    auto& lane0 = internal.state[0];
    const field_ct with_round_constant = lane0 + SPARSE_RC[round];

    // normalize lane value so that we don't overflow our base11 modulus boundary in the next round
    lane0 = normalize_and_rotate<0>(with_round_constant, internal.state_msb[0]);

    // No need to add constraints to compute twisted repr if this is the last round
    const bool is_final_round = (round == NUM_KECCAK_ROUNDS - 1);
    if (is_final_round) {
        return;
    }
    compute_twisted_state(internal);
}


// Apply NUM_KECCAK_ROUNDS rounds to the internal state.
// Each round runs the five steps in the fixed order theta, rho, pi, chi, iota;
// only iota receives the round index.
//
// @param internal  keccak state, updated in place
template <typename Builder> void keccak<Builder>::keccakf1600(keccak_state& internal)
{
    size_t round = 0;
    while (round < NUM_KECCAK_ROUNDS) {
        theta(internal);
        rho(internal);
        pi(internal);
        chi(internal);
        iota(internal, round);
        ++round;
    }
}


// Returns the keccak f1600 permutation of the input state.
//
// Steps:
//   1. convert every input lane into the 'extended' representation through the
//      KECCAK_FORMAT_INPUT lookup (first C2 accumulator = lane, last C3 accumulator = lane msb);
//   2. derive the 'twisted' state;
//   3. run keccakf1600() on this keccak 'internal state';
//   4. convert the state back from the extended representation.
//
// @param state  the NUM_KECCAK_LANES input lanes
// @param ctx    circuit builder stored in the internal state's `context`
// @return the permuted lanes, converted back by extended_2_normal()
template <typename Builder>
std::array<field_t<Builder>, keccak<Builder>::NUM_KECCAK_LANES> keccak<Builder>::permutation_opcode(
    std::array<field_t<Builder>, NUM_KECCAK_LANES> state, Builder* ctx)
{
    // populate keccak_state, convert our 64-bit lanes into an extended base-11 representation
    keccak_state internal;
    internal.context = ctx;
    for (size_t i = 0; i < state.size(); ++i) {
        const auto accumulators = plookup_read<Builder>::get_lookup_accumulators(KECCAK_FORMAT_INPUT, state[i]);
        internal.state[i] = accumulators[ColumnIdx::C2][0];
        internal.state_msb[i] = accumulators[ColumnIdx::C3][accumulators[ColumnIdx::C3].size() - 1];
    }
    compute_twisted_state(internal);
    keccakf1600(internal);

    // we convert back to the normal lanes
    return extended_2_normal(internal);
}


// Convert the 'extended' representation of the internal Keccak state back into the usual
// array of lanes.
//
// Every lane of internal.state is mapped through the KECCAK_FORMAT_OUTPUT lookup and stored at
// the same index of the result; lane order is preserved and no byte reordering happens here.
//
// @param internal  keccak state whose `state` lanes are converted (the lanes are only read)
// @return the NUM_KECCAK_LANES converted lanes
template <typename Builder>
std::array<field_t<Builder>, keccak<Builder>::NUM_KECCAK_LANES> keccak<Builder>::extended_2_normal(
    keccak_state& internal)
{
    std::array<field_t<Builder>, NUM_KECCAK_LANES> lanes;

    size_t lane_index = 0;
    for (const auto& extended_lane : internal.state) {
        lanes[lane_index] = plookup_read<Builder>::read_from_1_to_2_table(KECCAK_FORMAT_OUTPUT, extended_lane);
        ++lane_index;
    }

    return lanes;
}


// Explicit instantiations of keccak for the two circuit builders it is compiled for here
// (Ultra and Mega), so the template member definitions above are emitted in this translation unit.
template class keccak<bb::UltraCircuitBuilder>;

template class keccak<bb::MegaCircuitBuilder>;


} // namespace bb::stdlib

assert.hpp

BB_ASSERT_GT
#define BB_ASSERT_GT(left, right,...)
Definition assert.hpp:107

bb::ECCVMCircuitBuilder
Definition eccvm_circuit_builder.hpp:24

bb::numeric::uint256_t
Definition uint256.hpp:32

bb::numeric::uint256_t::pow
constexpr uint256_t pow(const uint256_t &exponent) const
Definition uint256_impl.hpp:299

bb::numeric::uint256_t::divmod
constexpr std::pair< uint256_t, uint256_t > divmod(const uint256_t &b) const
Definition uint256_impl.hpp:128

bb::plookup::ReadData
Container type for lookup table reads.
Definition types.hpp:341

bb::plookup::ReadData::lookup_entries
std::vector< BasicTable::LookupEntry > lookup_entries
Definition types.hpp:347

bb::plookup::keccak_tables::Rho
Generate the plookup tables used for the RHO round of the Keccak hash algorithm.
Definition keccak_rho.hpp:60

bb::plookup::keccak_tables::Theta::TABLE_BITS
static constexpr size_t TABLE_BITS
Definition keccak_theta.hpp:60

bb::stdlib::field_t
Definition field.hpp:45

bb::stdlib::field_t< Builder >::from_witness_index
static field_t from_witness_index(Builder *ctx, uint32_t witness_index)
Definition field.cpp:62

bb::stdlib::field_t< Builder >::accumulate
static field_t accumulate(const std::vector< field_t > &input)
Efficiently compute the sum of vector entries. Using big_add_gate we reduce the number of gates neede...
Definition field.cpp:1167

bb::stdlib::field_t::create_range_constraint
void create_range_constraint(size_t num_bits, std::string const &msg="field_t::range_constraint") const
Let x = *this.normalize(), constrain x.v < 2^{num_bits}.
Definition field.cpp:909

bb::stdlib::field_t::context
Builder * context
Definition field.hpp:56

bb::stdlib::field_t::get_context
Builder * get_context() const
Definition field.hpp:419

bb::stdlib::field_t::get_value
bb::fr get_value() const
Given a := *this, compute its value given by a.v * a.mul + a.add.
Definition field.cpp:828

bb::stdlib::field_t::get_witness_index
uint32_t get_witness_index() const
Get the witness index of the current field element.
Definition field.hpp:506

bb::stdlib::keccak
KECCAAAAAAAAAAK.
Definition keccak.hpp:25

bb::stdlib::keccak::rho
static void rho(keccak_state &state)
RHO round.
Definition keccak.cpp:385

bb::stdlib::keccak::pi
static void pi(keccak_state &state)
PI.
Definition keccak.cpp:400

bb::stdlib::keccak::theta
static void theta(keccak_state &state)
THETA round.
Definition keccak.cpp:251

bb::stdlib::keccak::compute_twisted_state
static void compute_twisted_state(keccak_state &internal)
Compute twisted representation of hash lane.
Definition keccak.cpp:197

bb::stdlib::keccak::chi
static void chi(keccak_state &state)
CHI.
Definition keccak.cpp:436

bb::stdlib::keccak::normalize_and_rotate
static field_t< Builder > normalize_and_rotate(const field_ct &limb, field_ct &msb)
Normalize a base-11 limb and left-rotate by keccak::ROTATIONS[lane_index] bits. This method also extr...
Definition keccak.cpp:37

bb::stdlib::keccak::permutation_opcode
static std::array< field_ct, NUM_KECCAK_LANES > permutation_opcode(std::array< field_ct, NUM_KECCAK_LANES > state, Builder *context)
Definition keccak.cpp:498

bb::stdlib::keccak::extended_2_normal
static std::array< field_ct, NUM_KECCAK_LANES > extended_2_normal(keccak_state &internal)
Definition keccak.cpp:519

bb::stdlib::keccak::keccakf1600
static void keccakf1600(keccak_state &state)
Definition keccak.cpp:482

bb::stdlib::keccak::iota
static void iota(keccak_state &state, size_t round)
IOTA.
Definition keccak.cpp:469

bb::stdlib::plookup_read
Definition plookup.hpp:17

constexpr_utils.hpp

C
bb::avm2::Column C
Definition execution_trace.cpp:35

witness_ct
bn254::witness_ct witness_ct
Definition graph_description_bigfield.test.cpp:31

keccak_rho.hpp

keccak_theta.hpp

logic.hpp

bb::avm2::Column
Column
Definition columns.hpp:31

bb::numeric::pow64
constexpr uint64_t pow64(const uint64_t input, const uint64_t exponent)
Definition pow.hpp:13

bb::plookup
Definition aes128.hpp:17

bb::plookup::MultiTableId
MultiTableId
Definition types.hpp:82

bb::plookup::KECCAK_FORMAT_INPUT
@ KECCAK_FORMAT_INPUT
Definition types.hpp:119

bb::plookup::KECCAK_FORMAT_OUTPUT
@ KECCAK_FORMAT_OUTPUT
Definition types.hpp:120

bb::plookup::KECCAK_NORMALIZE_AND_ROTATE
@ KECCAK_NORMALIZE_AND_ROTATE
Definition types.hpp:121

bb::plookup::KECCAK_CHI_OUTPUT
@ KECCAK_CHI_OUTPUT
Definition types.hpp:118

bb::plookup::KECCAK_THETA_OUTPUT
@ KECCAK_THETA_OUTPUT
Definition types.hpp:117

bb::stdlib
Definition graph_description_goblin.test.cpp:13

std::get
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13

plookup.hpp

sparse_form.hpp

keccak.hpp

bb::stdlib::keccak::keccak_state
Definition keccak.hpp:146

bb::stdlib::keccak::keccak_state::state
std::array< field_ct, NUM_KECCAK_LANES > state
Definition keccak.hpp:147

bb::stdlib::keccak::keccak_state::context
Builder * context
Definition keccak.hpp:150

bb::stdlib::keccak::keccak_state::twisted_state
std::array< field_ct, NUM_KECCAK_LANES > twisted_state
Definition keccak.hpp:149

bb::stdlib::keccak::keccak_state::state_msb
std::array< field_ct, NUM_KECCAK_LANES > state_msb
Definition keccak.hpp:148