mirror of
https://github.com/monero-project/monero.git
synced 2024-12-13 11:56:31 +02:00
Merge pull request #4693
74fb3d88 multiexp: some minor speedups (moneromooo-monero)
a6d2e246 bulletproofs: only enable profiling on request (moneromooo-monero)
a110e6aa multiexp: tune which variants to use for which number of points (moneromooo-monero)
8b476722 bulletproofs: speedup prover (moneromooo-monero)
6f9ae5b6 multiexp: handle pippenger multiexps with part precalc (moneromooo-monero)
10e5a927 bulletproofs: maintain -z4, -z5, and -y0 to avoid subtractions (moneromooo-monero)
8629a42c bulletproofs: rework flow to use sarang's fast batch inversion code (moneromooo-monero)
fc9f7d9c bulletproofs: merge multiexps as per sarang's new python code (moneromooo-monero)
4061960a multiexp: pack the digits table when STRAUS_C is 4 (moneromooo-monero)
bf8e4b98 bulletproofs: some more minor speedup (moneromooo-monero)
c415df97 performance_tests: sc_check and ge_dsm_precomp (moneromooo-monero)
a281b950 bulletproofs: remove single value prover (moneromooo-monero)
484155d0 bulletproofs: some more speedup (moneromooo-monero)
a621d6c8 bulletproofs: random minor speedups (moneromooo-monero)
a49a1761 bulletproofs: shave off a lot of scalar muls from the g/h construction (moneromooo-monero)
4564a5d1 bulletproofs: speedup PROVE (moneromooo-monero)
This commit is contained in:
commit
6d3d8635be
File diff suppressed because it is too large
Load Diff
@ -79,6 +79,25 @@ extern "C"
|
||||
// Best/cached Straus Straus Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip
|
||||
// Best/uncached Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip Pip Pip
|
||||
|
||||
// New timings:
|
||||
// Pippenger:
|
||||
// 2/1 always
|
||||
// 3/2 at ~13
|
||||
// 4/3 at ~29
|
||||
// 5/4 at ~83
|
||||
// 6/5 < 200
|
||||
// 7/6 at ~470
|
||||
// 8/7 at ~1180
|
||||
// 9/8 at ~2290
|
||||
// Cached Pippenger:
|
||||
// 6/5 < 200
|
||||
// 7/6 at 460
|
||||
// 8/7 at 1180
|
||||
// 9/8 at 2300
|
||||
//
|
||||
// Cached Straus/Pippenger cross at 232
|
||||
//
|
||||
|
||||
namespace rct
|
||||
{
|
||||
|
||||
@ -320,7 +339,7 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data)
|
||||
return res;
|
||||
}
|
||||
|
||||
static constexpr unsigned int STRAUS_C = 4;
|
||||
#define STRAUS_C 4
|
||||
|
||||
struct straus_cached_data
|
||||
{
|
||||
@ -447,28 +466,26 @@ rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<str
|
||||
#endif
|
||||
|
||||
MULTIEXP_PERF(PERF_TIMER_START_UNIT(digits, 1000000));
|
||||
#if STRAUS_C==4
|
||||
std::unique_ptr<uint8_t[]> digits{new uint8_t[64 * data.size()]};
|
||||
#else
|
||||
std::unique_ptr<uint8_t[]> digits{new uint8_t[256 * data.size()]};
|
||||
#endif
|
||||
for (size_t j = 0; j < data.size(); ++j)
|
||||
{
|
||||
const unsigned char *bytes = data[j].scalar.bytes;
|
||||
#if STRAUS_C==4
|
||||
unsigned int i;
|
||||
for (i = 0; i < 64; i += 2, bytes++)
|
||||
{
|
||||
digits[j*64+i] = bytes[0] & 0xf;
|
||||
digits[j*64+i+1] = bytes[0] >> 4;
|
||||
}
|
||||
#elif 1
|
||||
unsigned char bytes33[33];
|
||||
memcpy(bytes33, data[j].scalar.bytes, 32);
|
||||
bytes33[32] = 0;
|
||||
const unsigned char *bytes = bytes33;
|
||||
#if 1
|
||||
static_assert(STRAUS_C == 4, "optimized version needs STRAUS_C == 4");
|
||||
unsigned int i;
|
||||
for (i = 0; i < 256; i += 8, bytes++)
|
||||
{
|
||||
digits[j*256+i] = bytes[0] & 0xf;
|
||||
digits[j*256+i+1] = (bytes[0] >> 1) & 0xf;
|
||||
digits[j*256+i+2] = (bytes[0] >> 2) & 0xf;
|
||||
digits[j*256+i+3] = (bytes[0] >> 3) & 0xf;
|
||||
digits[j*256+i+4] = ((bytes[0] >> 4) | (bytes[1]<<4)) & 0xf;
|
||||
digits[j*256+i+5] = ((bytes[0] >> 5) | (bytes[1]<<3)) & 0xf;
|
||||
digits[j*256+i+6] = ((bytes[0] >> 6) | (bytes[1]<<2)) & 0xf;
|
||||
digits[j*256+i+7] = ((bytes[0] >> 7) | (bytes[1]<<1)) & 0xf;
|
||||
}
|
||||
#elif 1
|
||||
bytes = bytes33;
|
||||
for (size_t i = 0; i < 256; ++i)
|
||||
digits[j*256+i] = ((bytes[i>>3] | (bytes[(i>>3)+1]<<8)) >> (i&7)) & mask;
|
||||
#else
|
||||
@ -521,7 +538,11 @@ skipfirst:
|
||||
if (skip[j])
|
||||
continue;
|
||||
#endif
|
||||
#if STRAUS_C==4
|
||||
const uint8_t digit = digits[j*64+i/4];
|
||||
#else
|
||||
const uint8_t digit = digits[j*256+i];
|
||||
#endif
|
||||
if (digit)
|
||||
{
|
||||
ge_add(&p1, &band_p3, &CACHE_OFFSET(local_cache, j, digit));
|
||||
@ -542,16 +563,13 @@ skipfirst:
|
||||
|
||||
// Returns the Pippenger window size c (in bits) to use for a multiexp over
// N points. The result is always in the range [2, 9].
//
// The thresholds are the benchmark crossover points recorded in this file's
// timing notes (c=3 overtakes c=2 at ~13 points, c=4 at ~29, c=5 at ~83,
// c=6 below 200, c=7 at ~465, c=8 at ~1180, c=9 at ~2295); per those notes
// c=2 is never slower than c=1, so c=1 is not returned.
//
// The earlier ladder (2, 8, 16, 64, 128, 256, 1024, 2048) has been dropped:
// left in front of these checks it matched every N <= 2048 first and made
// the tuned thresholds unreachable.
size_t get_pippenger_c(size_t N)
{
  if (N <= 13) return 2;
  if (N <= 29) return 3;
  if (N <= 83) return 4;
  if (N <= 185) return 5;
  if (N <= 465) return 6;
  if (N <= 1180) return 7;
  if (N <= 2295) return 8;
  return 9;
}
|
||||
|
||||
@ -563,12 +581,13 @@ struct pippenger_cached_data
|
||||
~pippenger_cached_data() { aligned_free(cached); }
|
||||
};
|
||||
|
||||
std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t N)
|
||||
std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t start_offset, size_t N)
|
||||
{
|
||||
MULTIEXP_PERF(PERF_TIMER_START_UNIT(pippenger_init_cache, 1000000));
|
||||
CHECK_AND_ASSERT_THROW_MES(start_offset <= data.size(), "Bad cache base data");
|
||||
if (N == 0)
|
||||
N = data.size();
|
||||
CHECK_AND_ASSERT_THROW_MES(N <= data.size(), "Bad cache base data");
|
||||
N = data.size() - start_offset;
|
||||
CHECK_AND_ASSERT_THROW_MES(N <= data.size() - start_offset, "Bad cache base data");
|
||||
ge_cached cached;
|
||||
std::shared_ptr<pippenger_cached_data> cache(new pippenger_cached_data());
|
||||
|
||||
@ -576,7 +595,7 @@ std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<Mu
|
||||
cache->cached = (ge_cached*)aligned_realloc(cache->cached, N * sizeof(ge_cached), 4096);
|
||||
CHECK_AND_ASSERT_THROW_MES(cache->cached, "Out of memory");
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
ge_p3_to_cached(&cache->cached[i], &data[i].point);
|
||||
ge_p3_to_cached(&cache->cached[i], &data[i+start_offset].point);
|
||||
|
||||
MULTIEXP_PERF(PERF_TIMER_STOP(pippenger_init_cache));
|
||||
return cache;
|
||||
@ -587,16 +606,21 @@ size_t pippenger_get_cache_size(const std::shared_ptr<pippenger_cached_data> &ca
|
||||
return cache->size * sizeof(*cache->cached);
|
||||
}
|
||||
|
||||
rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache, size_t c)
|
||||
rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache, size_t cache_size, size_t c)
|
||||
{
|
||||
CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache->size >= data.size(), "Cache is too small");
|
||||
if (cache != NULL && cache_size == 0)
|
||||
cache_size = cache->size;
|
||||
CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache_size <= cache->size, "Cache is too small");
|
||||
if (c == 0)
|
||||
c = get_pippenger_c(data.size());
|
||||
CHECK_AND_ASSERT_THROW_MES(c <= 9, "c is too large");
|
||||
|
||||
ge_p3 result = ge_p3_identity;
|
||||
bool result_init = false;
|
||||
std::unique_ptr<ge_p3[]> buckets{new ge_p3[1<<c]};
|
||||
bool buckets_init[1<<9];
|
||||
std::shared_ptr<pippenger_cached_data> local_cache = cache == NULL ? pippenger_init_cache(data) : cache;
|
||||
std::shared_ptr<pippenger_cached_data> local_cache_2 = data.size() > cache_size ? pippenger_init_cache(data, cache_size) : NULL;
|
||||
|
||||
rct::key maxscalar = rct::zero();
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
@ -611,7 +635,7 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<
|
||||
|
||||
for (size_t k = groups; k-- > 0; )
|
||||
{
|
||||
if (!ge_p3_is_point_at_infinity(&result))
|
||||
if (result_init)
|
||||
{
|
||||
ge_p2 p2;
|
||||
ge_p3_to_p2(&p2, &result);
|
||||
@ -625,8 +649,7 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<
|
||||
ge_p1p1_to_p2(&p2, &p1);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < (1u<<c); ++i)
|
||||
buckets[i] = ge_p3_identity;
|
||||
memset(buckets_init, 0, 1u<<c);
|
||||
|
||||
// partition scalars into buckets
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
@ -638,22 +661,45 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<
|
||||
if (bucket == 0)
|
||||
continue;
|
||||
CHECK_AND_ASSERT_THROW_MES(bucket < (1u<<c), "bucket overflow");
|
||||
if (!ge_p3_is_point_at_infinity(&buckets[bucket]))
|
||||
if (buckets_init[bucket])
|
||||
{
|
||||
add(buckets[bucket], local_cache->cached[i]);
|
||||
if (i < cache_size)
|
||||
add(buckets[bucket], local_cache->cached[i]);
|
||||
else
|
||||
add(buckets[bucket], local_cache_2->cached[i - cache_size]);
|
||||
}
|
||||
else
|
||||
{
|
||||
buckets[bucket] = data[i].point;
|
||||
buckets_init[bucket] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// sum the buckets
|
||||
ge_p3 pail = ge_p3_identity;
|
||||
ge_p3 pail;
|
||||
bool pail_init = false;
|
||||
for (size_t i = (1<<c)-1; i > 0; --i)
|
||||
{
|
||||
if (!ge_p3_is_point_at_infinity(&buckets[i]))
|
||||
add(pail, buckets[i]);
|
||||
if (!ge_p3_is_point_at_infinity(&pail))
|
||||
add(result, pail);
|
||||
if (buckets_init[i])
|
||||
{
|
||||
if (pail_init)
|
||||
add(pail, buckets[i]);
|
||||
else
|
||||
{
|
||||
pail = buckets[i];
|
||||
pail_init = true;
|
||||
}
|
||||
}
|
||||
if (pail_init)
|
||||
{
|
||||
if (result_init)
|
||||
add(result, pail);
|
||||
else
|
||||
{
|
||||
result = pail;
|
||||
result_init = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,10 +61,10 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data);
|
||||
std::shared_ptr<straus_cached_data> straus_init_cache(const std::vector<MultiexpData> &data, size_t N =0);
|
||||
size_t straus_get_cache_size(const std::shared_ptr<straus_cached_data> &cache);
|
||||
rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<straus_cached_data> &cache = NULL, size_t STEP = 0);
|
||||
std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t N =0);
|
||||
std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t start_offset = 0, size_t N =0);
|
||||
size_t pippenger_get_cache_size(const std::shared_ptr<pippenger_cached_data> &cache);
|
||||
size_t get_pippenger_c(size_t N);
|
||||
rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache = NULL, size_t c = 0);
|
||||
rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache = NULL, size_t cache_size = 0, size_t c = 0);
|
||||
|
||||
}
|
||||
|
||||
|
@ -58,15 +58,6 @@ namespace
|
||||
}
|
||||
|
||||
namespace rct {
|
||||
// Single-amount bulletproof helper: draws a fresh mask, proves `amount`
// under it, and hands back both the proof and its sole commitment.
//   C      - out: set to proof.V[0]
//   mask   - out: overwritten with a newly generated key from rct::skGen()
//   amount - value passed to bulletproof_PROVE
// Throws (via CHECK_AND_ASSERT_THROW_MES) if the proof does not carry
// exactly one element in V.
// NOTE(review): the surrounding hunk header (-58,15 +58,6) and the commit
// subject "bulletproofs: remove single value prover" suggest this overload
// is the removed side of the diff — confirm before relying on it.
Bulletproof proveRangeBulletproof(key &C, key &mask, uint64_t amount)
|
||||
{
|
||||
// The caller's mask is replaced, not read.
mask = rct::skGen();
|
||||
Bulletproof proof = bulletproof_PROVE(amount, mask);
|
||||
CHECK_AND_ASSERT_THROW_MES(proof.V.size() == 1, "V has not exactly one element");
|
||||
C = proof.V[0];
|
||||
return proof;
|
||||
}
|
||||
|
||||
Bulletproof proveRangeBulletproof(keyV &C, keyV &masks, const std::vector<uint64_t> &amounts)
|
||||
{
|
||||
masks = rct::skvGen(amounts.size());
|
||||
|
@ -46,6 +46,8 @@ set(performance_tests_headers
|
||||
range_proof.h
|
||||
bulletproof.h
|
||||
crypto_ops.h
|
||||
sc_reduce32.h
|
||||
sc_check.h
|
||||
multiexp.h
|
||||
multi_tx_test_base.h
|
||||
performance_tests.h
|
||||
|
@ -47,6 +47,7 @@ enum test_op
|
||||
op_scalarmultKey,
|
||||
op_scalarmultH,
|
||||
op_scalarmult8,
|
||||
op_ge_dsm_precomp,
|
||||
op_ge_double_scalarmult_base_vartime,
|
||||
op_ge_double_scalarmult_precomp_vartime,
|
||||
op_ge_double_scalarmult_precomp_vartime2,
|
||||
@ -84,6 +85,7 @@ public:
|
||||
ge_cached tmp_cached;
|
||||
ge_p1p1 tmp_p1p1;
|
||||
ge_p2 tmp_p2;
|
||||
ge_dsmp dsmp;
|
||||
switch (op)
|
||||
{
|
||||
case op_sc_add: sc_add(key.bytes, scalar0.bytes, scalar1.bytes); break;
|
||||
@ -101,6 +103,7 @@ public:
|
||||
case op_scalarmultKey: rct::scalarmultKey(point0, scalar0); break;
|
||||
case op_scalarmultH: rct::scalarmultH(scalar0); break;
|
||||
case op_scalarmult8: rct::scalarmult8(point0); break;
|
||||
case op_ge_dsm_precomp: ge_dsm_precomp(dsmp, &p3_0); break;
|
||||
case op_ge_double_scalarmult_base_vartime: ge_double_scalarmult_base_vartime(&tmp_p2, scalar0.bytes, &p3_0, scalar1.bytes); break;
|
||||
case op_ge_double_scalarmult_precomp_vartime: ge_double_scalarmult_precomp_vartime(&tmp_p2, scalar0.bytes, &p3_0, scalar1.bytes, precomp0); break;
|
||||
case op_ge_double_scalarmult_precomp_vartime2: ge_double_scalarmult_precomp_vartime2(&tmp_p2, scalar0.bytes, precomp0, scalar1.bytes, precomp1); break;
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "is_out_to_acc.h"
|
||||
#include "subaddress_expand.h"
|
||||
#include "sc_reduce32.h"
|
||||
#include "sc_check.h"
|
||||
#include "cn_fast_hash.h"
|
||||
#include "rct_mlsag.h"
|
||||
#include "equality.h"
|
||||
@ -184,6 +185,7 @@ int main(int argc, char** argv)
|
||||
TEST_PERFORMANCE0(filter, p, test_ge_frombytes_vartime);
|
||||
TEST_PERFORMANCE0(filter, p, test_generate_keypair);
|
||||
TEST_PERFORMANCE0(filter, p, test_sc_reduce32);
|
||||
TEST_PERFORMANCE0(filter, p, test_sc_check);
|
||||
TEST_PERFORMANCE1(filter, p, test_signature, false);
|
||||
TEST_PERFORMANCE1(filter, p, test_signature, true);
|
||||
|
||||
@ -249,6 +251,7 @@ int main(int argc, char** argv)
|
||||
TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_scalarmultKey);
|
||||
TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_scalarmultH);
|
||||
TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_scalarmult8);
|
||||
TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_dsm_precomp);
|
||||
TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_double_scalarmult_base_vartime);
|
||||
TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_double_scalarmult_precomp_vartime);
|
||||
TEST_PERFORMANCE1(filter, p, test_crypto_ops, op_ge_double_scalarmult_precomp_vartime2);
|
||||
|
@ -78,9 +78,9 @@ public:
|
||||
case multiexp_straus_cached:
|
||||
return res == straus(data, straus_cache);
|
||||
case multiexp_pippenger:
|
||||
return res == pippenger(data, NULL, c);
|
||||
return res == pippenger(data, NULL, 0, c);
|
||||
case multiexp_pippenger_cached:
|
||||
return res == pippenger(data, pippenger_cache, c);
|
||||
return res == pippenger(data, pippenger_cache, 0, c);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
52
tests/performance_tests/sc_check.h
Normal file
52
tests/performance_tests/sc_check.h
Normal file
@ -0,0 +1,52 @@
|
||||
// Copyright (c) 2018, The Monero Project
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification, are
|
||||
// permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
// conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
// of conditions and the following disclaimer in the documentation and/or other
|
||||
// materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the copyright holder nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without specific
|
||||
// prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "crypto/crypto.h"
|
||||
|
||||
// Performance-test fixture that times sc_check() on a single scalar.
// init() picks the scalar once; test() is the measured call.
class test_sc_check
|
||||
{
|
||||
public:
|
||||
// presumably the number of times the harness invokes test() — confirm
// against the performance test runner.
static const size_t loop_count = 10000000;
|
||||
|
||||
// Fills m_scalar with a random crypto::ec_scalar. Always returns true.
bool init()
|
||||
{
|
||||
m_scalar = crypto::rand<crypto::ec_scalar>();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Runs sc_check on the stored scalar bytes. The result of sc_check is
// deliberately ignored: only the call itself is exercised, so this
// always returns true.
bool test()
|
||||
{
|
||||
sc_check((unsigned char*)m_scalar.data);
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
// Scalar under test, set by init().
crypto::ec_scalar m_scalar;
|
||||
};
|
Loading…
Reference in New Issue
Block a user