From 6f41197a912a6c44fe3a1846958b942f26346f8f Mon Sep 17 00:00:00 2001 From: jeffro256 Date: Thu, 28 Dec 2023 13:30:57 -0600 Subject: [PATCH] dont select locked outputs and remove numpy dep --- docs/DECOY_SELECTION.md | 13 ++- utils/python-rpc/decoy_selection.py | 160 +++++++++++++++++++++------- 2 files changed, 129 insertions(+), 44 deletions(-) mode change 100644 => 100755 utils/python-rpc/decoy_selection.py diff --git a/docs/DECOY_SELECTION.md b/docs/DECOY_SELECTION.md index 0611c2657..98932b4f3 100644 --- a/docs/DECOY_SELECTION.md +++ b/docs/DECOY_SELECTION.md @@ -248,7 +248,8 @@ until we have built up a set of global output indices of a certain desired size. ```Python import bisect -import numpy as np +import math +import random GAMMA_SHAPE = 19.28 GAMMA_RATE = 1.61 @@ -259,21 +260,19 @@ DIFFICULTY_TARGET_V2 = 120 DEFAULT_UNLOCK_TIME = CRYPTONOTE_DEFAULT_TX_SPENDABLE_AGE * DIFFICULTY_TARGET_V2 RECENT_SPEND_WINDOW = 15 * DIFFICULTY_TARGET_V2 -rng = np.random.Generator(np.random.PCG64(seed=None)) - def gamma_pick(crod, average_output_delay, num_usable_rct_outputs): while True: # 1 - x = rng.gamma(GAMMA_SHAPE, GAMMA_SCALE) # parameterized by scale, not rate! + x = random.gammavariate(GAMMA_SHAPE, GAMMA_SCALE) # parameterized by scale, not rate! 
# 2 - target_output_age = np.exp(x) + target_output_age = math.exp(x) # 3 if target_output_age > DEFAULT_UNLOCK_TIME: target_post_unlock_output_age = target_output_age - DEFAULT_UNLOCK_TIME else: - target_post_unlock_output_age = np.floor(rng.uniform(0.0, RECENT_SPEND_WINDOW)) + target_post_unlock_output_age = math.floor(random.uniform(0.0, RECENT_SPEND_WINDOW)) # 4 target_num_outputs_post_unlock = int(target_post_unlock_output_age / average_output_delay) @@ -302,7 +301,7 @@ def gamma_pick(crod, average_output_delay, num_usable_rct_outputs): continue # 11 - global_output_index_result = int(rng.uniform(block_first_global_output_index, crod[picked_block_index])) + global_output_index_result = int(random.uniform(block_first_global_output_index, crod[picked_block_index])) return global_output_index_result ``` diff --git a/utils/python-rpc/decoy_selection.py b/utils/python-rpc/decoy_selection.py old mode 100644 new mode 100755 index 047205759..baccac8d4 --- a/utils/python-rpc/decoy_selection.py +++ b/utils/python-rpc/decoy_selection.py @@ -4,19 +4,14 @@ import argparse import bisect -try: - import numpy as np -except: - print('numpy must be installed!') - exit(1) -import requests +import math +import os +import random import sys import framework.daemon -rng = np.random.Generator(np.random.PCG64(seed=None)) - -# Section: "First, Some Numeric Constants" +##### Section: "First, Some Numeric Constants" ##### GAMMA_SHAPE = 19.28 GAMMA_RATE = 1.61 GAMMA_SCALE = 1 / GAMMA_RATE @@ -29,7 +24,7 @@ RECENT_SPEND_WINDOW = 15 * DIFFICULTY_TARGET_V2 SECONDS_IN_A_YEAR = 60 * 60 * 24 * 365 BLOCKS_IN_A_YEAR = SECONDS_IN_A_YEAR // DIFFICULTY_TARGET_V2 -# Section: "How to calculate `average_output_delay`" +##### Section: "How to calculate `average_output_delay`" ##### def calculate_average_output_delay(crod): # 1 num_blocks_to_consider_for_delay = min(len(crod), BLOCKS_IN_A_YEAR) @@ -41,11 +36,12 @@ def calculate_average_output_delay(crod): num_outputs_to_consider_for_delay = crod[-1] # 3 - 
average_output_delay = DIFFICULTY_TARGET_V2 * num_blocks_to_consider_for_delay / num_outputs_to_consider_for_delay + average_output_delay = DIFFICULTY_TARGET_V2 * num_blocks_to_consider_for_delay \ + / num_outputs_to_consider_for_delay return average_output_delay -# Section: "How to calculate `num_usable_rct_outputs`" +##### Section: "How to calculate `num_usable_rct_outputs`" ##### def calculate_num_usable_rct_outputs(crod): # 1 num_usable_crod_blocks = len(crod) - (CRYPTONOTE_DEFAULT_TX_SPENDABLE_AGE - 1) @@ -55,20 +51,20 @@ def calculate_num_usable_rct_outputs(crod): return num_usable_rct_outputs -# Section: "The Gamma Pick" +##### Section: "The Gamma Pick" ##### def gamma_pick(crod, average_output_delay, num_usable_rct_outputs): while True: # 1 - x = rng.gamma(GAMMA_SHAPE, GAMMA_SCALE) # parameterized by scale, not rate! + x = random.gammavariate(GAMMA_SHAPE, GAMMA_SCALE) # parameterized by scale, not rate! # 2 - target_output_age = np.exp(x) + target_output_age = math.exp(x) # 3 if target_output_age > DEFAULT_UNLOCK_TIME: target_post_unlock_output_age = target_output_age - DEFAULT_UNLOCK_TIME else: - target_post_unlock_output_age = np.floor(rng.uniform(0.0, RECENT_SPEND_WINDOW)) + target_post_unlock_output_age = math.floor(random.uniform(0.0, RECENT_SPEND_WINDOW)) # 4 target_num_outputs_post_unlock = int(target_post_unlock_output_age / average_output_delay) @@ -97,54 +93,144 @@ def gamma_pick(crod, average_output_delay, num_usable_rct_outputs): continue # 11 - global_output_index_result = int(rng.uniform(block_first_global_output_index, crod[picked_block_index])) + global_output_index_result = int(random.uniform(block_first_global_output_index, + crod[picked_block_index])) return global_output_index_result +def gamma_pick_n_unlocked(num_picks, crod, get_is_outputs_unlocked): + # This is the maximum number of outputs we can fetch in one restricted RPC request + # Line 67 of src/rpc/core_rpc_server.cpp in commit ac02af92 + MAX_GETS_OUTS_COUNT = 5000 + + # 
Calculate average_output_delay & num_usable_rct_outputs for given CROD + average_output_delay = calculate_average_output_delay(crod) + num_usable_rct_outputs = calculate_num_usable_rct_outputs(crod) + + # Maps RingCT global output index -> whether that output is unlocked at this moment + # This saves a ton of time for huge numbers of picks + is_unlocked_cache = {} + + # Potential picks to be written, # of potential picks of unknown lockedness, and total # picked + buffered_picks = [] + num_picks_unknown_locked = 0 + num_total_picked = 0 + + # Main picking / RPC loop + while num_total_picked < num_picks: + # Do gamma pick + new_pick = gamma_pick(crod, average_output_delay, num_usable_rct_outputs) + buffered_picks.append(new_pick) + num_picks_unknown_locked += int(new_pick not in is_unlocked_cache) + + # Once num_picks_unknown_locked or buffered_picks is large enough, trigger "flush"... + should_flush = num_picks_unknown_locked == MAX_GETS_OUTS_COUNT or \ + num_total_picked + len(buffered_picks) == num_picks + if should_flush: + # Update is_unlocked_cache if # outputs w/ unknown locked status is non-zero + unknown_locked_outputs = [o for o in buffered_picks if o not in is_unlocked_cache] + assert len(unknown_locked_outputs) == num_picks_unknown_locked + if unknown_locked_outputs: + is_unlocked = get_is_outputs_unlocked(unknown_locked_outputs) + assert len(is_unlocked) == len(unknown_locked_outputs) + for i, o in enumerate(unknown_locked_outputs): + is_unlocked_cache[o] = is_unlocked[i] + num_picks_unknown_locked = 0 + + # Yield the buffered picks + for buffered_pick in buffered_picks: + # If pick is locked, skip to next buffered pick + if not is_unlocked_cache[buffered_pick]: + continue + + yield buffered_pick + num_total_picked += 1 + + # Clear buffer + buffered_picks.clear() + def main(): # Handle CLI arguments arg_parser = argparse.ArgumentParser(prog='Decoy Selection Python Reference', - description='We provide an easy-to-read non-fingerprinting reference for Monero 
decoy selection', + description='We provide an easy-to-read non-fingerprinting reference for ' \ + 'Monero decoy selection', epilog='Remember: Don\'t be Unique!') - arg_parser.add_argument('-t', '--to-height', default=0, type=int) arg_parser.add_argument('-n', '--num-picks', default=1000000, type=int) - arg_parser.add_argument('-o', '--output-file', default='python_decoy_selections.txt') + arg_parser.add_argument('-o', '--output-file-prefix', default='decoy_selections') arg_parser.add_argument('-d', '--daemon-host', default='127.0.0.1') arg_parser.add_argument('-p', '--daemon-port', default=18081, type=int) + arg_parser.add_argument('--allow-output-overwrite', action='store_true') + arg_parser.add_argument('--allow-chain-update', action='store_true') args = arg_parser.parse_args() # Create connection to monerod daemon = framework.daemon.Daemon(host=args.daemon_host, port=args.daemon_port) - # Fetch the CROD - print("Fetching the CROD up to height {} from daemon at '{}:{}'...".format( - '' if args.to_height == 0 else args.to_height, args.daemon_host, args.daemon_port)) - try: - res = daemon.get_output_distribution(amounts=[0], cumulative=True, to_height=args.to_height) - except requests.exceptions.ConnectionError: - print("Error: could not connect to daemon!", file=sys.stderr) + # Fetch the top block hash at the beginning of the picking. We will do this again at the end to + # assert that the unlocked status of the set of all outputs didn't change. 
This is a practical + # detail that makes conformance testing more consistent + early_top_block_hash = daemon.get_info().top_block_hash + + # Construct output file name and check that it doesn't already exist + output_file_name = "{}_{}_{}.dat".format(args.output_file_prefix, early_top_block_hash, + args.num_picks) + if os.path.isfile(output_file_name) and not args.allow_output_overwrite: + print("File '{}' already exists".format(output_file_name), file=sys.stderr) exit(1) + + # Fetch the CROD + print("Fetching the CROD as of block {} from daemon '{}:{}'...".format( + early_top_block_hash, args.daemon_host, args.daemon_port)) + res = daemon.get_output_distribution(amounts=[0], cumulative=False) rct_dist_info = res['distributions'][0] crod = rct_dist_info['distribution'] assert rct_dist_info['base'] == 0 print("The start height of the CROD is {}, and the top height is {}.".format( rct_dist_info['start_height'], rct_dist_info['start_height'] + len(crod) - 1)) - # Calculate average_output_delay & num_usable_rct_outputs for given CROD - average_output_delay = calculate_average_output_delay(crod) - num_usable_rct_outputs = calculate_num_usable_rct_outputs(crod) + # Accumulate the CROD since it is fetched uncumulative for compactness over the wire + for i in range(len(crod) - 1): + crod[i + 1] += crod[i] - # Do gamma picking and write output - print("Performing {} picks and writing output to '{}'...".format(args.num_picks, args.output_file)) - print_period = args.num_picks // 1000 if args.num_picks >= 1000 else 1 - with open(args.output_file, 'w') as outf: - for i in range(args.num_picks): + # Define our unlockedness fetcher: this functor simply takes a list of RCT global indexes + # and returns a list of true/false if is unlocked for each index using RPC endpoint /get_outs + def get_is_outputs_unlocked(rct_output_indices): + res = daemon.get_outs([{'index': o, 'amount': 0} for o in rct_output_indices]) + assert len(res.outs) == len(rct_output_indices) + return 
[o.unlocked for o in res.outs] + + # Main gamma picking / output loop + print("Performing {} picks and writing output to '{}'...".format(args.num_picks, output_file_name)) + with open(output_file_name, 'w') as outf: + for i, pick in enumerate(gamma_pick_n_unlocked(args.num_picks, crod, get_is_outputs_unlocked)): + # Print progress + print_period = args.num_picks // 1000 if args.num_picks >= 1000 else 1 if (i+1) % print_period == 0: progress = (i+1) / args.num_picks * 100 print("Progress: {:.1f}%".format(progress), end='\r') - pick = gamma_pick(crod, average_output_delay, num_usable_rct_outputs) + + # Write pick to file print(pick, file=outf) - print() + + print() + + # Fetch the top block hash at the end of the picking and check that it matches the top block + # hash at the beginning of the picking. If it doesn't, then exit with a failure message and + # delete the data file (if enforcing). There is a tiny chance that we start with block A, + # reorg to B, then reorg back to A, but this unlikely scenario is probably not worth handling. + later_top_block_hash = daemon.get_info().top_block_hash + if later_top_block_hash != early_top_block_hash: + print("The top block hash changed from {} to {}! This will harm statistical analysis!". + format(early_top_block_hash, later_top_block_hash), file=sys.stderr) + + if not args.allow_chain_update: + os.remove(output_file_name) + print("This script enforces that we start and finish with the same top block hash " + "so we can get more consistent results. If you want to ensure that your node " + "doesn't update its blockchain state, you can start it offline with the CLI flag " + "--offline. Alternatively, you can run this script with the CLI flag " + "--allow-chain-update.") + exit(1) if __name__ == '__main__': main()