Use different fwdOut multipliers for each subnetwork. These are SPSA tuned alongside L1 biases.
To apply the tuned parameters, I simply scanned the bytes of the NNUE files and replaced the matching patterns with the tuned values, because calculating the exact offsets was too tedious.
patch-net.py
"""Apply SPSA-tuned L1 biases to NNUE networks and print fwdOut multipliers.

The script reads an SPSA result file (JSON), generates ``patcher.h`` with the
tuned L1 bias tables, builds and runs ``patcher.cc`` from the current
directory, renames the patched networks after their SHA-256 digest, copies
them to ``../../src/`` and finally prints the tuned fwdOut multipliers as C++
initializer lists.
"""
import argparse
import hashlib
import json
import os
import shutil
import subprocess

# C++ header template; ``{big}`` / ``{small}`` receive the formatted bias rows.
# Doubled braces are literal braces for ``str.format``.
SOURCE = """
#ifndef PATCHER_H_
#define PATCHER_H_
#include <cstdint>
std::int32_t gBigL1BiasesPatch[8][16] = {{{big}}};
std::int32_t gSmallL1BiasesPatch[8][16] = {{{small}}};
#endif // PATCHER_H_
""".lstrip()


def tuned_int(spsa, key):
    """Return the tuned ``value`` stored under *key*, rounded to an int.

    Raises:
        KeyError: if *key* (or its ``"value"`` entry) is missing from *spsa*.
    """
    return round(float(spsa[key]["value"]))


def bias_table(spsa, name):
    """Return the 8x16 table of tuned ints stored as ``name[i][j]`` keys."""
    return [
        [tuned_int(spsa, f"{name}[{i}][{j}]") for j in range(16)]
        for i in range(8)
    ]


def fwdout_multipliers(spsa, name):
    """Return the 8 tuned ints stored as ``name[i]`` keys."""
    return [tuned_int(spsa, f"{name}[{i}]") for i in range(8)]


def braced(values):
    """Format *values* as a C++ initializer list, e.g. ``{1, 2, 3}``."""
    return "{" + ", ".join(str(n) for n in values) + "}"


def render_header(biases_big, biases_small):
    """Fill the ``SOURCE`` template with the two bias tables."""

    def rows(table):
        # One braced row per line, each prefixed by a single space.
        return ",\n".join(" " + braced(row) for row in table)

    return SOURCE.format(big=rows(biases_big), small=rows(biases_small))


def hashed_name(filename):
    """Return ``nn-<first 12 hex chars of the file's SHA-256>.nnue``.

    The digest is computed in-process, so no external ``sha256sum`` binary is
    required and a hashing failure raises instead of producing ``nn-.nnue``.
    """
    digest = hashlib.sha256()
    with open(filename, "rb") as f:
        # Read in chunks so the whole network is never held in memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return f"nn-{digest.hexdigest()[:12]}.nnue"


def rename(filename: str):
    """Rename *filename* to its content-hash name and return the new name."""
    new_filename = hashed_name(filename)
    os.rename(filename, new_filename)
    print(f" {filename} -> {new_filename}")
    return new_filename


def main():
    """Run the whole patch pipeline for the SPSA file given on the command line.

    Raises:
        KeyError: if an expected parameter is missing from the SPSA file.
        subprocess.CalledProcessError: if building or running the patcher
            fails; the networks are then neither renamed nor copied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("spsa", type=argparse.FileType("r"))
    args = parser.parse_args()
    # Close the handle argparse opened as soon as the JSON is parsed.
    with args.spsa as spsa_file:
        spsa = json.load(spsa_file)

    # Read every tuned value up front so that a missing key aborts the run
    # before any file is written, built, renamed or copied.
    biases_big = bias_table(spsa, "gBigL1Biases")
    biases_small = bias_table(spsa, "gSmallL1Biases")
    fwdout_big = fwdout_multipliers(spsa, "gBigFwdOutMultiplier")
    fwdout_small = fwdout_multipliers(spsa, "gSmallFwdOutMultiplier")

    with open("patcher.h", "w") as f:
        f.write(render_header(biases_big, biases_small))

    print("Building patcher...")
    # check=True: a failed build must not fall through to the later steps.
    subprocess.run(["clang++", "-o", "patcher", "patcher.cc"], check=True)
    try:
        print("Running patcher...")
        subprocess.run(["./patcher"], check=True)
    finally:
        # Remove the temporary binary even when the patcher run fails.
        os.unlink("patcher")

    # NOTE(review): nn-big.nnue / nn-small.nnue are presumably produced by the
    # patcher binary; patcher.cc is not visible here - confirm.
    print("Renaming patched networks...")
    big = rename("nn-big.nnue")
    small = rename("nn-small.nnue")

    print("Copying patched networks...")
    shutil.copy(big, f"../../src/{big}")
    shutil.copy(small, f"../../src/{small}")

    print()
    print("FwdOutMultipliersBig = " + braced(fwdout_big))
    print("FwdOutMultipliersSmall = " + braced(fwdout_small))


if __name__ == "__main__":
    main()
The first SPSA tuning session did not go well, presumably because the ck values were too high. Following linrock's and Viren's suggestion, the second SPSA test was launched with much lower cend values (128).