Use different fwdOut multipliers for each subnetwork. These are SPSA-tuned alongside the L1 biases.
To apply the tuned parameters, I simply scanned the bytes of the NNUE files and replaced the matching patterns with the tuned values, because calculating the offsets by hand was too tedious.
patch-net.py
import argparse
import hashlib
import json
import os
import shutil
import subprocess
# Template for the generated C++ header ``patcher.h``.
#
# The text is passed through ``str.format`` with two fields, ``big`` and
# ``small``, each receiving the rows of an 8x16 brace initializer.  Doubled
# braces (``{{`` / ``}}``) are ``str.format`` escapes that produce the literal
# braces surrounding each C++ array initializer.
SOURCE = (
    "#ifndef PATCHER_H_\n"
    "#define PATCHER_H_\n"
    "#include <cstdint>\n"
    "std::int32_t gBigL1BiasesPatch[8][16] = {{\n"
    "{big}\n"
    "}};\n"
    "std::int32_t gSmallL1BiasesPatch[8][16] = {{\n"
    "{small}\n"
    "}};\n"
    "#endif // PATCHER_H_\n"
)
def _load_matrix(spsa: dict, name: str, rows: int = 8, cols: int = 16) -> list:
    """Return the tuned ``name[i][j]`` values as a rows x cols table of ints."""
    return [
        [round(float(spsa[f"{name}[{i}][{j}]"]["value"])) for j in range(cols)]
        for i in range(rows)
    ]


def _load_vector(spsa: dict, name: str, count: int = 8) -> list:
    """Return the tuned ``name[i]`` values as a list of ``count`` ints."""
    return [round(float(spsa[f"{name}[{i}]"]["value"])) for i in range(count)]


def _brace_list(values) -> str:
    """Format *values* as a C++ brace initializer such as ``{ 1, 2, 3 }``."""
    return "{ " + ", ".join(str(v) for v in values) + " }"


def _initializer_rows(matrix) -> str:
    """Format each row of *matrix* as a brace initializer, one row per line."""
    return ",\n".join(" " + _brace_list(row) for row in matrix)


def _rename_to_hash(filename: str) -> str:
    """Rename *filename* to ``nn-<first 12 hex digits of its SHA-256>.nnue``.

    The digest is computed with ``hashlib`` rather than by shelling out to
    ``sha256sum``, so the script does not depend on that tool being installed
    and cannot silently produce ``nn-.nnue`` when the tool fails.

    Returns:
        The new file name.

    Raises:
        OSError: if the file cannot be read or renamed.
    """
    digest = hashlib.sha256()
    with open(filename, "rb") as f:
        # Read in chunks so the whole network never has to sit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    new_filename = f"nn-{digest.hexdigest()[:12]}.nnue"
    os.rename(filename, new_filename)
    print(f" {filename} -> {new_filename}")
    return new_filename


def main():
    """Apply SPSA-tuned values to the networks and print the multipliers.

    Steps, all relative to the current working directory:

    1. Parse the SPSA result JSON given on the command line.
    2. Write the rounded L1 bias tables into ``patcher.h``.
    3. Build ``patcher.cc`` with ``clang++`` and run the resulting binary
       (presumably it includes ``patcher.h`` and rewrites ``nn-big.nnue`` /
       ``nn-small.nnue`` in place -- confirm against ``patcher.cc``).
    4. Rename both networks after their SHA-256 prefix and copy them to
       ``../../src/``.
    5. Print the rounded fwdOut multipliers as brace initializers.

    Raises:
        KeyError: if an expected parameter is missing from the JSON.
        subprocess.CalledProcessError: if the build or the patcher fails.
        OSError: if a file cannot be written, hashed, renamed or copied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("spsa", type=argparse.FileType("r"))
    args = parser.parse_args()

    # Close the handle argparse opened as soon as the JSON is parsed.
    with args.spsa:
        spsa = json.load(args.spsa)

    # Extract every tuned value up front so that a missing key aborts the run
    # before any file is written, patched or renamed.
    biases_big = _load_matrix(spsa, "gBigL1Biases")
    biases_small = _load_matrix(spsa, "gSmallL1Biases")
    fwdout_big = _load_vector(spsa, "gBigFwdOutMultiplier")
    fwdout_small = _load_vector(spsa, "gSmallFwdOutMultiplier")

    header = SOURCE.format(
        big=_initializer_rows(biases_big),
        small=_initializer_rows(biases_small),
    )
    with open("patcher.h", "w") as f:
        f.write(header)

    # check=True: never continue to rename/copy networks after a failed build
    # or a failed patch run.
    print("Building patcher...")
    subprocess.run(["clang++", "-o", "patcher", "patcher.cc"], check=True)

    print("Running patcher...")
    try:
        subprocess.run(["./patcher"], check=True)
    finally:
        # Remove the temporary binary even when the patcher itself failed.
        os.unlink("patcher")

    print("Renaming patched networks...")
    big = _rename_to_hash("nn-big.nnue")
    small = _rename_to_hash("nn-small.nnue")

    print("Copying patched networks...")
    shutil.copy(big, f"../../src/{big}")
    shutil.copy(small, f"../../src/{small}")

    print()
    print("FwdOutMultipliersBig = " + _brace_list(fwdout_big))
    print("FwdOutMultipliersSmall = " + _brace_list(fwdout_small))
# Run the patching workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
The first SPSA tuning session did not go well, presumably because the $c_k$ values were too high. Following linrock's and Viren's suggestion, the second SPSA test was launched with much lower $c_{\text{end}}$ values (128).