import concurrent.futures
import multiprocessing

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# NOTE: train_NN is assumed to be defined elsewhere in this project (or imported
# here); it is expected to return (weights, loss_history) for the given data and
# hyperparameters.


def evaluate_model(params):
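    """Train one network configuration and return its per-epoch loss history.

    `params` is a flat tuple so the function can be mapped over an executor
    (a tuple also pickles cleanly for a process pool).
    """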
hidden_sizes, lr, epochs, sim, X, y, input_size, output_size, activation_type = params
# np.random.seed(sim) # Uncomment for reproducibility if desired
weights, loss_history = train_NN(X, y, epochs, lr, hidden_sizes, output_size, activation_type)
# print(f"Simulations: {sim}; Final loss: {loss_history[epochs - 1]}")
loss_dict = {}
for i in range(epochs):
loss_dict[i] = loss_history[i]
return loss_dict, hidden_sizes, lr, epochs
def tune_nn_parallel(X, y, num_sims=10, output_size=1, epochs=[10000], learning_rates=[0.1], num_hidden_layers=[2], activation_type="sigmoid"):
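    """Grid-search hidden-layer counts, learning rates, and epoch budgets in parallel.

    Each combination is trained `num_sims` times; the result is a DataFrame with
    one column of per-epoch losses per (simulation, configuration) pair.
    """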
input_size = X.shape[1]
# Convert DataFrame values to float64.
X = X.values.astype(np.float64)
y = y.values.astype(np.float64)
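    # One candidate architecture per entry in num_hidden_layers, with input_size units in every hidden layer.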
hidden_sizes_options = [[input_size for _ in range(hidden_layer_length)] for hidden_layer_length in num_hidden_layers]
epochs_options = epochs
params_list = []
for sim in range(num_sims):
for h, hs in enumerate(hidden_sizes_options):
for l, lr in enumerate(learning_rates):
for e, epoch in enumerate(epochs_options):
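                    # Flatten (sim, h, l, e) into a unique task index, usable as a per-task RNG seed.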
sim_num = sim * len(hidden_sizes_options) * len(learning_rates) * len(epochs_options) \
+ h * len(learning_rates) * len(epochs_options) \
+ l * len(epochs_options) + e
params_list.append((hs, lr, epoch, sim_num, X, y, input_size, output_size, activation_type))
    # Training is CPU-bound, so a process pool avoids the GIL (the "fork" start
    # method is configured in the __main__ guard below).
    with concurrent.futures.ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
results = list(executor.map(evaluate_model, params_list))
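    # Each result is (loss_dict, hidden_sizes, lr, epochs); collect the losses into a by-epoch DataFrame.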
loss_dict_list = [r[0] for r in results]
loss_df = pd.DataFrame(loss_dict_list).T
    # Label each column with its hidden-layer sizes and learning rate, e.g. "[31, 31]0.1".
    loss_df.columns = [f"{hs}{lr}" for (_, hs, lr, _) in results]
return loss_df
def main():
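    """Tune the network on 10 random datasets and plot loss and convergence summaries."""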
best_dict = {}
activation_type = "sigmoid"
for seed in range(10):
np.random.seed(seed)
# Create a small dataset.
n = 100
k = 30 # number of raw features
x_names = [f"x{i}" for i in range(1, k + 1)]
X_data = [np.random.randint(0, 2, k) for _ in range(n)]
y_data = np.random.randint(0, 2, n).reshape(-1, 1)
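        # Labels are drawn independently of the features, so the network must memorize random labels.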
learning_rates = [0.1, 0.2, 0.3]
epochs = 100000
df = pd.DataFrame(X_data, columns=x_names)
df['y'] = y_data
        X_df = df[x_names].copy()  # copy to avoid SettingWithCopyWarning on the next line
        X_df["bias"] = 1
y_df = df[['y']]
output_size = 1
num_sims = 10
        num_hidden_layers = list(range(1, 4))  # Try 1 to 3 hidden layers
# build kwargs
kwargs = {"X":X_df, "y":y_df,
"num_sims": num_sims, "output_size": output_size, "epochs": [epochs],
"num_hidden_layers": num_hidden_layers, "learning_rates": learning_rates,
"activation_type": activation_type}
        loss_df = tune_nn_parallel(**kwargs)
loss_keys = list(loss_df.keys())
# Extract hidden layer and learning rate info from column names.
hidden_layers = []
learning_keys = []
for key in loss_keys:
split_index = key.find("]")
hidden_layers.append(key[:split_index+1])
learning_keys.append(key[split_index+1:])
hidden_layers = set(hidden_layers)
learning_keys = set(learning_keys)
hidden_layer_colors = {hidden_layer: f"C{i}" for i, hidden_layer in enumerate(hidden_layers)}
ID = f"Set: {seed}"
title = f"Loss by Epoch\n {ID}"
fig, ax = plt.subplots(figsize=(20,10))
        loss_df.plot(legend=False, ls="", marker=".", markersize=0.5, alpha=0.02,
                     color=[hidden_layer_colors[key[:key.find("]") + 1]] for key in loss_keys],
                     fontsize=28, ax=ax)
ax.set_title(title, fontsize=28)
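        # Second figure: fraction of simulations with loss below 1e-4 at each epoch.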
fig, ax = plt.subplots(figsize=(20,10))
converged_to_1 = {}
for i, hidden_layer in enumerate(hidden_layers):
            # hidden_layer is a stringified list (e.g. "[31, 31]"); count separators to recover the layer count.
            num_hl = hidden_layer.count(",") + 1
converged_to_1[num_hl] = {}
hidden_layer_df = loss_df[[key for key in loss_keys if hidden_layer in key]]
for j, learning_key in enumerate(learning_keys):
learning_key_df = hidden_layer_df[[key for key in hidden_layer_df.keys() if learning_key in key]]
                # Fraction of simulations whose loss is below 1e-4 at each epoch.
                frac_below_tol = (learning_key_df < 1e-4).mean(axis=1)
                frac_below_tol.plot(ax=ax,
                                    color=f"C{i * len(learning_keys) + j}",
                                    label=f"HL: {num_hl}, LR: {learning_key}",
                                    legend=False, fontsize=28)
                # Fraction of epochs at which every simulation has converged.
                converged_to_1[num_hl][learning_key] = (frac_below_tol == 1.0).mean()
ID = f"Set: {seed}"
title = f"Percentage of Simulations with Loss < 1e-4 by Epoch\n {ID}"
ax.set_title(title, fontsize=28)
        ax.legend(ncol=1, fontsize=24,
                  # Place the legend just outside the upper-right corner of the axes.
                  bbox_to_anchor=(0.99, 1.0225), loc='upper left')
plt.show()
        # The (hidden layers, learning rate) pair with the largest fraction of fully
        # converged epochs is the one that reached 100% convergence earliest.
        converged_to_1 = pd.DataFrame(converged_to_1)
        converged_to_1 = converged_to_1[sorted(converged_to_1.columns)]  # sort columns (hidden layer counts)
        converged_to_1 = converged_to_1.sort_index()  # sort index (learning rates)
        max_row_index, max_col_index = np.unravel_index(converged_to_1.values.argmax(), converged_to_1.shape)
        max_col, max_row = converged_to_1.columns[max_col_index], converged_to_1.index[max_row_index]
best_dict[seed] = f"{max_col}, {max_row}"
pd.DataFrame(best_dict, index = ["Best Parameters"]).T.value_counts().plot(kind="bar", figsize=(20,12), fontsize=28)
plt.title("Hyperparameters Counts: (Hidden Layers, Learning Rate)", fontsize=28)
if __name__ == '__main__':
    try:
        multiprocessing.set_start_method("fork")  # "fork" is Unix-only; fall back to the platform default elsewhere.
    except (RuntimeError, ValueError):
        pass
multiprocessing.freeze_support()
main()