import numpy as np
import pandas as pd
try:
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib_seaborn_installed = True
except ImportError:
matplotlib_seaborn_installed = False
from rumboost.utility_smoothing import (
monotone_spline,
mean_monotone_spline,
data_leaf_value,
)
if not matplotlib_seaborn_installed:
raise ImportError(
"Please install matplotlib and seaborn to use this module. You can do so by running 'pip install matplotlib seaborn'"
)
[docs]
def plot_2d(
model,
feature1: str,
feature2: str,
min1: int,
max1: int,
min2: int,
max2: int,
save_figure: bool = False,
utility_names: list[str] = ["Walking", "Cycling", "Public Transport", "Driving"],
num_points=1000,
):
"""
Plot a 2nd order feature interaction as a contour plot.
Parameters
----------
model : RUMBoost
A RUMBoost object.
feature1 : str
Name of feature 1.
feature2 : str
Name of feature 2.
min1 : int
Minimum value of feature 1.
max1 : int
Maximum value of feature 1.
min2 : int
Minimum value of feature 2.
max2 : int
Maximum value of feature 2.
save_figure : bool, optional (default = False)
If true, save the figure as a png file
utility_names : list[str], optional (default=['Walking', 'Cycling', 'Public Transport', 'Driving'])
List of the alternative names
num_points : int, optional (default=1000)
The number of points per axis. The total number of points is num_points**2.
"""
_, weights_2d, _ = get_weights(model=model)
weights_ordered = weights_to_plot_v2(model=model)
name1 = feature1 + "-" + feature2
name2 = feature2 + "-" + feature1
x_vect = np.linspace(min1, max1, num_points)
y_vect = np.linspace(min2, max2, num_points)
# to generalise
utility_names = ["Walking", "Cycling", "PT", "Driving"]
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
"legend.borderaxespad": 0.4,
"legend.borderpad": 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
}
sns.set_theme(font_scale=1, rc=tex_fonts)
# sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
for u in weights_2d.Utility.unique():
weights_2d_util = weights_2d[weights_2d.Utility == u]
contour_plot1 = function_2d(
weights_2d_util[weights_2d_util.Feature == name1], x_vect, y_vect
)
contour_plot2 = function_2d(
weights_2d_util[weights_2d_util.Feature == name2], y_vect, x_vect
)
contour_plot = contour_plot1 + contour_plot2.T
if np.sum(contour_plot) == 0:
continue
if (feature1 in weights_ordered[str(u)].keys()) and (
feature2 in weights_ordered[str(u)].keys()
):
_, feature1_alone = non_lin_function(
weights_ordered[str(u)][feature1], min1, max1, num_points
)
feature1_grid = np.repeat(feature1_alone, num_points).reshape(
(num_points, num_points)
)
contour_plot += feature1_grid
_, feature2_alone = non_lin_function(
weights_ordered[str(u)][feature2], min2, max2, num_points
)
feature2_grid = (
np.repeat(feature2_alone, num_points)
.reshape((num_points, num_points))
.T
)
contour_plot += feature2_grid
contour_plot -= contour_plot.max()
colors = ["#F5E5E2", "#DF7057", "#A31D04"]
customPalette = sns.set_palette(sns.color_palette(colors, as_cmap=True))
if np.sum(contour_plot) != 0:
X, Y = np.meshgrid(x_vect, y_vect)
fig, axes = plt.subplots(figsize=(3.49, 3), layout="constrained", dpi=1000)
res = num_points
c_plot = axes.contourf(
X,
Y,
contour_plot.T,
levels=res,
linewidths=0,
cmap=customPalette,
vmin=-12,
vmax=0,
)
# axes.set_title(f'{utility_names[int(u)]}')
axes.set_xlabel(f"{feature1} [h]")
axes.set_ylabel(f"{feature2}")
cbar = fig.colorbar(c_plot, ax=axes, ticks=[-10, -8, -6, -4, -2, 0])
cbar.ax.set_ylabel("Utility")
if save_figure:
plt.savefig(
"Figures/FI RUMBoost/age_travel_time_{}.png".format(
utility_names[int(u)]
)
)
plt.show()
[docs]
def plot_parameters(
model,
X,
utility_names,
feature_names=None,
asc_normalised=True,
with_asc=False,
xlabel_max=None,
only_tt=False,
only_1d=True,
sm_tt_cost=False,
num_iteration=None,
ylim=None,
boost_from_parameter_space=None,
group_feature=None,
save_file="",
):
"""
Plot the non linear impact of parameters on the utility function.
Parameters
----------
model : RUMBoost
A RUMBoost object.
X : pandas dataframe
Features used to train the model, in a pandas dataframe.
utility_name : dict
Dictionary mapping booster indices to their utility names.
Keys should be a string of the booster index, and values should be the utility name.
feature_names : list, optional (default = None)
List of feature names.
asc_normalised : bool, optional (default = True)
If True, scale down utilities to be zero at the y axis.
with_asc : bool, optional (default = False)
If True, add the ASCs to all graphs (one is normalised, and asc_normalised must be True).
xlabel_max : dict, optional (default = None)
Dictionary mapping boosters to their maximum value on the x axis.
only_tt : bool, optional (default = False)
If True, plot only travel time and distance.
only_1d : bool, optional (default = True)
If False, plot only the features separately.
sm_tt_cost : bool, optional (default = False)
If True, plot only the swissmetro travel time and cost on the same figure.
num_iteration : int, optional (default = None)
The number of iterations to plot. If None, plot all iterations.
ylim : list[tuple], optional (default = None)
List of tuples containing the y limits for each plot.
boost_from_parameter_space : dict[dict[bool]], optional (default = None)
Dictionary of dictionary mapping booster to their type of boosting (parameter or utility space).
First key should be a string of the booster index, first value / second key
should be the utility name and second value is True if boosted from parameter space, False otherwise.
group_feature : dict, optional (default = None)
This variable can be used if a feature have several ensembles, and we want to group all ensembles in one plot.
Keys should be the feature name, and values should be the list of ensembles index in rum_structure.
save_file : str, optional (default='')
The name to save the figure with. The figure will be saved only if save_file is not an empty string.
"""
weights_arranged = weights_to_plot_v2(model, num_iteration=num_iteration)
if with_asc:
ASCs = get_asc(weights_arranged)
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
"legend.borderaxespad": 0.4,
"legend.borderpad": 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
}
sns.set_theme(font_scale=1, rc=tex_fonts)
# sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
if sm_tt_cost:
# plot for travel time on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_rail = non_lin_function(
weights_arranged["0"]["TRAIN_TT"], 0, 600, 10000
)
if asc_normalised:
non_lin_func_rail = [n - non_lin_func_rail[0] for n in non_lin_func_rail]
if with_asc:
non_lin_func_rail = [n + ASCs[0] for n in non_lin_func_rail]
x_c, non_lin_func_SM = non_lin_function(
weights_arranged["1"]["SM_TT"], 0, 600, 10000
)
if asc_normalised:
non_lin_func_SM = [n - non_lin_func_SM[0] for n in non_lin_func_SM]
if with_asc:
non_lin_func_SM = [n + ASCs[1] for n in non_lin_func_SM]
x_d, non_lin_func_driving = non_lin_function(
weights_arranged["2"]["CAR_TT"], 0, 600, 10000
)
if asc_normalised:
non_lin_func_driving = [
n - non_lin_func_driving[0] for n in non_lin_func_driving
]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w / 60, y=non_lin_func_rail, color="g", label="Rail")
sns.lineplot(x=x_c / 60, y=non_lin_func_SM, color="#6b8ba4", label="Swissmetro")
sns.lineplot(
x=x_d / 60, y=non_lin_func_driving, color="orange", label="Driving"
)
# plt.title('Influence of alternative travel time on the utility function', fontdict={'fontsize': 16})
plt.xlabel("Travel time [h]")
plt.ylabel("Utility")
plt.tight_layout()
if save_file:
plt.savefig("Figures/RUMBoost/SwissMetro/travel_time.png")
# plot for travel time on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_rail = non_lin_function(
weights_arranged["0"]["TRAIN_COST"], 0, 500, 10000
)
if asc_normalised:
non_lin_func_rail = [n - non_lin_func_rail[0] for n in non_lin_func_rail]
if with_asc:
non_lin_func_rail = [n + ASCs[0] for n in non_lin_func_rail]
x_c, non_lin_func_SM = non_lin_function(
weights_arranged["1"]["SM_COST"], 0, 500, 10000
)
if asc_normalised:
non_lin_func_SM = [n - non_lin_func_SM[0] for n in non_lin_func_SM]
if with_asc:
non_lin_func_SM = [n + ASCs[1] for n in non_lin_func_SM]
x_d, non_lin_func_driving = non_lin_function(
weights_arranged["2"]["CAR_CO"], 0, 500, 10000
)
if asc_normalised:
non_lin_func_driving = [
n - non_lin_func_driving[0] for n in non_lin_func_driving
]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_rail, color="g", label="Rail")
sns.lineplot(x=x_c, y=non_lin_func_SM, color="#6b8ba4", label="Swissmetro")
sns.lineplot(x=x_d, y=non_lin_func_driving, color="orange", label="Driving")
# plt.title('Influence of alternative cost on the utility function', fontdict={'fontsize': 16})
plt.xlabel("Cost [chf]")
plt.ylabel("Utility")
plt.tight_layout()
if save_file:
plt.savefig("Figures/RUMBoost/SwissMetro/cost.png")
if not only_1d:
# plot for travel time on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_walk = non_lin_function(
weights_arranged["0"]["dur_walking"], 0, 2.5, 10000
)
if asc_normalised:
non_lin_func_walk = [n - non_lin_func_walk[0] for n in non_lin_func_walk]
if with_asc:
non_lin_func_walk = [n + ASCs[0] for n in non_lin_func_walk]
x_c, non_lin_func_cycle = non_lin_function(
weights_arranged["1"]["dur_cycling"], 0, 2.5, 10000
)
if asc_normalised:
non_lin_func_cycle = [n - non_lin_func_cycle[0] for n in non_lin_func_cycle]
if with_asc:
non_lin_func_cycle = [n + ASCs[1] for n in non_lin_func_cycle]
x_ptb, non_lin_func_pt_bus = non_lin_function(
weights_arranged["2"]["dur_pt_bus"], 0, 2.5, 10000
)
if asc_normalised:
non_lin_func_pt_bus = [
n - non_lin_func_pt_bus[0] for n in non_lin_func_pt_bus
]
if with_asc:
non_lin_func_pt_bus = [n + ASCs[2] for n in non_lin_func_pt_bus]
x_ptr, non_lin_func_pt_rail = non_lin_function(
weights_arranged["2"]["dur_pt_rail"], 0, 2.5, 10000
)
if asc_normalised:
non_lin_func_pt_rail = [
n - non_lin_func_pt_rail[0] for n in non_lin_func_pt_rail
]
if with_asc:
non_lin_func_pt_rail = [n + ASCs[2] for n in non_lin_func_pt_rail]
x_d, non_lin_func_driving = non_lin_function(
weights_arranged["3"]["dur_driving"], 0, 2.5, 10000
)
if asc_normalised:
non_lin_func_driving = [
n - non_lin_func_driving[0] for n in non_lin_func_driving
]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_walk, color="b", label="Walking")
sns.lineplot(x=x_c, y=non_lin_func_cycle, color="r", label="Cycling")
sns.lineplot(x=x_ptb, y=non_lin_func_pt_bus, color="#02590f", label="PT Bus")
sns.lineplot(x=x_ptr, y=non_lin_func_pt_rail, color="g", label="PT Rail")
sns.lineplot(x=x_d, y=non_lin_func_driving, color="orange", label="Driving")
# plt.title('Influence of alternative travel time on the utility function', fontdict={'fontsize': 16})
plt.xlabel("Travel time [h]")
plt.ylabel("Utility")
plt.tight_layout()
if save_file:
plt.savefig(
f"Figures/RUMBoost/LPMC/travel_time_iteration_{num_iteration}.png"
)
# plot for distance on one figure
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_pt, non_lin_func_pt = non_lin_function(
weights_arranged["2"]["cost_transit"], 0, 10, 10000
)
if asc_normalised:
non_lin_func_pt = [n - non_lin_func_pt[0] for n in non_lin_func_pt]
if with_asc:
non_lin_func_pt = [n + ASCs[2] for n in non_lin_func_pt]
x_d, non_lin_func_driving = non_lin_function(
weights_arranged["3"]["cost_driving_fuel"], 0, 10, 10000
)
if asc_normalised:
non_lin_func_driving = [
n - non_lin_func_driving[0] for n in non_lin_func_driving
]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
# sns.lineplot(x=x_w, y=non_lin_func_walk, lw=2, color='#fab9a5', label='Walking')
# sns.lineplot(x=x_c, y=non_lin_func_cycle, lw=2, color='#B65FCF', label='Cycling')
sns.lineplot(x=x_pt, y=non_lin_func_pt, color="g", label="PT")
sns.lineplot(x=x_d, y=non_lin_func_driving, color="orange", label="Driving")
# plt.title('Influence of straight line distance on the utility function', fontdict={'fontsize': 16})
plt.xlabel("Cost [£]")
plt.ylabel("Utility")
plt.tight_layout()
if save_file:
plt.savefig("Figures/RUMBoost/LPMC/cost.png")
plt.show()
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_walk = non_lin_function(
weights_arranged["0"]["age"], 0, 100, 10000
)
if asc_normalised:
non_lin_func_walk = [n - non_lin_func_walk[0] for n in non_lin_func_walk]
if with_asc:
non_lin_func_walk = [n + ASCs[0] for n in non_lin_func_walk]
x_c, non_lin_func_cycle = non_lin_function(
weights_arranged["1"]["age"], 0, 100, 10000
)
if asc_normalised:
non_lin_func_cycle = [n - non_lin_func_cycle[0] for n in non_lin_func_cycle]
if with_asc:
non_lin_func_cycle = [n + ASCs[1] for n in non_lin_func_cycle]
x_pt, non_lin_func_pt = non_lin_function(
weights_arranged["2"]["age"], 0, 100, 10000
)
if asc_normalised:
non_lin_func_pt = [n - non_lin_func_pt[0] for n in non_lin_func_pt]
if with_asc:
non_lin_func_pt = [n + ASCs[2] for n in non_lin_func_pt]
x_d, non_lin_func_driving = non_lin_function(
weights_arranged["3"]["age"], 0, 100, 10000
)
if asc_normalised:
non_lin_func_driving = [
n - non_lin_func_driving[0] for n in non_lin_func_driving
]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_walk, color="b", label="Walking")
sns.lineplot(x=x_c, y=non_lin_func_cycle, color="r", label="Cycling")
sns.lineplot(x=x_pt, y=non_lin_func_pt, color="g", label="PT")
sns.lineplot(x=x_d, y=non_lin_func_driving, color="orange", label="Driving")
# plt.title('Influence of straight line distance on the utility function', fontdict={'fontsize': 16})
plt.xlabel("Age")
plt.ylabel("Utility")
plt.tight_layout()
if save_file:
plt.savefig("Figures/RUMBoost/LPMC/age.png")
plt.show()
plt.figure(figsize=(3.49, 3.49), dpi=1000)
x_w, non_lin_func_walk = non_lin_function(
weights_arranged["0"]["start_time_linear"], 0, 24, 10000
)
if asc_normalised:
non_lin_func_walk = [n - non_lin_func_walk[0] for n in non_lin_func_walk]
if with_asc:
non_lin_func_walk = [n + ASCs[0] for n in non_lin_func_walk]
x_c, non_lin_func_cycle = non_lin_function(
weights_arranged["1"]["start_time_linear"], 0, 24, 10000
)
if asc_normalised:
non_lin_func_cycle = [n - non_lin_func_cycle[0] for n in non_lin_func_cycle]
if with_asc:
non_lin_func_cycle = [n + ASCs[1] for n in non_lin_func_cycle]
x_pt, non_lin_func_pt = non_lin_function(
weights_arranged["2"]["start_time_linear"], 0, 24, 10000
)
if asc_normalised:
non_lin_func_pt = [n - non_lin_func_pt[0] for n in non_lin_func_pt]
if with_asc:
non_lin_func_pt = [n + ASCs[2] for n in non_lin_func_pt]
x_d, non_lin_func_driving = non_lin_function(
weights_arranged["3"]["start_time_linear"], 0, 24, 10000
)
if asc_normalised:
non_lin_func_driving = [
n - non_lin_func_driving[0] for n in non_lin_func_driving
]
if with_asc:
non_lin_func_driving = [n + ASCs[3] for n in non_lin_func_driving]
sns.lineplot(x=x_w, y=non_lin_func_walk, color="b", label="Walking")
sns.lineplot(x=x_c, y=non_lin_func_cycle, color="r", label="Cycling")
sns.lineplot(x=x_pt, y=non_lin_func_pt, color="g", label="PT")
sns.lineplot(x=x_d, y=non_lin_func_driving, color="orange", label="Driving")
# plt.title('Influence of straight line distance on the utility function', fontdict={'fontsize': 16})
plt.xlabel("Departure time")
plt.ylabel("Utility")
plt.tight_layout()
if save_file:
plt.savefig("Figures/RUMBoost/LPMC/departure_time.png")
plt.show()
# for all features parameters
if not only_tt:
for u in weights_arranged:
for i, f in enumerate(weights_arranged[u]):
# create nonlinear plot
if boost_from_parameter_space and boost_from_parameter_space[u][f]:
x_max = (
1.05 * max(X[f])
if f in list(X.columns)
else (
1.05 * xlabel_max[u]
if xlabel_max
else 1.05 * weights_arranged[u][f]["Splitting points"][-1]
)
)
x = np.linspace(0, 1.05 * x_max, 10000)
non_lin_func = model._linear_predict(int(u), x)
if model.device is not None and not isinstance(non_lin_func, list):
non_lin_func = non_lin_func.cpu().numpy()
if f in list(X.columns):
x, non_lin_func = non_lin_function(
weights_arranged[u][f],
0,
1.05 * max(X[f]),
10000,
(
boost_from_parameter_space[u][f]
if boost_from_parameter_space
else False
),
)
elif xlabel_max:
x, non_lin_func = non_lin_function(
weights_arranged[u][f],
0,
1.05 * xlabel_max[u],
10000,
(
boost_from_parameter_space[u][f]
if boost_from_parameter_space
else False
),
)
else:
x, non_lin_func = non_lin_function(
weights_arranged[u][f],
0,
1.05 * weights_arranged[u][f]["Splitting points"][-1],
10000,
(
boost_from_parameter_space[u][f]
if boost_from_parameter_space
else False
),
)
if asc_normalised:
val_0 = non_lin_func[0]
non_lin_func = [n - val_0 for n in non_lin_func]
if with_asc and (
not boost_from_parameter_space
or not boost_from_parameter_space[u][f]
):
non_lin_func = [n + ASCs[int(u)] for n in non_lin_func]
# plot parameters
plt.figure(figsize=(3.49, 2.09), dpi=1000)
# plt.title('Influence of {} on the predictive function ({} utility)'.format(f, utility_names[u]), fontdict={'fontsize': 16})
plt.ylabel("{} utility".format(utility_names[u]))
if feature_names:
plt.xlabel("{}".format(feature_names[u][i]))
elif "dur" in f:
plt.xlabel("{} [h]".format(f))
elif "TIME" in f:
plt.xlabel("{} [min]".format(f))
elif "cost" in f:
plt.xlabel("{} [£]".format(f))
elif "distance" in f:
plt.xlabel("{} [km]".format(f))
elif "CO" in f:
plt.xlabel("{} [chf]".format(f))
else:
plt.xlabel("{}".format(f))
sns.lineplot(x=x, y=non_lin_func, color="k", label="RUMBoost")
if f in list(X.columns):
plt.xlim([0 - 0.05 * np.max(X[f]), np.max(X[f]) * 1.05])
elif xlabel_max:
plt.xlim([0 - 0.05 * xlabel_max[u], xlabel_max[u] * 1.05])
else:
plt.xlim(
[
0 - 0.05 * weights_arranged[u][f]["Splitting points"][-1],
weights_arranged[u][f]["Splitting points"][-1] * 1.05,
]
)
if ylim:
plt.ylim(ylim[int(u)])
else:
plt.ylim(
[
np.min(non_lin_func)
- 0.05 * (np.max(non_lin_func) - np.min(non_lin_func)),
np.max(non_lin_func)
+ 0.05 * (np.max(non_lin_func) - np.min(non_lin_func)),
]
)
plt.tight_layout()
if save_file and feature_names:
plt.savefig(
f"{save_file}_{utility_names[u]}_{feature_names[u][i]}.png",
facecolor="white",
)
elif save_file:
plt.savefig(
f"{save_file}_{utility_names[u]}_{f}.png", facecolor="white"
)
plt.show()
if group_feature:
for f, indices in group_feature.items():
x_tot = np.linspace(0, 1.05 * max(X[f]), 10000)
non_lin_func_tot = [0] * 10000
for i in indices:
if str(i) not in weights_arranged or f not in weights_arranged[str(i)]:
continue
if f in list(X.columns):
x, non_lin_func = non_lin_function(
weights_arranged[str(i)][f],
0,
1.05 * max(X[f]),
10000,
boost_from_parameter_space[str(i)][f],
)
elif xlabel_max:
x, non_lin_func = non_lin_function(
weights_arranged[str(i)][f],
0,
1.05 * xlabel_max[str(i)],
10000,
boost_from_parameter_space[str(i)][f],
)
else:
x, non_lin_func = non_lin_function(
weights_arranged[str(i)][f],
0,
1.05 * weights_arranged[str(i)][f]["Splitting points"][-1],
10000,
boost_from_parameter_space[str(i)][f],
)
if asc_normalised and not boost_from_parameter_space[str(i)][f]:
val_0 = non_lin_func[0]
non_lin_func = [n - val_0 for n in non_lin_func]
elif boost_from_parameter_space[str(i)][f]:
val_0 = 0
non_lin_func = [n + val_0 for n in non_lin_func]
non_lin_func_tot = [
n_t + n for n_t, n in zip(non_lin_func_tot, non_lin_func)
]
x = x_tot
non_lin_func = non_lin_func_tot
# plot parameters
plt.figure(figsize=(3.49, 2.09), dpi=1000)
# plt.title('Influence of {} on the predictive function ({} utility)'.format(f, utility_names[u]), fontdict={'fontsize': 16})
plt.ylabel("{} utility".format(utility_names[str(i)]))
if "dur" in f:
plt.xlabel("{} [h]".format(f))
elif "TIME" in f:
plt.xlabel("{} [min]".format(f))
elif "cost" in f:
plt.xlabel("{} [£]".format(f))
elif "distance" in f:
plt.xlabel("{} [km]".format(f))
elif "CO" in f:
plt.xlabel("{} [chf]".format(f))
else:
plt.xlabel("{}".format(f))
sns.lineplot(x=x, y=non_lin_func, color="k", label="RUMBoost")
if f in list(X.columns):
plt.xlim([0 - 0.05 * np.max(X[f]), np.max(X[f]) * 1.05])
elif xlabel_max:
plt.xlim([0 - 0.05 * xlabel_max[str(i)], xlabel_max[str(i)] * 1.05])
else:
plt.xlim(
[
0 - 0.05 * weights_arranged[str(i)][f]["Splitting points"][-1],
weights_arranged[str(i)][f]["Splitting points"][-1] * 1.05,
]
)
if ylim:
plt.ylim(ylim[i])
else:
plt.ylim(
[
np.min(non_lin_func)
- 0.05 * (np.max(non_lin_func) - np.min(non_lin_func)),
np.max(non_lin_func)
+ 0.05 * (np.max(non_lin_func) - np.min(non_lin_func)),
]
)
plt.tight_layout()
if save_file:
plt.savefig(
f"{save_file}_{utility_names[int(i)]}_{f}.png", facecolor="white"
)
plt.show()
[docs]
def plot_market_segm(
model,
X,
asc_normalised: bool = True,
utility_names: list[str] = ["Walking", "Cycling", "Public Transport", "Driving"],
):
"""
Plot the market segmentation.
Parameters
----------
model : RUMBoost
A RUMBoost object.
X : pandas DataFrame
Training data.
asc_normalised : bool, optional (default = False)
If True, scale down utilities to be zero at the y axis.
utility_names : list[str], optional (default = ['Walking', 'Cycling', 'Public Transport', 'Driving'])
Names of utilities.
"""
sns.set_theme()
weights_arranged = weights_to_plot_v2(model, market_segm=True)
label = {0: "Weekdays", 1: "Weekends"}
color = ["r", "b"]
for u in weights_arranged:
plt.figure(figsize=(10, 6))
for i, f in enumerate(weights_arranged[u]):
# create nonlinear plot
x, non_lin_func = non_lin_function(
weights_arranged[u][f], 0, 1.05 * max(X[f]), 10000
)
if asc_normalised:
val_0 = non_lin_func[0]
non_lin_func = [n - val_0 for n in non_lin_func]
sns.lineplot(x=x, y=non_lin_func, lw=2, color=color[i], label=label[i])
plt.title(
"Impact of travel time in weekdays and weekends on {} utility".format(
utility_names[u]
),
fontdict={"fontsize": 16},
)
plt.ylabel("{} utility".format(utility_names[u]))
plt.xlabel("Travel time [h]")
plt.show()
[docs]
def plot_util(model, data_train, points=10000):
"""
Plot the raw utility functions of all features. This is done directly from the predict attribute of lightgbm.Boosters.
Parameters
----------
model : RUMBoost
A RUMBoost object.
data_train : pandas Dataframe
The full training dataset.
points : int, optional (default = 10000)
The number of points used to draw the line plot.
"""
sns.set_theme()
for j, struct in enumerate(model.rum_structure):
booster = model.boosters[j]
for i, f in enumerate(struct["columns"]):
xin = np.zeros(shape=(points, len(struct["columns"])))
xin[:, i] = np.linspace(0, 1.05 * max(data_train[f]), points)
ypred = booster.predict(xin)
plt.figure()
plt.plot(np.linspace(0, 1.05 * max(data_train[f]), points), ypred)
plt.title(f)
[docs]
def plot_spline(
model,
data_train,
spline_collection,
utility_names,
mean_splines=False,
x_knots_dict=None,
linear_extrapolation=False,
save_fig=False,
lpmc_tt_cost=False,
sm_tt_cost=False,
save_file="",
):
"""
Plot the spline interpolation for all utilities interpolated.
Parameters
----------
model : RUMBoost
A RUMBoost object.
data_train : pandas Dataframe
The full training dataset.
spline_collection : dict
A dictionary containing the optimal number of splines for each feature interpolated of each utility
mean_splines : bool, optional (default = False)
Must be True if the splines are computed at the mean distribution of data for stairs.
x_knots_dict : dict, optional (default = None)
A dictionary in the form of {utility: {attribute: x_knots}} where x_knots are the spline knots for the corresponding
utility and attributes
linear_extrapolation : bool, optional (default = False)
If True, the splines are linearly extrapolated.
save_fig : bool, optional (default = False)
If True, save the plot as a png file.
lpmc_tt_cost : bool, optional (default = False)
If True, plot only the LPMC travel time and cost on the same figure.
sm_tt_cost : bool, optional (default = False)
If True, plot only the swissmetro travel time and cost on the same figure.
save_file : str, optional (default='')
The name to save the figure with.
"""
# get weights ordered by features
weights = weights_to_plot_v2(model)
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
"legend.borderaxespad": 0.4,
"legend.borderpad": 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
"scatter.edgecolors": "none",
}
sns.set_theme(font_scale=1, rc=tex_fonts)
# sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
if lpmc_tt_cost:
x_plot_w, y_plot_w = data_leaf_value(
data_train["dur_walking"], weights["0"]["dur_walking"], "data_weighted"
)
y_plot_norm_w = [y - y_plot_w[0] for y in y_plot_w]
x_spline_w = np.linspace(
np.min(data_train["dur_walking"]),
np.max(data_train["dur_walking"]),
num=10000,
)
x_knots_temp_w, y_knots_w = data_leaf_value(
x_knots_dict["0"]["dur_walking"], weights["0"]["dur_walking"]
)
_, y_spline_w, _, x_knot_w, y_knot_w = monotone_spline(
x_spline_w,
weights["0"]["dur_walking"],
num_splines=spline_collection["0"]["dur_walking"],
x_knots=x_knots_temp_w,
y_knots=y_knots_w,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_w = [y - y_plot_w[0] for y in y_spline_w]
y_knot_norm_w = [y - y_plot_w[0] for y in y_knot_w]
plt.figure(figsize=(3.49, 2.09), dpi=1000)
# data
plt.scatter(
x_plot_w, y_plot_norm_w, color="b", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_w,
y_spline_norm_w,
color="b",
label=f'Walking travel time ({spline_collection["0"]["dur_walking"]} splines)',
)
# knots position
plt.scatter(x_knot_w, y_knot_norm_w, color="k", s=1)
x_plot_c, y_plot_c = data_leaf_value(
data_train["dur_cycling"], weights["1"]["dur_cycling"], "data_weighted"
)
y_plot_norm_c = [y - y_plot_c[0] for y in y_plot_c]
x_spline_c = np.linspace(
np.min(data_train["dur_cycling"]),
np.max(data_train["dur_cycling"]),
num=10000,
)
x_knots_temp_c, y_knots_c = data_leaf_value(
x_knots_dict["1"]["dur_cycling"], weights["1"]["dur_cycling"]
)
_, y_spline_c, _, x_knot_c, y_knot_c = monotone_spline(
x_spline_c,
weights["1"]["dur_cycling"],
num_splines=spline_collection["1"]["dur_cycling"],
x_knots=x_knots_temp_c,
y_knots=y_knots_c,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_c = [y - y_plot_c[0] for y in y_spline_c]
y_knot_norm_c = [y - y_plot_c[0] for y in y_knot_c]
# data
plt.scatter(
x_plot_c, y_plot_norm_c, color="r", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_c,
y_spline_norm_c,
color="r",
label=f'Cycling travel time ({spline_collection["1"]["dur_cycling"]} splines)',
)
# knots position
plt.scatter(x_knot_c, y_knot_norm_c, color="k", s=1)
x_plot_p, y_plot_p = data_leaf_value(
data_train["dur_pt_rail"], weights["2"]["dur_pt_rail"], "data_weighted"
)
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(
np.min(data_train["dur_pt_rail"]),
np.max(data_train["dur_pt_rail"]),
num=10000,
)
x_knots_temp_p, y_knots_p = data_leaf_value(
x_knots_dict["2"]["dur_pt_rail"], weights["2"]["dur_pt_rail"]
)
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(
x_spline_p,
weights["2"]["dur_pt_rail"],
num_splines=spline_collection["2"]["dur_pt_rail"],
x_knots=x_knots_temp_p,
y_knots=y_knots_p,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
# data
plt.scatter(
x_plot_p, y_plot_norm_p, color="g", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_p,
y_spline_norm_p,
color="g",
label=f'Rail travel time ({spline_collection["2"]["dur_pt_rail"]} splines)',
)
# knots position
plt.scatter(x_knot_p, y_knot_norm_p, color="k", s=1)
x_plot_d, y_plot_d = data_leaf_value(
data_train["dur_driving"], weights["3"]["dur_driving"], "data_weighted"
)
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(
np.min(data_train["dur_driving"]),
np.max(data_train["dur_driving"]),
num=10000,
)
x_knots_temp_d, y_knots_d = data_leaf_value(
x_knots_dict["3"]["dur_driving"], weights["3"]["dur_driving"]
)
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(
x_spline_d,
weights["3"]["dur_driving"],
num_splines=spline_collection["3"]["dur_driving"],
x_knots=x_knots_temp_d,
y_knots=y_knots_d,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
# data
plt.scatter(
x_plot_d, y_plot_norm_d, color="orange", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_d,
y_spline_norm_d,
color="orange",
label=f'Driving travel time ({spline_collection["3"]["dur_driving"]} splines)',
)
# knots position
plt.scatter(x_knot_d, y_knot_norm_d, color="k", s=1, label="Knots")
# plt.title('Spline interpolation of {}'.format(f))
plt.ylabel("Utility")
plt.xlim([0, 5])
plt.xlabel("Travel time [h]")
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/LPMC/splines_travel_time.png")
plt.show()
plt.figure(figsize=(3.49, 2.09), dpi=1000)
x_plot_p, y_plot_p = data_leaf_value(
data_train["cost_transit"], weights["2"]["cost_transit"], "data_weighted"
)
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(
np.min(data_train["cost_transit"]),
np.max(data_train["cost_transit"]),
num=10000,
)
x_knots_temp_p, y_knots_p = data_leaf_value(
x_knots_dict["2"]["cost_transit"], weights["2"]["cost_transit"]
)
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(
x_spline_p,
weights["2"]["cost_transit"],
num_splines=spline_collection["2"]["cost_transit"],
x_knots=x_knots_temp_p,
y_knots=y_knots_p,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
# data
plt.scatter(
x_plot_p, y_plot_norm_p, color="g", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_p,
y_spline_norm_p,
color="g",
label=f'PT cost ({spline_collection["2"]["cost_transit"]} splines)',
)
# knots position
plt.scatter(x_knot_p, y_knot_norm_p, color="k", s=1)
x_plot_d, y_plot_d = data_leaf_value(
data_train["cost_driving_fuel"],
weights["3"]["cost_driving_fuel"],
"data_weighted",
)
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(
np.min(data_train["cost_driving_fuel"]),
np.max(data_train["cost_driving_fuel"]),
num=10000,
)
x_knots_temp_d, y_knots_d = data_leaf_value(
x_knots_dict["3"]["cost_driving_fuel"], weights["3"]["cost_driving_fuel"]
)
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(
x_spline_d,
weights["3"]["cost_driving_fuel"],
num_splines=spline_collection["3"]["cost_driving_fuel"],
x_knots=x_knots_temp_d,
y_knots=y_knots_d,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
# data
plt.scatter(
x_plot_d, y_plot_norm_d, color="orange", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_d,
y_spline_norm_d,
color="orange",
label=f'Driving cost ({spline_collection["3"]["cost_driving_fuel"]} splines)',
)
# knots position
plt.scatter(x_knot_d, y_knot_norm_d, color="k", s=1, label="Knots")
# plt.title('Spline interpolation of {}'.format(f))
plt.ylabel("Utility")
plt.xlim([0, 10])
plt.xlabel("Cost [£]")
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/LPMC/splines_cost.png")
plt.show()
if sm_tt_cost:
x_plot_p, y_plot_p = data_leaf_value(
data_train["TRAIN_TT"], weights["0"]["TRAIN_TT"], "data_weighted"
)
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(
np.min(data_train["TRAIN_TT"]), np.max(data_train["TRAIN_TT"]), num=10000
)
x_knots_temp_p, y_knots_p = data_leaf_value(
x_knots_dict["0"]["TRAIN_TT"], weights["0"]["TRAIN_TT"]
)
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(
x_spline_p,
weights["0"]["TRAIN_TT"],
num_splines=spline_collection["0"]["TRAIN_TT"],
x_knots=x_knots_temp_p,
y_knots=y_knots_p,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
plt.figure(figsize=(3.49, 2.09), dpi=1000)
# data
plt.scatter(
x_plot_p / 60, y_plot_norm_p, color="g", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_p / 60,
y_spline_norm_p,
color="g",
label=f'Rail travel time ({spline_collection["0"]["TRAIN_TT"]} splines)',
)
# knots position
plt.scatter(x_knot_p / 60, y_knot_norm_p, color="k", s=1)
x_plot_s, y_plot_s = data_leaf_value(
data_train["SM_TT"], weights["1"]["SM_TT"], "data_weighted"
)
y_plot_norm_s = [y - y_plot_s[0] for y in y_plot_s]
x_spline_s = np.linspace(
np.min(data_train["SM_TT"]), np.max(data_train["SM_TT"]), num=10000
)
x_knots_temp_s, y_knots_s = data_leaf_value(
x_knots_dict["1"]["SM_TT"], weights["1"]["SM_TT"]
)
_, y_spline_s, _, x_knot_s, y_knot_s = monotone_spline(
x_spline_s,
weights["1"]["SM_TT"],
num_splines=spline_collection["1"]["SM_TT"],
x_knots=x_knots_temp_s,
y_knots=y_knots_s,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_s = [y - y_plot_s[0] for y in y_spline_s]
y_knot_norm_s = [y - y_plot_s[0] for y in y_knot_s]
# data
plt.scatter(
x_plot_s / 60,
y_plot_norm_s,
color="#6b8ba4",
s=0.3,
alpha=1,
edgecolors="none",
)
# splines
plt.plot(
x_spline_s / 60,
y_spline_norm_s,
color="#6b8ba4",
label=f'SwissMetro travel time ({spline_collection["1"]["SM_TT"]} splines)',
)
# knots position
plt.scatter(x_knot_s / 60, y_knot_norm_s, color="k", s=1)
x_plot_d, y_plot_d = data_leaf_value(
data_train["CAR_TT"], weights["2"]["CAR_TT"], "data_weighted"
)
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(
np.min(data_train["CAR_TT"]), np.max(data_train["CAR_TT"]), num=10000
)
x_knots_temp_d, y_knots_d = data_leaf_value(
x_knots_dict["2"]["CAR_TT"], weights["2"]["CAR_TT"]
)
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(
x_spline_d,
weights["2"]["CAR_TT"],
num_splines=spline_collection["2"]["CAR_TT"],
x_knots=x_knots_temp_d,
y_knots=y_knots_d,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
# data
plt.scatter(
x_plot_d / 60,
y_plot_norm_d,
color="orange",
s=0.3,
alpha=1,
edgecolors="none",
)
# splines
plt.plot(
x_spline_d / 60,
y_spline_norm_d,
color="orange",
label=f'Driving travel time ({spline_collection["2"]["CAR_TT"]} splines)',
)
# knots position
plt.scatter(x_knot_d / 60, y_knot_norm_d, color="k", s=1, label="Knots")
# plt.title('Spline interpolation of {}'.format(f))
plt.ylabel("Utility")
plt.xlim([0, 10])
plt.xlabel("Travel time [h]")
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/SwissMetro/splines_travel_time.png")
plt.show()
plt.figure(figsize=(3.49, 2.09), dpi=1000)
x_plot_p, y_plot_p = data_leaf_value(
data_train["TRAIN_COST"], weights["0"]["TRAIN_COST"], "data_weighted"
)
y_plot_norm_p = [y - y_plot_p[0] for y in y_plot_p]
x_spline_p = np.linspace(
np.min(data_train["TRAIN_COST"]),
np.max(data_train["TRAIN_COST"]),
num=10000,
)
x_knots_temp_p, y_knots_p = data_leaf_value(
x_knots_dict["0"]["TRAIN_COST"], weights["0"]["TRAIN_COST"]
)
_, y_spline_p, _, x_knot_p, y_knot_p = monotone_spline(
x_spline_p,
weights["0"]["TRAIN_COST"],
num_splines=spline_collection["0"]["TRAIN_COST"],
x_knots=x_knots_temp_p,
y_knots=y_knots_p,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_p = [y - y_plot_p[0] for y in y_spline_p]
y_knot_norm_p = [y - y_plot_p[0] for y in y_knot_p]
# data
plt.scatter(
x_plot_p, y_plot_norm_p, color="g", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_p,
y_spline_norm_p,
color="g",
label=f'Rail cost ({spline_collection["0"]["TRAIN_COST"]} splines)',
)
# knots position
plt.scatter(x_knot_p, y_knot_norm_p, color="k", s=1)
x_plot_s, y_plot_s = data_leaf_value(
data_train["SM_COST"], weights["1"]["SM_COST"], "data_weighted"
)
y_plot_norm_s = [y - y_plot_s[0] for y in y_plot_s]
x_spline_s = np.linspace(
np.min(data_train["SM_COST"]), np.max(data_train["SM_COST"]), num=10000
)
x_knots_temp_s, y_knots_s = data_leaf_value(
x_knots_dict["1"]["SM_COST"], weights["1"]["SM_COST"]
)
_, y_spline_s, _, x_knot_s, y_knot_s = monotone_spline(
x_spline_s,
weights["1"]["SM_COST"],
num_splines=spline_collection["1"]["SM_COST"],
x_knots=x_knots_temp_s,
y_knots=y_knots_s,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_s = [y - y_plot_s[0] for y in y_spline_s]
y_knot_norm_s = [y - y_plot_s[0] for y in y_knot_s]
# data
plt.scatter(
x_plot_s, y_plot_norm_s, color="#6b8ba4", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_s,
y_spline_norm_s,
color="#6b8ba4",
label=f'SwissMetro cost ({spline_collection["1"]["SM_COST"]} splines)',
)
# knots position
plt.scatter(x_knot_s, y_knot_norm_s, color="k", s=1)
x_plot_d, y_plot_d = data_leaf_value(
data_train["CAR_CO"], weights["2"]["CAR_CO"], "data_weighted"
)
y_plot_norm_d = [y - y_plot_d[0] for y in y_plot_d]
x_spline_d = np.linspace(
np.min(data_train["CAR_CO"]), np.max(data_train["CAR_CO"]), num=10000
)
x_knots_temp_d, y_knots_d = data_leaf_value(
x_knots_dict["2"]["CAR_CO"], weights["2"]["CAR_CO"]
)
_, y_spline_d, _, x_knot_d, y_knot_d = monotone_spline(
x_spline_d,
weights["2"]["CAR_CO"],
num_splines=spline_collection["2"]["CAR_CO"],
x_knots=x_knots_temp_d,
y_knots=y_knots_d,
linear_extrapolation=linear_extrapolation,
)
y_spline_norm_d = [y - y_plot_d[0] for y in y_spline_d]
y_knot_norm_d = [y - y_plot_d[0] for y in y_knot_d]
# data
plt.scatter(
x_plot_d, y_plot_norm_d, color="orange", s=0.3, alpha=1, edgecolors="none"
)
# splines
plt.plot(
x_spline_d,
y_spline_norm_d,
color="orange",
label=f'Driving cost ({spline_collection["2"]["CAR_CO"]} splines)',
)
# knots position
plt.scatter(x_knot_d, y_knot_norm_d, color="k", s=1, label="Knots")
# plt.title('Spline interpolation of {}'.format(f))
plt.ylabel("Utility")
plt.xlim([0, 500])
plt.xlabel("Cost [chf]")
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig("Figures/RUMBoost/SwissMetro/splines_cost.png")
plt.show()
for u in spline_collection:
for f in spline_collection[u]:
# data points and their utilities
x_plot, y_plot = data_leaf_value(
data_train[f], weights[u][f], "data_weighted"
)
y_plot_norm = [y - y_plot[0] for y in y_plot]
x_spline = np.linspace(
np.min(data_train[f]), np.max(data_train[f]), num=10000
)
# if using splines
# if mean technique
if mean_splines:
x_mean, y_mean = data_leaf_value(
data_train[f], weights[u][f], technique="mean_data"
)
x_spline, y_spline, _, x_knot, y_knot = mean_monotone_spline(
x_plot, x_mean, y_plot, y_mean, num_splines=spline_collection[u][f]
)
# else, i.e. linearly sampled points
else:
if x_knots_dict is not None:
x_knots_temp, y_knots = data_leaf_value(
x_knots_dict[u][f], weights[u][f]
)
_, y_spline, _, x_knot, y_knot = monotone_spline(
x_spline,
weights[u][f],
num_splines=spline_collection[u][f],
x_knots=x_knots_temp,
y_knots=y_knots,
linear_extrapolation=linear_extrapolation,
)
else:
x_spline, y_spline, _, x_knot, y_knot = monotone_spline(
x_plot,
y_plot,
num_splines=spline_collection[u][f],
linear_extrapolation=linear_extrapolation,
)
y_spline_norm = [y - y_plot[0] for y in y_spline]
y_knot_norm = [y - y_plot[0] for y in y_knot]
plt.figure(figsize=(3.49, 2.09), dpi=1000)
# data
plt.scatter(x_plot, y_plot_norm, color="k", s=0.3, zorder=1)
# splines
plt.plot(x_spline, y_spline_norm, color="#5badc7", zorder=2)
# knots position
plt.scatter(x_knot, y_knot_norm, color="#CC5500", s=1.5, zorder=3)
plt.legend(
["Data", "Splines ({})".format(spline_collection[u][f]), "Knots"]
)
# plt.title('Spline interpolation of {}'.format(f))
plt.ylabel("{} utility".format(utility_names[u]))
plt.tight_layout()
if "dur" in f:
plt.xlabel("{} [h]".format(f))
elif "TIME" in f:
plt.xlabel("{} [h]".format(f))
elif "cost" in f:
plt.xlabel("{} [£]".format(f))
elif "CO" in f:
plt.xlabel("{} [chf]".format(f))
elif "distance" in f:
plt.xlabel("{} [km]".format(f))
else:
plt.xlabel("{}".format(f))
# plt.xlim([-0.2, 3.3])
# plt.ylim([-9, 0.3])
if save_fig:
# plt.savefig(save_file + "{} utility, {} feature.png".format(u, f))
plt.savefig(save_file, facecolor="white")
plt.show()
[docs]
def plot_VoT(
data_train,
util_collection,
attribute_VoT,
utility_names,
draw_range,
save_figure=False,
num_points=1000,
):
"""
The function plot the Value of Time of the attributes specified in attribute_VoT.
Parameters
----------
util_collection : dict
A dictionary containing the type of utility to use for all features in all utilities.
attribute_VoT : dict
A dictionary with keys being the utility number (as string) and values being a tuple of the attributes to compute the VoT on.
The structure follows this form: {utility: (attribute1, attribute2)}
utility_names : dict
A dictionary containing the names of the utilities.
The structure of the dictionary follows this form: {utility: names}
draw_range : dict
A dictionary containing the range of the attributes to draw the VoT.
The structure of the dictionary follows this form: {utility: {attribute: (min, max)}}
save_figure : bool, optional (default = False)
If True, save the plot as a png file.
num_points : int, optional (default = 1000)
The number of points used to draw the contour plot.
"""
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
"legend.borderaxespad": 0.4,
"legend.borderpad": 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.1,
"ytick.major.pad": 0.1,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
}
sns.set_theme(font_scale=1, rc=tex_fonts)
# sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
for u in attribute_VoT:
f1, f2 = attribute_VoT[u]
x_vect = np.linspace(draw_range[u][f1][0], draw_range[u][f1][1], num_points)
y_vect = np.linspace(draw_range[u][f2][0], draw_range[u][f2][1], num_points)
d_f1 = util_collection[u][f1].derivative()
d_f2 = util_collection[u][f2].derivative()
VoT = lambda x1, x2, df1=d_f1, df2=d_f2: df1(x1) / df2(x2)
VoT_contour_plot = np.array(np.zeros((len(x_vect), len(y_vect))))
X, Y = np.meshgrid(x_vect, y_vect, indexing="ij")
for i in range(len(x_vect)):
for j in range(len(y_vect)):
if d_f2(Y[i, j]) == 0:
VoT_contour_plot[i, j] = 100
elif VoT(X[i, j], Y[i, j]) > 100:
VoT_contour_plot[i, j] = 100
elif VoT(X[i, j], Y[i, j]) < 0.1:
VoT_contour_plot[i, j] = 0.1
else:
VoT_contour_plot[i, j] = VoT(X[i, j], Y[i, j])
fig, axes = plt.subplots(figsize=(3.49, 3.49), dpi=1000)
# fig.suptitle(f'VoT ({f1} and {f2}) of {utility_names[u]}')
res = 100
c_plot = axes.contourf(
X,
Y,
np.log(VoT_contour_plot) / np.log(10),
levels=res,
linewidths=0,
cmap=sns.color_palette("Blues", as_cmap=True),
vmin=-1,
vmax=2,
)
# axes.set_title(f'{utility_names[u]}')
axes.set_xlabel(f"{f1} [h]")
axes.set_ylabel(f"{f2} [£]")
cbar = fig.colorbar(c_plot, ax=axes, ticks=[-1, 0, 1, 2])
cbar.set_ticklabels([0.1, 1, 10, 100])
cbar.ax.set_ylabel("VoT [£/h]")
cbar.ax.set_ylim([-1, 2])
# plt.tight_layout()
if save_figure:
plt.savefig("Figures/RUMBoost/LPMC/VoT_{}.png".format(utility_names[u]))
plt.show()
[docs]
def plot_pop_VoT(data_train, util_collection, attribute_VoT, save_figure=False):
"""
Plot the Value of Time for the given observations.
Parameters
----------
data_train : pd.DataFrame
The training dataset.
util_collection : dict
A dictionary containing the utility function (spline or tree) to use for all features in all utilities where the VoT is computed. it follows this structure {utility: {feature: tree/spline function}}
attribute_VoT : dict
A dictionary with keys being the utility number (as string) and values being a tuple of the attributes to compute the VoT on.
The structure follows this form: {utility: (attribute1, attribute2)}
save_figure : bool, optional (default = False)
If True, save the plot as a png file.
"""
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
"legend.borderaxespad": 0.4,
"legend.borderpad": 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
}
sns.set_theme(font_scale=1, rc=tex_fonts)
# sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
for u in attribute_VoT:
f1, f2 = attribute_VoT[u]
d_f1 = util_collection[u][f1].derivative()
d_f2 = util_collection[u][f2].derivative()
VoT_pop = d_f1(data_train[f1]) / d_f2(data_train[f2])
filtered_VoT_pop = VoT_pop[~np.isnan(VoT_pop)]
limited_VoT_pop = filtered_VoT_pop[
(filtered_VoT_pop > 0)
& (filtered_VoT_pop < np.quantile(filtered_VoT_pop, 0.99))
]
# fig, axes = plt.subplots(figsize=(10,8), layout='constrained')
plt.figure(figsize=(3.49, 2.09), dpi=1000)
sns.histplot(limited_VoT_pop, color="b", alpha=0.5, kde=True, bins=50)
plt.xlabel("VoT [£/h]")
plt.tight_layout()
plt.show()
if save_figure:
plt.savefig("Figures/RUMBoost/SwissMetro/pop_VoT_{}.png".format(u))
[docs]
def plot_ind_spec_constant(socec_model, dataset_train, alternatives: list[str]):
"""
Plot a histogram of all alternatives individual specific constant of a functional effect model.
Parameters
----------
socec_model:
The part of the functional effect model with full interactions of socio-economic characteristics.
dataset_train:
The dataset used to train the model. It must be a lightGBM Dataset object.
alternatives: list[str]
The list of alternatives name.
"""
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
"legend.borderaxespad": 0.4,
"legend.borderpad": 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
}
sns.set_theme(font_scale=1, rc=tex_fonts)
# sns.set_context(tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
ind_spec_constants = socec_model.predict(dataset_train, utilities=True)
bins = np.histogram(ind_spec_constants, bins=50)[1]
sns.set_theme()
f, axes = plt.subplots(2, 2, figsize=(12, 10), tight_layout=True)
colors = ["b", "r", "g", "orange"]
for i, axs in enumerate(axes.flatten()):
sns.histplot(
ind_spec_constants[:, i],
bins=bins,
alpha=0.5,
ax=axs,
kde=True,
color=colors[i],
)
axs.set_title(f"{alternatives[i]}")
# Defining custom 'xlim' and 'ylim' values.
xlim = (-3.5, 3.5)
ylim = (0, 5250)
# Setting the values for all axes.
plt.setp(axes, xlim=xlim, ylim=ylim)
plt.show()
[docs]
def plot_bootstrap(models: list, dataset: pd.DataFrame, features: dict[list[str]]):
"""
Plot the bootstrap sampling.
Parameters
----------
models: list
A list containing all the trained mdoels of the bootstrap sampling
dataset: pd.DataFrame
The full dataset used for training
features: dict[list[str]]
A dictionary of lists of strings contaning the number of alternatives, and the features for that alternative,
e.g. {'0':['feature_1', ...], '1': [], ...]
"""
tex_fonts = {
# Use LaTeX to write all text
# "text.usetex": True,
# "font.family": "serif",
# "font.serif": "Computer Modern Roman",
# Use 14pt font in plots, to match 10pt font in document
"axes.labelsize": 7,
"axes.linewidth": 0.5,
"axes.labelpad": 1,
"font.size": 7,
# Make the legend/label fonts a little smaller
"legend.fontsize": 6,
"legend.fancybox": False,
"legend.edgecolor": "inherit",
"legend.borderaxespad": 0.4,
"legend.borderpad": 0.4,
"xtick.labelsize": 6,
"ytick.labelsize": 6,
"xtick.major.pad": 0.5,
"ytick.major.pad": 0.5,
"grid.linewidth": 0.5,
"lines.linewidth": 0.8,
}
sns.set_theme(font_scale=1, rc=tex_fonts)
sns.set_style("whitegrid")
# plt.rcParams.update({
# # "text.usetex": True,
# "font.family": "serif"
# #"font.sans-serif": "Computer Modern Roman",
# })
ufs_dict = {}
for u in features:
ufs_dict[u] = {}
for f in features[u]:
ufs_dict[u][f] = {
"xplot": np.linspace(0, dataset[f].max(), 1000),
"yarr": np.array([]),
"yav": [],
}
yi = []
for model in models:
vals = weights_to_plot_v2(model)
_, y = non_lin_function(vals[u][f], 0, dataset[f].max(), 1000)
yi.append([yii - y[0] for yii in y])
ufs_dict[u][f]["yarr"] = np.array(yi)
ufs_dict[u][f]["yav"] = ufs_dict[u][f]["yarr"].mean(axis=0)
g = sns.JointGrid(xlim=(0, np.max(dataset[f])), height=3.89)
g.figure.set_dpi(1000)
x, y = ufs_dict[u][f]["xplot"], ufs_dict[u][f]["yav"]
sns.lineplot(
x=x, y=y, ax=g.ax_joint, color="orange", linewidth=1, label="Average"
)
sns.histplot(
x=dataset[f], ax=g.ax_marg_x, bins=100, color="orange", alpha=0.5
)
for i in range(len(models)):
sns.lineplot(
x=x,
y=ufs_dict[u][f]["yarr"][i, :].T,
color="orange",
alpha=0.1,
ax=g.ax_joint,
linewidth=0.5,
)
g.ax_joint.set(xlabel=f"{f}", ylabel="Utility")
[docs]
def compute_VoT(util_collection, u, f1, f2):
"""
The function compute the Value of Time of the attributes specified in attribute_VoT.
Parameters
----------
util_collection : dict
A dictionary containing the type of utility to use for all features in all utilities.
u : str
The utility number, as a str (e.g. '0', '1', ...).
f1 : str
The time related attribtue name.
f2 : str
The cost related attribtue name.
Return
------
VoT : lamda function
The function calculating value of time for attribute1 and attribute2.
"""
VoT = lambda x1, x2, u1=util_collection[u][f1], u2=util_collection[u][
f2
]: u1.derivative()(x1) / u2.derivative()(x2)
return VoT
[docs]
def create_name(features):
"""Create new feature names from a list of feature names"""
new_name = features[0]
for f_name in features[1:]:
new_name += "-" + f_name
return new_name
[docs]
def get_child(
model,
weights,
weights_2d,
weights_market,
tree,
split_points,
features,
feature_names,
i,
market_segm,
direction=None,
):
"""Dig into the tree to get splitting points, features, left and right leaves values"""
min_r = 0
max_r = 10000
if feature_names[tree["split_feature"]] not in features:
features.append(feature_names[tree["split_feature"]])
split_points.append(tree["threshold"])
if "leaf_value" in tree["left_child"] and "leaf_value" in tree["right_child"]:
if direction is None:
weights.append(
[
feature_names[tree["split_feature"]],
tree["threshold"],
tree["left_child"]["leaf_value"],
tree["right_child"]["leaf_value"],
i,
]
)
elif direction == "left":
if len(features) == 1:
weights.append(
[
feature_names[tree["split_feature"]],
tree["threshold"],
tree["left_child"]["leaf_value"],
tree["right_child"]["leaf_value"],
i,
]
)
weights.append(
[
feature_names[tree["split_feature"]],
split_points[0],
0,
-tree["right_child"]["leaf_value"],
i,
]
)
elif market_segm:
feature_name = create_name(features)
if features[0] in model.rum_structure[i]["categorical_feature"]:
weights_market.append(
[
features[-1] + "-0",
tree["threshold"],
tree["left_child"]["leaf_value"],
tree["right_child"]["leaf_value"],
i,
]
)
else:
weights_market.append(
[
features[0] + "-0",
split_points[0],
tree["left_child"]["leaf_value"],
0,
i,
]
)
weights_market.append(
[
features[0] + "-1",
split_points[0],
tree["right_child"]["leaf_value"],
0,
i,
]
)
else:
feature_name = create_name(features)
weights_2d.append(
[
feature_name,
(min_r, split_points[0]),
(min_r, tree["threshold"]),
tree["left_child"]["leaf_value"],
i,
]
)
weights_2d.append(
[
feature_name,
(min_r, split_points[0]),
(tree["threshold"], max_r),
tree["right_child"]["leaf_value"],
i,
]
)
if len(features) > 1:
features.pop(-1)
split_points.pop(-1)
elif direction == "right":
if len(features) == 1:
weights.append(
[
feature_names[tree["split_feature"]],
tree["threshold"],
tree["left_child"]["leaf_value"],
tree["right_child"]["leaf_value"],
i,
]
)
weights.append(
[
feature_names[tree["split_feature"]],
split_points[0],
-tree["left_child"]["leaf_value"],
0,
i,
]
)
elif market_segm:
feature_name = create_name(features)
if features[0] in model.rum_structure[i]["categorical_feature"]:
weights_market.append(
[
features[-1] + "-1",
tree["threshold"],
tree["left_child"]["leaf_value"],
tree["right_child"]["leaf_value"],
i,
]
)
else:
weights_market.append(
[
features[0] + "-0",
split_points[0],
0,
tree["left_child"]["leaf_value"],
i,
]
)
weights_market.append(
[
features[0] + "-1",
split_points[0],
0,
tree["right_child"]["leaf_value"],
i,
]
)
else:
feature_name = create_name(features)
weights_2d.append(
[
feature_name,
(split_points[0], max_r),
(min_r, tree["threshold"]),
tree["left_child"]["leaf_value"],
i,
]
)
weights_2d.append(
[
feature_name,
(split_points[0], max_r),
(tree["threshold"], max_r),
tree["right_child"]["leaf_value"],
i,
]
)
elif "leaf_value" in tree["left_child"]:
weights.append(
[
feature_names[tree["split_feature"]],
tree["threshold"],
tree["left_child"]["leaf_value"],
0,
i,
]
)
get_child(
model,
weights,
weights_2d,
weights_market,
tree["right_child"],
split_points,
features,
feature_names,
i,
market_segm,
direction="right",
)
elif "leaf_value" in tree["right_child"]:
weights.append(
[
feature_names[tree["split_feature"]],
tree["threshold"],
0,
tree["right_child"]["leaf_value"],
i,
]
)
get_child(
model,
weights,
weights_2d,
weights_market,
tree["left_child"],
split_points,
features,
feature_names,
i,
market_segm,
direction="left",
)
else:
get_child(
model,
weights,
weights_2d,
weights_market,
tree["left_child"],
split_points,
features,
feature_names,
i,
market_segm,
direction="left",
)
get_child(
model,
weights,
weights_2d,
weights_market,
tree["right_child"],
split_points,
features,
feature_names,
i,
market_segm,
direction="right",
)
[docs]
def get_weights(model, num_iteration=None):
"""
Get leaf values from a RUMBoost model.
Parameters
----------
model : RUMBoost
A trained RUMBoost object.
num_iteration : int, optional (default = None)
The number of iterations to consider in the model.
Returns
-------
weights_df : pandas DataFrame
DataFrame containing all split points and their corresponding left and right leaves value,
for all features.
weights_2d_df : pandas DataFrame
Dataframe with weights arranged for a 2d plot, used in the case of 2d feature interaction.
weights_market : pandas DataFrame
Dataframe with weights arranged for market segmentation, used in the case of market segmentation.
"""
# using self object or a given model
model_json = model.dump_model(num_iteration=num_iteration)
weights = []
weights_2d = []
weights_market = []
for i, b in enumerate(model_json):
feature_names = b["feature_names"]
for trees in b["tree_info"]:
features = []
split_points = []
market_segm = False
# skipping empty trees
if "split_feature" not in trees["tree_structure"]:
continue
get_child(
model,
weights,
weights_2d,
weights_market,
trees["tree_structure"],
split_points,
features,
feature_names,
i,
market_segm,
)
weights_df = pd.DataFrame(
weights,
columns=[
"Feature",
"Split point",
"Left leaf value",
"Right leaf value",
"Utility",
],
)
weights_2d_df = pd.DataFrame(
weights_2d,
columns=[
"Feature",
"higher_lvl_range",
"lower_lvl_range",
"area_value",
"Utility",
],
)
weights_market_df = pd.DataFrame(
weights_market,
columns=[
"Feature",
"Cat value",
"Split point",
"Left leaf value",
"Right leaf value",
"Utility",
],
)
return weights_df, weights_2d_df, weights_market_df
[docs]
def weights_to_plot_v2(model, market_segm=False, num_iteration=None):
"""
Arrange weights by ascending splitting points and cumulative sum of weights.
Parameters
----------
model : RUMBoost
A trained RUMBoost object.
market_segm : bool, optional (default = False)
If True, the weights are arranged for market segmentation.
num_iteration : int, optional (default = None)
The number of iterations to consider in the model.
Returns
-------
weights_for_plot : dict
Dictionary containing splitting points and corresponding cumulative weights value for all features.
"""
# get raw weights
if market_segm:
_, _, weights = get_weights(model, num_iteration=num_iteration)
else:
weights, _, _ = get_weights(model, num_iteration=num_iteration)
weights_for_plot = {}
# for all features
for i in weights.Utility.unique():
weights_for_plot[str(i)] = {}
for f in weights[weights.Utility == i].Feature.unique():
split_points = []
function_value = [0]
# getting values related to the corresponding utility
weights_util = weights[weights.Utility == i]
# sort by ascending order
feature_data = weights_util[weights_util.Feature == f]
ordered_data = feature_data.sort_values(
by=["Split point"], ignore_index=True
)
for j, s in enumerate(ordered_data["Split point"]):
# new split point
if s not in split_points:
split_points.append(s)
# add a new right leaf value to the current right side value
function_value.append(
function_value[-1]
+ float(ordered_data.loc[j, "Right leaf value"])
)
# add left leaf value to all other current left leaf values
function_value[:-1] = [
h + float(ordered_data.loc[j, "Left leaf value"])
for h in function_value[:-1]
]
else:
# add right leaf value to the current right side value
function_value[-1] += float(ordered_data.loc[j, "Right leaf value"])
# add left leaf value to all other current left leaf values
function_value[:-1] = [
h + float(ordered_data.loc[j, "Left leaf value"])
for h in function_value[:-1]
]
for s in split_points:
if "||" in str(s):
numbers = s.split("||")
split_points[split_points.index(s)] = str(
np.mean([float(i) for i in numbers])
)
weights_for_plot[str(i)][f] = {
"Splitting points": split_points,
"Histogram values": function_value,
}
return weights_for_plot
[docs]
def non_lin_function(
weights_ordered, x_min, x_max, num_points, boosted_from_parameter_space=False
):
"""
Create the nonlinear function for parameters, from weights ordered by ascending splitting points.
Parameters
----------
weights_ordered : dict
Dictionary containing splitting points and corresponding cumulative weights value for a specific
feature's parameter.
x_min : float, int
Minimum x value for which the nonlinear function is computed.
x_max : float, int
Maximum x value for which the nonlinear function is computed.
num_points : int
Number of points used to draw the nonlinear function line.
boosted_from_parameter_space : bool, optional (default = False)
Set to True if the weights are from the parameter space.
It means that the weights are betas, and not piece-wise continuous utilities.
Returns
-------
x_values : list
X values for which the function will be plotted.
nonlin_function : list
Values of the function at the corresponding x points.
"""
# create x points
x_values = np.linspace(x_min, x_max, num_points)
nonlin_function = []
i = 0
max_i = len(weights_ordered["Splitting points"]) # all splitting points
if boosted_from_parameter_space:
start_point = x_min * float(
weights_ordered["Histogram values"][0]
) # for continuity in the piece-wise linear function, first value
x_pad = x_min # padding for accounting from previous intervals
# handling no split points
if max_i == 0:
return x_values, float(weights_ordered["Histogram values"][i])
for x in x_values:
if boosted_from_parameter_space:
if i == max_i: # last interval
nonlin_function += [
start_point
+ float(weights_ordered["Histogram values"][i]) * (x - x_pad)
] # a + bx
elif x < float(
weights_ordered["Splitting points"][i]
): # up to last interval
nonlin_function += [
start_point
+ float(weights_ordered["Histogram values"][i]) * (x - x_pad)
] # a + bx
else:
x_pad = float(weights_ordered["Splitting points"][i])
start_point = nonlin_function[-1] # update new intercept
nonlin_function += [
start_point
+ float(weights_ordered["Histogram values"][i + 1]) * (x - x_pad)
] # a + bx
# go to next splitting points
if i <= max_i - 1:
i += 1
else:
# compute the value of the function at x according to the weights value in between splitting points
if x < float(weights_ordered["Splitting points"][i]):
nonlin_function += [float(weights_ordered["Histogram values"][i])]
else:
nonlin_function += [float(weights_ordered["Histogram values"][i + 1])]
# go to next splitting points
if i < max_i - 1:
i += 1
return x_values, nonlin_function
[docs]
def get_asc(
weights,
alt_to_normalise="Driving",
alternatives={
"Walking": "0",
"Cycling": "1",
"Public Transport": "2",
"Driving": "3",
},
):
"""Retrieve ASCs from a dictionary of all values from a dictionary of leaves values per alternative per feature"""
ASCs = []
for k, alt in alternatives.items():
asc_temp = 0
for feat in weights[alt]:
asc_temp += weights[alt][feat]["Histogram values"][0]
ASCs.append(asc_temp)
return [a - ASCs[int(alternatives[alt_to_normalise])] for a in ASCs]
[docs]
def function_2d(weights_2d, x_vect, y_vect):
"""
Create the nonlinear contour plot for parameters, from weights gathered in getweights_v2
Parameters
----------
weights_2d : dict
Pandas DataFrame containing all possible rectangles with their corresponding area values, for the given feature and utility.
x_vect : numpy array
Vector of higher level feature.
y_vect : numpy array
Vector of lower level feature.
Returns
-------
contour_plot_values : numpy array
Array with values at (x,y) points.
"""
contour_plot_values = np.zeros(shape=(len(x_vect), len(y_vect)))
for k in range(len(weights_2d.index)):
if (weights_2d["lower_lvl_range"].iloc[k][1] == 10000) and (
weights_2d["higher_lvl_range"].iloc[k][1] == 10000
):
i_x = np.searchsorted(x_vect, weights_2d["higher_lvl_range"].iloc[k][0])
i_y = np.searchsorted(y_vect, weights_2d["lower_lvl_range"].iloc[k][0])
contour_plot_values[i_x:, i_y:] += weights_2d["area_value"].iloc[k]
elif weights_2d["lower_lvl_range"].iloc[k][1] == 10000:
i_x = np.searchsorted(x_vect, weights_2d["higher_lvl_range"].iloc[k][1])
i_y = np.searchsorted(y_vect, weights_2d["lower_lvl_range"].iloc[k][0])
contour_plot_values[:i_x, i_y:] += weights_2d["area_value"].iloc[k]
elif weights_2d["higher_lvl_range"].iloc[k][1] == 10000:
i_x = np.searchsorted(x_vect, weights_2d["higher_lvl_range"].iloc[k][0])
i_y = np.searchsorted(y_vect, weights_2d["lower_lvl_range"].iloc[k][1])
contour_plot_values[i_x:, :i_y] += weights_2d["area_value"].iloc[k]
else:
i_x = np.searchsorted(x_vect, weights_2d["higher_lvl_range"].iloc[k][1])
i_y = np.searchsorted(y_vect, weights_2d["lower_lvl_range"].iloc[k][1])
contour_plot_values[:i_x, :i_y] += weights_2d["area_value"].iloc[k]
return contour_plot_values