"""Create reports for validation tool"""
import plotly.figure_factory as ff
import base64
from typing import Union
import xarray as xr
from dataclasses import dataclass
from datetime import datetime
import webbrowser
import os
import numpy as np
from scipy import stats
import pandas as pd
from jinja2 import Environment, PackageLoader
from wind_validation import __version__ as pkg_version
from wind_validation.reporting import plots
import tempfile
from windkit.spatial import reproject
import plotly.graph_objects as go
from plotly.colors import DEFAULT_PLOTLY_COLORS
import warnings
import math
# Jinja2 environment that loads the report templates shipped inside the
# ``wind_validation`` package (``reporting/templates``).
_JINJA_ENV = Environment(loader=PackageLoader("wind_validation", "reporting/templates"))
# Template file names, one per report flavour, plus the shared stylesheet that
# is rendered and inlined into every report.
_SINGLE_POINT_TEMPLATE = "single_point.html"
_AGGREGATED_TEMPLATE = "aggregated.html"
_COMPARISON_TEMPLATE = "comparison.html"
_HTML_STYLE = "style.css"
# Column used as the DataFrame index when validation results carry site names.
_SITE_IDENTIFIER = "name"
# Dimension name checked to tell multi-point results from single-point results.
_POINT_IDENTIFIER = "point"
def create_report(
    validation_results: "Union[list[xr.Dataset], xr.Dataset]",
    **kwargs,
) -> None:
    """Create an HTML report from validation results comparing model with data.

    A single dataset produces either a single-point report or an aggregated
    report (when it has more than one entry along the point dimension). A list
    of datasets produces a comparison report of several models.

    Parameters
    ----------
    validation_results : xarray.Dataset | list[xarray.Dataset]
        Validation result produced by the main "validate" function. When
        comparing multiple models, a list of datasets with that same structure.
        The list passed in is not modified.
    obs_name : str | list[str], optional
        Observation site name(s) used in the report description. A list is
        required for a comparison report.
    mod_name : str | list[str], optional
        Model name(s) used in the report description. A list is required for a
        comparison report.
    dest : str, optional
        Path of the HTML file to write. If omitted (or None) the report is
        written to a temporary ``.html`` file that is not deleted afterwards.
    author : str, optional
        Author stated in the report description. By default "".
    show : bool, optional
        If True, try to open the report in a browser right away.
        By default True.

    Raises
    ------
    RuntimeError
        If the destination file cannot be opened for writing.
        If ``validation_results`` is neither a dataset nor a list of datasets.
        If model and observation names are not provided for a comparison report.
    """
    if isinstance(validation_results, xr.Dataset):
        # An older rule used the site identifier ("name") instead of the point
        # dimension to distinguish aggregated from single-point results.
        if (
            _POINT_IDENTIFIER in validation_results.dims
            and validation_results.sizes[_POINT_IDENTIFIER] > 1
        ):
            # for plotting purpose we always want the dataset to be in lat/long
            validation_results = reproject(validation_results, "EPSG:4326")
            # single site (multiple points) or points from various sites vs a model
            html = __aggregated_report(validation_results, **kwargs)
        else:
            # single point vs a model
            html = __single_point_report(validation_results, **kwargs)
    elif isinstance(validation_results, list):
        # Validate everything first so nothing is reprojected on bad input.
        if not all(isinstance(res, xr.Dataset) for res in validation_results):
            raise RuntimeError(
                "Validation result must be xarray.Dataset type. "
                "One of the provided results does not meet the criterion"
            )
        # Build a new list instead of overwriting the caller's entries.
        reprojected = [reproject(res, "EPSG:4326") for res in validation_results]
        # useful for multiple models comparison on same sites data
        html = __comparison_report(reprojected, **kwargs)
    else:
        raise RuntimeError(
            "Can't create a report from this type of data! "
            "Available options: list[xr.Dataset], xr.Dataset"
        )

    dest = kwargs.get("dest", None)
    if dest is None:
        reportf = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        dest = reportf.name
    else:
        try:
            reportf = open(dest, "wb")
        except (OSError, TypeError, ValueError) as err:  # pragma: no cover
            raise RuntimeError("Provided directory doesn't exist...") from err
    # Close (and thereby flush) the file before a browser may try to read it.
    with reportf:
        reportf.write(html.encode("utf-8"))
    print(f"HTML report saved in: {dest}")

    show = kwargs.get("show", True)
    if not show:  # pragma: no cover
        return
    success = webbrowser.open(dest, new=2)
    if not success:
        print("Couldn't open a browser window!")
@dataclass
class MetaData:
    """Metadata shown in a report: when, by whom and with which package version."""

    # Timestamp attached to the report.
    date: datetime
    # Author name stated in the report.
    user: str
    # Package version string.
    version: str
@dataclass
class ValidationResults:
    """Bundle of observation title, model title and the validation
    results comparing the two."""

    # Observation title used in the report.
    obs: str
    # Model title used in the report.
    mod: str
    # Validation results as a table.
    site_results: pd.DataFrame
    # Value of the results' "format" attribute.
    format: str
def __preprocess_args(validation_results: xr.Dataset, **kwargs):
    """Turn a validation dataset plus report options into report inputs.

    Parameters
    ----------
    validation_results : xarray.Dataset
        Validation results; must carry a ``format`` attribute.
    **kwargs
        ``author`` (default ""), ``obs_name`` (default "Observations") and
        ``mod_name`` (default "Model") are read; other keys are ignored.

    Returns
    -------
    tuple[MetaData, ValidationResults]
        Report metadata and the results converted to a DataFrame.

    Raises
    ------
    RuntimeError
        If the dataset has no ``format`` attribute.
    """
    if "format" not in validation_results.attrs:
        raise RuntimeError("Validation results are missing format attribute")
    if len(validation_results.dims) == 0:
        # A dimensionless dataset gets a point dimension so it becomes a table.
        validation_results = validation_results.expand_dims(_POINT_IDENTIFIER)
    res_format = validation_results.attrs["format"]
    author = kwargs.get("author", "")
    site_name = kwargs.get("obs_name", "Observations")
    model_name = kwargs.get("mod_name", "Model")
    # if meaningful names are available, index by them, otherwise keep
    # the enumerated point dimension (default)
    try:
        site_results = (
            validation_results.to_dataframe()
            .reset_index()
            .set_index([_SITE_IDENTIFIER])
        )
    except (KeyError, ValueError):
        # No usable site-name column: fall back to the plain conversion.
        site_results = validation_results.to_dataframe()
    meta = MetaData(datetime.now(), author, pkg_version)
    results = ValidationResults(site_name, model_name, site_results, res_format)
    return (meta, results)
def __aggregate(results: "ValidationResults"):
    """Average the headline wind-speed and power-density metrics over all rows.

    Parameters
    ----------
    results : ValidationResults
        Only ``results.site_results`` (a DataFrame) is read.

    Returns
    -------
    pandas.Series
        Column-wise mean of the selected metric columns, indexed by column name.

    Raises
    ------
    RuntimeError
        If any of the expected metric columns is missing; the original
        ``KeyError`` is attached as the cause.
    """
    metric_columns = [
        "obs_wind_speed_mean",
        "mod_wind_speed_mean",
        "wind_speed_me",
        "wind_speed_mpe",
        "wind_speed_mae",
        "wind_speed_mape",
        "obs_power_density_mean",
        "mod_power_density_mean",
        "power_density_me",
        "power_density_mpe",
        "power_density_mae",
        "power_density_mape",
    ]
    try:
        agg_vars = results.site_results[metric_columns]
    except KeyError as e:
        raise RuntimeError(
            f"Validation result is missing some"
            f" assumed keys to create a report:\n{e}"
        ) from e
    return agg_vars.mean()
def __aggregated_report(
    validation_results: xr.Dataset,
    **kwargs,
):
    """Render the aggregated report template and return the HTML string.

    The dataset is preprocessed into a table, the headline metrics are
    averaged, and the per-mast figures plus two error distribution figures are
    converted to HTML and handed to the aggregated template.
    """
    meta, results = __preprocess_args(validation_results, **kwargs)
    site_df = results.site_results
    agg_results = __aggregate(results)

    stylesheet = _JINJA_ENV.get_template(_HTML_STYLE).render()
    css_content = f"<style>{stylesheet}</style>"

    mast_figures = plots._individual_mast_maep(
        plots.PlotResult(site_df["wind_speed_mpe"], "Wind speed error", "%")
    )
    mapes = [plots._fig_to_html(figure) for figure in mast_figures]

    # TODO should raise error for single point
    pair_label = f"{results.obs} vs {results.mod}"
    power_hist = plots._compare_distributions(
        plots.PlotResult(site_df, "Power density error", "%"),
        ["power_density_mpe"],
        [pair_label],
    )
    ws_hist = plots._compare_distributions(
        plots.PlotResult(site_df, "Wind speed error", "%"),
        ["wind_speed_mpe"],
        [pair_label],
    )

    template = _JINJA_ENV.get_template(_AGGREGATED_TEMPLATE)
    return template.render(
        title=f"Wind-Validation Aggregated {results.format.title()} Report",
        md_css=css_content,
        obs=results.obs,
        mod=results.mod,
        meta=meta,
        agg_results=agg_results,
        site_results=site_df,
        mae_hist=plots._fig_to_html(ws_hist),
        power_mape_hist=plots._fig_to_html(power_hist),
        mapes=mapes,
        iterrow=list(site_df.iterrows()),
        logos=__generate_bin_logos(),
    )
def __comparison_report(validation_results: "list[xr.Dataset]", **kwargs):
    """Render the multi-model comparison report and return the HTML string.

    Parameters
    ----------
    validation_results : list[xarray.Dataset]
        One validation result per model; all must have the same number of
        points.
    **kwargs
        Must contain ``obs_name`` and ``mod_name``, both lists with at least
        one entry per validation result. The remaining keys are forwarded to
        the preprocessing step.

    Raises
    ------
    RuntimeError
        If the names are missing or not lists, if there are no validation
        results, if there are fewer names than results, or if the results do
        not all have the same number of points.
    """
    names_error = "Comparison report requires list of model and observation names!"
    if "obs_name" not in kwargs or "mod_name" not in kwargs:
        raise RuntimeError(names_error)
    observations = kwargs["obs_name"]
    models = kwargs["mod_name"]
    # Both must be lists: a bare string would be indexed character by character.
    if not isinstance(observations, list) or not isinstance(models, list):
        raise RuntimeError(names_error)
    if not validation_results:
        raise RuntimeError(
            "Comparison report requires at least one validation result."
        )
    n_results = len(validation_results)
    if len(models) < n_results or len(observations) < n_results:
        raise RuntimeError(
            "Comparison report requires one model and one observation name "
            "per validation result."
        )
    if len(models) > 1 and all(m == models[0] for m in models[1:]):
        warnings.warn(
            "All the model names are the same, you are either passing "
            "observation names instead of models or want to compare "
            "different observations with the same model which "
            "doesn't make sense because (in some plots) the comparison "
            "is done side by side on each point."
        )

    points_num = validation_results[0].sizes[_POINT_IDENTIFIER]
    if not all(x.sizes[_POINT_IDENTIFIER] == points_num for x in validation_results):
        raise RuntimeError("Cannot compare non-equally sized validation results.")

    # TODO: fix all the ugly plotting stuff, isolate it
    nbins = 30
    slice_by = 35  # split individual points comparison into multiple figs
    n_colors = len(DEFAULT_PLOTLY_COLORS)
    aggregated_results = []
    table_parts = []
    map_parts = []
    hist_plots = {"wind_speed": go.Figure(), "power_density": go.Figure()}

    for i, val_res in enumerate(validation_results):
        pair_name = f"{models[i]} vs {observations[i]}"
        # Cycle through the palette so more models than colours cannot overflow.
        color = DEFAULT_PLOTLY_COLORS[i % n_colors]

        meta, results = __preprocess_args(val_res, **kwargs)
        aggregated_results.append(__aggregate(results))
        unique_sites = results.site_results.index.unique()
        number_of_sites = len(unique_sites)
        number_of_points = len(results.site_results.index)
        n_fig = math.ceil(number_of_sites / slice_by)
        if i == 0:
            # The figures are created once and shared by all models.
            wind_speed_mpe_bars = [go.Figure() for _ in range(n_fig)]

        # loop over two variables that we want to plot
        for v in hist_plots:
            data = results.site_results[f"{v}_mpe"]
            mean = data.mean()
            stddev = data.std()
            xrange = np.linspace(data.min(), data.max(), nbins)
            hist_plots[v].add_trace(
                go.Histogram(
                    x=data,
                    nbinsx=nbins,
                    histnorm="probability density",
                    marker=dict(color=color),
                    name=pair_name,
                )
            )
            hist_plots[v].add_trace(
                go.Scatter(
                    x=xrange,
                    y=stats.norm(loc=mean, scale=stddev).pdf(xrange),
                    mode="lines",
                    line=dict(color=color, width=1.5),
                    name=f"Normal dist. ({models[i]})",
                )
            )

        # One row per index label -> bars; repeated labels -> box plots.
        one_point_per_site = number_of_sites == number_of_points
        for k in range(n_fig):
            lower = k * slice_by
            upper = lower + slice_by
            if one_point_per_site:
                sliced = results.site_results[lower:upper]
                trace = go.Bar(
                    name=pair_name,
                    x=sliced.index.values,
                    y=sliced.wind_speed_mpe,
                )
            else:
                sliced = results.site_results.loc[unique_sites[lower:upper]]
                trace = go.Box(
                    name=pair_name,
                    x=sliced.index.values,
                    y=sliced.wind_speed_mpe,
                )
            wind_speed_mpe_bars[k].add_trace(trace)

        # required because the following code requires merging by unique point
        # while the code above indexes by name to be able to plot boxplots
        meta, results = __preprocess_args(
            val_res.drop_vars(_SITE_IDENTIFIER, errors="ignore"), **kwargs
        )
        # form detailed table
        suffixed_df = results.site_results[["wind_speed_mpe", "power_density_mpe"]]
        table_parts.append(suffixed_df.add_suffix(f"_model_{models[i]}"))
        results.site_results["model"] = models[i]
        map_parts.append(results.site_results)

    # Concatenate once instead of growing the frames inside the loop.
    tabledf = pd.concat(table_parts, axis=1)
    mapdf = pd.concat(map_parts)

    # pretty column names for table
    # NOTE(review): the "ME" test only matches an already upper-case token, so
    # metric acronyms come out as e.g. "Mpe" -- confirm this is intended.
    tabledf.rename(
        columns=lambda x: " ".join(
            [y.capitalize() if y != "ME" else y.upper() for y in x.split("_")]
        ),
        inplace=True,
    )
    tabledf = tabledf.reindex(sorted(tabledf.columns), axis=1)
    tabledf.index.names = ["Point/Site"]
    tabledf.reset_index(inplace=True)
    tabledf = tabledf.round(1)
    pd_html_table = tabledf.to_html(
        index=False, classes=["sortable", "center"], border=0
    )
    table_caption = f"""<caption>
Table 2: detailed all-sector metrics for each of the {number_of_points} points.
</caption>"""
    pd_html_table = pd_html_table.replace("<thead>", table_caption + "<thead>")

    # Convert every figure that was created, not just the last n_fig of them.
    bar_htmls = []
    for fig in wind_speed_mpe_bars:
        fig.update_layout(  # TODO: dynamic name?
            xaxis_title="Points", yaxis_title="Wind Speed MPE"
        )
        fig.update_layout(barmode="group", legend=dict(x=0, y=1))
        bar_htmls.append(plots._fig_to_html(fig))

    hist_plots["wind_speed"].update_layout(
        xaxis_title="Mean error in wind speed (%)",
        yaxis_title="Probability density",
        legend=dict(x=0, y=1),
    )
    hist_plots["power_density"].update_layout(
        xaxis_title="Mean error in power density (%)",
        yaxis_title="Probability density",
        legend=dict(x=0, y=1),
    )

    template_html = _JINJA_ENV.get_template(_COMPARISON_TEMPLATE)
    rendered_html = template_html.render(
        title=f"Wind-Validation Comparison {results.format.title()} Report",
        md_css=f"<style>{_JINJA_ENV.get_template(_HTML_STYLE).render()}</style>",
        obs=observations,
        mod=models,
        point_count=number_of_points,
        mast_count=number_of_sites,
        # zip can only be iterated once, thus it is wrapped in a list
        mod_obs_agg=list(zip(models, observations, aggregated_results)),
        meta=meta,
        wind_speed_mpe_hist=plots._fig_to_html(hist_plots["wind_speed"]),
        power_density_mae_hist=plots._fig_to_html(hist_plots["power_density"]),
        wind_speed_mpe_bars=bar_htmls,
        pandas_table=pd_html_table,
        map_points=list(mapdf.iterrows()),
        logos=__generate_bin_logos(),
    )
    return rendered_html
def __single_point_report(validation_results: xr.Dataset, **kwargs):
    """Render the single-point report template and return the HTML string.

    The first row of the preprocessed results is shown as a two-column
    ("Variable", "Value") HTML table.
    """
    meta, results = __preprocess_args(validation_results, **kwargs)
    css_content = f"<style>{_JINJA_ENV.get_template(_HTML_STYLE).render()}</style>"
    template_html = _JINJA_ENV.get_template(_SINGLE_POINT_TEMPLATE)

    # Build the table explicitly: the column names of the results become the
    # "Variable" column and the first row's values become the "Value" column.
    first_row = results.site_results.iloc[0]
    table_df = first_row.rename("Value").rename_axis("Variable").reset_index()
    table_html = table_df.to_html(index=False, classes=["center"], border=0)
    table_caption = """<caption>
Table 1: Detailed site/point metrics & stats. Wind-validation (2021).
</caption>"""
    table_html = table_html.replace("<thead>", table_caption + "<thead>")

    # NOTE(review): the title says "Aggregated" although this is the
    # single-point report -- looks like a copy/paste leftover, confirm.
    return template_html.render(
        title=f"Wind-Validation Aggregated {results.format.title()} Report",
        md_css=css_content,
        obs=results.obs,
        mod=results.mod,
        meta=meta,
        iterrow=list(results.site_results.iterrows()),
        logos=__generate_bin_logos(),
        table=table_html,
    )
def __generate_bin_logos():
    """Return the two logo images bundled next to this module as a tuple of
    base64-encoded text strings (first ``Corp_Red_RGB.png``, then
    ``Windsider_BIG_Logo_Color.png``)."""
    module_dir = os.path.dirname(os.path.realpath(__file__))

    def _as_base64(path):
        # Read the raw bytes and return their base64 form as text.
        with open(path, "rb") as handle:
            return base64.b64encode(handle.read()).decode()

    logo_paths = (
        module_dir + "/templates/static/Corp_Red_RGB.png",
        module_dir + "/templates/static/Windsider_BIG_Logo_Color.png",
    )
    return tuple(_as_base64(path) for path in logo_paths)