"""Create reports for validation tool"""
import plotly.figure_factory as ff
import base64
from typing import Union
import xarray as xr
from dataclasses import dataclass
from datetime import datetime
import webbrowser
import os
import numpy as np
from scipy import stats
import pandas as pd
from jinja2 import Environment, PackageLoader
from wind_validation import __version__ as pkg_version
from wind_validation.reporting import plots
import tempfile
from windkit.spatial import reproject
import plotly.graph_objects as go
from plotly.colors import DEFAULT_PLOTLY_COLORS
import warnings
import math

# Jinja2 environment that loads the report templates from the
# "reporting/templates" directory of the ``wind_validation`` package.
_JINJA_ENV = Environment(loader=PackageLoader("wind_validation", "reporting/templates"))
# Template file names, one per kind of report rendered in this module.
_SINGLE_POINT_TEMPLATE = "single_point.html"
_AGGREGATED_TEMPLATE = "aggregated.html"
_COMPARISON_TEMPLATE = "comparison.html"
# Stylesheet template; it is rendered and inlined into a <style> tag of each report.
_HTML_STYLE = "style.css"

def create_report(
    validation_results: "Union[list[xr.Dataset], xr.Dataset]",
    **kwargs,
) -> None:
    """Create HTML report from validation results comparing model with data.

    It is also possible to compare multiple models on the same dataset to
    inspect the performance of different techniques against observations.

    Parameters
    ----------
    validation_results : xarray.Dataset | list[xr.Dataset]
        Validation result produced by main "validate" function. The results
        are compatible with this reporting function. If comparing multiple
        models a list of datasets can be passed with the same structure as
        described before. A passed list is not modified.
    obs_name : str | list[str], optional
        Observations site name will be used in the general description of a
        report. This can be a string when comparing a single model to a single
        observation dataset, however when making a comparison the list of
        names is required because otherwise there is no way to make a
        distinction between models.
    mod_name : str | list[str], optional
        Model name will be used in the general description of a report. This
        can be a string when comparing a single model to a single observation
        dataset, however when making a comparison the list of names is
        required because otherwise there is no way to make a distinction
        between models.
    dest : str, optional
        Path of the html file to write the report to. If not provided the
        html file is saved as a named temporary file. By default None
    author : str, optional
        Author of the generated report to be stated in the description.
        By default ""
    show : bool, optional
        If True - tries to open the report in the browser right away.
        By default True

    Raises
    ------
    RuntimeError
        If the destination file cannot be opened for writing (e.g. the
        destination folder doesn't exist).
        If results type doesn't match the documented ones.
        If model and observation names are not provided for comparison report.
    """
    if isinstance(validation_results, xr.Dataset):
        # Old logic for distinguishing agg from single point/site, leaving it for
        # future reference if the discussion comes up again :)
        # if (
        #     _SITE_IDENTIFIER not in validation_results.dims
        #     or validation_results.dims[_SITE_IDENTIFIER] == 1
        # ):
        if (
            _POINT_IDENTIFIER in validation_results.dims
            and validation_results.sizes[_POINT_IDENTIFIER] > 1
        ):
            # for plotting purpose we always want the dataset to be in lat/long
            validation_results = reproject(validation_results, "EPSG:4326")
            # single site (multiple points) or points from various sites vs a model
            html = __aggregated_report(validation_results, **kwargs)
        else:
            # single point vs a model
            html = __single_point_report(validation_results, **kwargs)
    elif isinstance(validation_results, list):
        if not all(isinstance(res, xr.Dataset) for res in validation_results):
            raise RuntimeError(
                "Validation result must be xarray.Dataset type. "
                "One of the provided results does not meet the criterion"
            )
        # Build a new list so the list handed in by the caller stays untouched.
        reprojected = [reproject(res, "EPSG:4326") for res in validation_results]
        # useful for multiple models comparison on same sites data
        html = __comparison_report(reprojected, **kwargs)
    else:
        raise RuntimeError(
            "Can't create a report from this type of data! "
            "Available options: list[xr.Dataset], xr.Dataset"
        )

    payload = html.encode("utf-8")
    dest = kwargs.get("dest", None)
    if dest is None:
        # delete=False keeps the file on disk after the handle is closed, so it
        # can be opened by the browser afterwards.
        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as reportf:
            reportf.write(payload)
            dest = reportf.name
    else:
        try:
            reportf = open(dest, "wb")
        except OSError as err:  # pragma: no cover
            raise RuntimeError("Provided directory doesn't exist...") from err
        # The file is closed (and therefore flushed) before the browser is asked
        # to open it.
        with reportf:
            reportf.write(payload)
    print(f"HTML report saved in: {dest}")

    show = kwargs.get("show", True)
    if not show:  # pragma: no cover
        return

    # Resolve to an absolute path so the file URL is valid for a relative dest.
    success = webbrowser.open("file://" + os.path.realpath(dest), new=2)
    if not success:
        print("Couldn't open a browser window!")
@dataclass
class MetaData:
    """Package and report metadata"""

    date: datetime  # time at which the report arguments were preprocessed
    user: str  # value of the ``author`` keyword ("" when not given)
    version: str  # wind_validation package version


@dataclass
class ValidationResults:
    """Validation results class includes the observation title, model title
    and results from validation comparing those two"""

    obs: str  # value of the ``obs_name`` keyword
    mod: str  # value of the ``mod_name`` keyword
    site_results: pd.DataFrame  # validation dataset converted to a data frame
    format: str  # ``format`` attribute of the validation dataset


def __preprocess_args(validation_results: "xr.Dataset", **kwargs):
    """Turn a validation dataset and the report keywords into report inputs.

    Parameters
    ----------
    validation_results : xarray.Dataset
        Validation result; must carry a ``format`` attribute. A dataset
        without dimensions gets a ``point`` dimension added.
    **kwargs
        ``author``, ``obs_name`` and ``mod_name`` are read (defaults ``""``,
        ``"Observations"`` and ``"Model"``); other keywords are ignored.

    Returns
    -------
    tuple[MetaData, ValidationResults]

    Raises
    ------
    RuntimeError
        If the dataset has no ``format`` attribute.
    """
    if "format" not in validation_results.attrs:
        raise RuntimeError("Validation results are missing format attribute")
    if len(validation_results.dims) == 0:
        validation_results = validation_results.expand_dims("point")

    res_format = validation_results.attrs["format"]
    author = kwargs.get("author", "")
    site_name = kwargs.get("obs_name", "Observations")
    model_name = kwargs.get("mod_name", "Model")

    site_results = validation_results.to_dataframe()
    # if meaningful names are available, use these as index, otherwise keep
    # the index produced by ``to_dataframe`` (default)
    try:
        site_results = site_results.reset_index().set_index([_SITE_IDENTIFIER])
    except (KeyError, ValueError):
        pass

    meta = MetaData(datetime.now(), author, pkg_version)
    results = ValidationResults(site_name, model_name, site_results, res_format)
    return (meta, results)


def __aggregate(results: ValidationResults):
    """Return the column-wise mean of the metrics shown in the reports.

    Raises
    ------
    RuntimeError
        If any of the expected metric columns is missing from
        ``results.site_results``.
    """
    metric_columns = [
        "obs_wind_speed_mean",
        "mod_wind_speed_mean",
        "wind_speed_me",
        "wind_speed_mpe",
        "wind_speed_mae",
        "wind_speed_mape",
        "obs_power_density_mean",
        "mod_power_density_mean",
        "power_density_me",
        "power_density_mpe",
        "power_density_mae",
        "power_density_mape",
    ]
    try:
        agg_vars = results.site_results[metric_columns]
    except KeyError as e:
        raise RuntimeError(
            f"Validation result is missing some"
            f" assumed keys to create a report:\n{e}"
        ) from e
    return agg_vars.mean()


def __aggregated_report(
    validation_results: "xr.Dataset",
    **kwargs,
):
    """Render the aggregated (multiple points vs. one model) report as HTML."""
    meta, results = __preprocess_args(validation_results, **kwargs)
    agg_results = __aggregate(results)
    css_content = f"<style>{_JINJA_ENV.get_template(_HTML_STYLE).render()}</style>"

    mape_figs = plots._individual_mast_maep(
        plots.PlotResult(
            results.site_results["wind_speed_mpe"], "Wind speed error", "%"
        )
    )
    mapes = [plots._fig_to_html(fig) for fig in mape_figs]

    # TODO should raise error for single point
    comparison_label = [f"{results.obs} vs {results.mod}"]
    power_hist = plots._compare_distributions(
        plots.PlotResult(results.site_results, "Power density error", "%"),
        ["power_density_mpe"],
        comparison_label,
    )
    ws_hist = plots._compare_distributions(
        plots.PlotResult(results.site_results, "Wind speed error", "%"),
        ["wind_speed_mpe"],
        comparison_label,
    )

    template_html = _JINJA_ENV.get_template(_AGGREGATED_TEMPLATE)
    return template_html.render(
        title=f"Wind-Validation Aggregated {results.format.title()} Report",
        md_css=css_content,
        obs=results.obs,
        mod=results.mod,
        meta=meta,
        agg_results=agg_results,
        site_results=results.site_results,
        mae_hist=plots._fig_to_html(ws_hist),
        power_mape_hist=plots._fig_to_html(power_hist),
        mapes=mapes,
        iterrow=list(results.site_results.iterrows()),
        logos=__generate_bin_logos(),
    )


def __comparison_report(validation_results: "list[xr.Dataset]", **kwargs):
    """Render the report comparing several validation results as HTML.

    Parameters
    ----------
    validation_results : list[xarray.Dataset]
        One validation result per model; all must have the same size along
        the ``point`` dimension.
    **kwargs
        ``obs_name`` and ``mod_name`` are required and must be lists with at
        least one entry per validation result. The remaining keywords are
        forwarded to the argument preprocessing.

    Raises
    ------
    RuntimeError
        If the names are missing, are not lists or are fewer than the
        validation results; if no validation result is given; if the
        validation results differ in their number of points.
    """
    # parse names
    if "obs_name" not in kwargs or "mod_name" not in kwargs:
        raise RuntimeError(
            "Comparison report requires list of model and observation names!"
        )
    models = kwargs["mod_name"]
    observations = kwargs["obs_name"]
    # Both arguments are indexed per validation result below, so both must be
    # lists.
    if not isinstance(models, list) or not isinstance(observations, list):
        raise RuntimeError(
            "Comparison report requires list of model and observation names!"
        )
    if not validation_results:
        raise RuntimeError(
            "Comparison report requires at least one validation result!"
        )
    n_results = len(validation_results)
    if len(models) < n_results or len(observations) < n_results:
        raise RuntimeError(
            "Comparison report requires a model and an observation name "
            "for each validation result!"
        )

    if len(models) > 1 and all(m == models[0] for m in models[1:]):
        warnings.warn(
            "All the model names are the same, you are either passing "
            "observation names instead of models or want to compare "
            "different observations with the same model which "
            "doesn't make sense because (in some plots) the comparison "
            "is done side by side on each point."
        )

    points_num = validation_results[0].sizes["point"]
    if any(x.sizes["point"] != points_num for x in validation_results):
        raise RuntimeError("Cannot compare non-equally sized validation results.")

    # TODO: fix all the ugly plotting stuff, isolate it
    nbins = 30
    slice_by = 35  # split individual points comparison into multiple figs
    aggregated_results = []
    table_frames = []
    map_frames = []
    hist_plots = {"wind_speed": go.Figure(), "power_density": go.Figure()}
    for i, val_res in enumerate(validation_results):
        model = models[i]
        label = f"{model} vs {observations[i]}"
        # Wrap around so more results than available colors do not fail.
        color = DEFAULT_PLOTLY_COLORS[i % len(DEFAULT_PLOTLY_COLORS)]

        meta, results = __preprocess_args(val_res, **kwargs)
        aggregated_results.append(__aggregate(results))

        unique_sites = results.site_results.index.unique()
        number_of_sites = len(unique_sites)
        number_of_points = len(results.site_results.index)
        n_fig = math.ceil(number_of_sites / slice_by)
        if i == 0:
            wind_speed_mpe_bars = [go.Figure() for _ in range(n_fig)]

        # loop over two variables that we want to plot
        for v, hist in hist_plots.items():
            data = results.site_results[f"{v}_mpe"]
            xrange = np.linspace(data.min(), data.max(), nbins)
            hist.add_trace(
                go.Histogram(
                    x=data,
                    nbinsx=nbins,
                    histnorm="probability density",
                    marker=dict(color=color),
                    name=label,
                )
            )
            hist.add_trace(
                go.Scatter(
                    x=xrange,
                    y=stats.norm(loc=data.mean(), scale=data.std()).pdf(xrange),
                    mode="lines",
                    line=dict(color=color, width=1.5),
                    name=f"Normal dist. ({model})",
                )
            )

        for k in range(n_fig):
            lower = k * slice_by
            upper = lower + slice_by
            if number_of_sites == number_of_points:
                # every index label is unique: one bar per point
                sliced = results.site_results[lower:upper]
                trace = go.Bar(
                    name=label,
                    x=sliced.index.values,
                    y=sliced.wind_speed_mpe,
                )
            else:
                # repeated index labels: one box per label
                sliced = results.site_results.loc[unique_sites[lower:upper]]
                trace = go.Box(
                    name=label,
                    x=sliced.index.values,
                    y=sliced.wind_speed_mpe,
                )
            wind_speed_mpe_bars[k].add_trace(trace)

        # required because the following code requires merging by unique point
        # the above code indexing by name to be able to plot boxplots
        meta, results = __preprocess_args(
            val_res.drop_vars(_SITE_IDENTIFIER, errors="ignore"), **kwargs
        )
        # form detailed table
        table_frames.append(
            results.site_results[["wind_speed_mpe", "power_density_mpe"]].add_suffix(
                f"_model_{model}"
            )
        )
        map_frames.append(results.site_results.assign(model=model))

    tabledf = pd.concat(table_frames, axis=1)
    mapdf = pd.concat(map_frames)

    # pretty column names for table
    tabledf = tabledf.rename(
        columns=lambda x: " ".join(
            [y.capitalize() if y != "ME" else y.upper() for y in x.split("_")]
        )
    )
    tabledf = tabledf.reindex(sorted(tabledf.columns), axis=1)
    tabledf.index.names = ["Point/Site"]
    tabledf = tabledf.reset_index().round(1)
    pd_html_table = tabledf.to_html(
        index=False, classes=["sortable", "center"], border=0
    )
    table_caption = (
        f"<caption> Table 2: detailed all-sector metrics for each of the"
        f" {number_of_points} points. </caption>"
    )
    pd_html_table = pd_html_table.replace("<thead>", table_caption + "<thead>")

    for fig in wind_speed_mpe_bars:
        # TODO: dynamic name?
        fig.update_layout(xaxis_title="Points", yaxis_title="Wind Speed MPE")
        fig.update_layout(barmode="group", legend=dict(x=0, y=1))
    wind_speed_mpe_bars = [plots._fig_to_html(fig) for fig in wind_speed_mpe_bars]

    hist_plots["wind_speed"].update_layout(
        xaxis_title="Mean error in wind speed (%)",
        yaxis_title="Probability density",
        legend=dict(x=0, y=1),
    )
    hist_plots["power_density"].update_layout(
        xaxis_title="Mean error in power density (%)",
        yaxis_title="Probability density",
        legend=dict(x=0, y=1),
    )

    template_html = _JINJA_ENV.get_template(_COMPARISON_TEMPLATE)
    return template_html.render(
        title=f"Wind-Validation Comparison {results.format.title()} Report",
        md_css=f"<style>{_JINJA_ENV.get_template(_HTML_STYLE).render()}</style>",
        obs=observations,
        mod=models,
        point_count=number_of_points,
        mast_count=number_of_sites,
        # zip can be iterated only once, thus it is wrapped in a list
        mod_obs_agg=list(zip(models, observations, aggregated_results)),
        meta=meta,
        wind_speed_mpe_hist=plots._fig_to_html(hist_plots["wind_speed"]),
        power_density_mae_hist=plots._fig_to_html(hist_plots["power_density"]),
        wind_speed_mpe_bars=wind_speed_mpe_bars,
        pandas_table=pd_html_table,
        map_points=list(mapdf.iterrows()),
        logos=__generate_bin_logos(),
    )


def __single_point_report(validation_results: "xr.Dataset", **kwargs):
    """Render the single point vs. model report as HTML."""
    meta, results = __preprocess_args(validation_results, **kwargs)
    css_content = f"<style>{_JINJA_ENV.get_template(_HTML_STYLE).render()}</style>"
    template_html = _JINJA_ENV.get_template(_SINGLE_POINT_TEMPLATE)

    # Two-column table: one row per column of the first result row.
    first_row = results.site_results.iloc[0]
    table_df = pd.DataFrame(
        {"Variable": first_row.index, "Value": first_row.to_numpy()}
    )
    table_html = table_df.to_html(index=False, classes=["center"], border=0)
    table_caption = (
        "<caption> Table 1: Detailed site/point metrics & stats."
        " Wind-validation (2021). </caption>"
    )
    table_html = table_html.replace("<thead>", table_caption + "<thead>")

    return template_html.render(
        title=f"Wind-Validation Aggregated {results.format.title()} Report",
        md_css=css_content,
        obs=results.obs,
        mod=results.mod,
        meta=meta,
        iterrow=list(results.site_results.iterrows()),
        logos=__generate_bin_logos(),
        table=table_html,
    )


def __generate_bin_logos():
    """Return the two logo images as a tuple of base64-encoded strings.

    The images are read from ``templates/static`` next to this module.
    """
    static_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "templates", "static"
    )

    def _encode_img(path):
        with open(path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode()

    dtu_logo = _encode_img(os.path.join(static_dir, "Corp_Red_RGB.png"))
    winsider_logo = _encode_img(
        os.path.join(static_dir, "Windsider_BIG_Logo_Color.png")
    )
    return (dtu_logo, winsider_logo)