# Copyright (c) 2020, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
from pathlib import Path
from typing import Any, Callable, List, Optional, Tuple, Union
from uuid import UUID
from warnings import warn
from pandas import DataFrame
import json
from .._services.model_repository import ModelRepository as mr
from ..core import PagedList, RestObj, current_session
from ..utils.misc import check_if_jupyter
from .write_score_code import ScoreCode as sc
from .zip_model import ZipModel as zm
from ..tasks import _create_project, _update_properties, _compare_properties
[docs]
def get_model_properties(
target_values: Union[list, str, None] = None,
model_files: Union[str, Path, None] = None,
):
if type(model_files) is dict:
try:
model = json.loads(model_files["ModelProperties.json"])
except (json.JSONDecodeError, TypeError):
model = model_files["ModelProperties.json"]
try:
input_var = json.loads(model_files["inputVar.json"])
except (json.JSONDecodeError, TypeError):
input_var = model_files["inputVar.json"]
try:
output_var = json.loads(model_files["outputVar.json"])
except (json.JSONDecodeError, TypeError):
output_var = model_files["outputVar.json"]
else:
with open(Path(model_files) / "ModelProperties.json") as f:
model = json.load(f)
with open(Path(model_files) / "inputVar.json") as f:
input_var = json.load(f)
with open(Path(model_files) / "outputVar.json") as f:
output_var = json.load(f)
if target_values is not None:
target_string = ", ".join(target_values)
model["classTargetValues"] = target_string
for var in output_var:
var["role"] = "output"
for var in input_var:
var["role"] = "input"
return model, input_var, output_var
[docs]
def project_exists(
project: Union[str, dict, RestObj],
response: Union[str, dict, RestObj, None] = None,
target_values: Union[list, str, None] = None,
model_files: Union[str, Path, None] = None,
overwrite_project_properties: Optional[bool] = False,
) -> RestObj:
"""
Checks if project exists on SAS Viya. If the project does not exist, then a new
project is created or an error is raised.
Parameters
----------
project : str, dict, or RestObj
The name or id of the model project, or a dictionary representation of the
project.
response : str, dict, or RestObj, optional
JSON response of the get_project() call to model repository service.
target_values : list of str, optional
A list of target values for the target variable. This argument and the
score_metrics argument dictate the handling of the predicted values from
the prediction method. The default value is None.
model_files : str, pathlib.Path, or dict
Either the directory location of the model files (string or Path object), or
a dictionary containing the contents of all the model files.
overwrite_project_properties : bool, optional
Set whether the project properties should be overwritten when attempting to
import the model. The default value is False.
Returns
-------
response : RestObj
JSON response of the get_project() call to model repository service.
Raises
------
SystemError
Alerts user that API calls cannot continue until a valid project is provided.
"""
if response is None:
try:
warn(f"No project with the name or UUID {project} was found.")
UUID(project)
raise SystemError(
"The provided UUID does not match any projects found in SAS Model "
"Manager. Please enter a valid UUID or a new name for a project to be "
"created."
)
except ValueError:
repo = mr.default_repository().get("id")
if model_files is not None:
model, input_var, output_var = get_model_properties(
target_values, model_files
)
response = _create_project(project, model, repo, input_var, output_var)
else:
response = mr.create_project(project, repo)
if check_if_jupyter():
print(f"A new project named {response.name} was created.")
return response
else:
model, input_var, output_var = get_model_properties(target_values, model_files)
if overwrite_project_properties:
response = _update_properties(project, model, input_var, output_var)
else:
_compare_properties(project, model, input_var, output_var)
return response
[docs]
def model_exists(
project: Union[str, dict, RestObj],
name: str,
force: bool = "False",
version_name: str = "latest",
) -> None:
"""
Checks if model already exists in the same project and either raises an error or
delete the redundant model. If no project version is provided, the version is
assumed to be "latest".
Parameters
----------
project : str, dict, or RestObj
The name or id of the model project, or a dictionary representation of the
project.
name : str
The name of the model.
force : bool, optional
Sets whether to overwrite models with the same name upon upload. The default
value is False.
version_name : str, optional
Name of project version to check if a model of the same name already exists.
The default value is "latest".
Raises
------
ValueError
Model repository API cannot overwrite an already existing model with the upload
model call. Alerts user of the force argument to allow model overwriting.
"""
project = mr.get_project(project)
project_id = project["id"]
project_versions = mr.list_project_versions(project)
if version_name == "latest":
version_name = project["latestVersion"]
for version in project_versions:
if version_name == version["name"]:
version_id = version["id"]
break
project_models = mr.get(
f"/projects/{project_id}/projectVersions/{version_id}/models"
)
if not project_models:
return
elif isinstance(project_models, RestObj):
if project_models["name"] == name and force:
mr.delete_model(project_models.id)
elif project_models["name"] == name and not force:
raise ValueError(
f"A model with the same model name exists in project "
f"{project.name}. Include the force=True argument to overwrite "
f"models with the same name."
)
elif isinstance(project_models, PagedList):
for model in project_models:
if model["name"] == name and force:
mr.delete_model(model.id)
elif model["name"] == name and not force:
raise ValueError(
f"A model with the same model name exists in project "
f"{project.name}. Include the force=True argument to overwrite "
f"models with the same name."
)
[docs]
class ImportModel:
notebook_output = check_if_jupyter()
[docs]
@classmethod
def import_model(
cls,
model_files: Union[str, Path, dict],
model_prefix: str,
project: Union[str, dict, RestObj],
input_data: Optional[DataFrame] = None,
predict_method: Union[Callable[..., List], List[Any]] = None,
score_metrics: Optional[List[str]] = None,
pickle_type: str = "pickle",
project_version: str = "latest",
missing_values: Optional[bool] = False,
overwrite_model: Optional[bool] = False,
score_cas: Optional[bool] = True,
mlflow_details: Optional[dict] = None,
predict_threshold: Optional[float] = None,
target_values: Optional[List[str]] = None,
overwrite_project_properties: Optional[bool] = False,
target_index: Optional[int] = None,
preprocess_function: Optional[Callable[[DataFrame], DataFrame]] = None,
**kwargs,
) -> Tuple[RestObj, Union[dict, str, Path]]:
"""
Import a model to SAS Model Manager using pzmm submodule.
Using pzmm, generate Python score code and import the model files into
SAS Model Manager. This function automatically checks the version of SAS
Viya being used through the sasctl Session object and creates the appropriate
score code and API calls required for the model and its associated content to
be registered in SAS Model Manager.
Generation of the score code requires that the `input_data`, `predict_method`,
and `score_metrics` arguments are supplied. Otherwise, a warning will be
generated stating that no score code is being created.
The following are generated by this function if a path is provided in the
model_files argument:
* '\*Score.py'
The Python score code file for the model.
* '\*.zip'
The zip archive of the relevant model files. In Viya 3.5 the Python
score code is not present in this initial zip file.
Parameters
----------
model_files : str, pathlib.Path, or dict
Either the directory location of the model files (string or Path object), or
a dictionary containing the contents of all the model files.
model_prefix : str
The variable for the model name that is used when naming model files.
(For example: 'hmeqClassTree + [Score.py || .pickle])'.
project : str, dict, or RestObj
The name or id of the model project, or a dictionary representation of the
project.
input_data : pandas.DataFrame, optional
The `DataFrame` object contains the training data, and includes only the
predictor columns. The write_score_code function currently supports int(64),
float(64), and string data types for scoring. The default value is None.
predict_method : Callable or list, optional
The Python function used for model predictions and the expected output
types. The expected output types can be passed as example values or as the
value types. For example, if the model is a Scikit-Learn
DecisionTreeClassifier, then pass either of the following:
* [sklearn.tree.DecisionTreeClassifier.predict, ["A"]]
* [sklearn.tree.DecisionTreeClassifier.predict_proba, [0.4, float]]
The default value is None.
score_metrics : list of str, optional
The scoring score_metrics for the model. For classification models, it is
assumed that the first value in the list represents the classification
output. This function supports single and multi-class classification models.
The default value is None.
pickle_type : str, optional
Indicator for the package used to serialize the model file to be uploaded to
SAS Model Manager. The default value is `pickle`.
project_version : str, optional
The project version to import the model in to on SAS Model Manager. The
default value is "latest".
overwrite_model : bool, optional
Set whether models with the same name should be overwritten when attempting
to import the model. The default value is False.
score_cas : bool, optional
Sets whether models registered to SAS Viya 3.5 should be able to be scored
and validated through both CAS and SAS Micro Analytic Service. If set to
false, then the model will only be able to be scored and validated through
SAS Micro Analytic Service. The default value is True.
missing_values : bool, optional
Sets whether data handled by the score code will impute for missing values.
The default value is False.
mlflow_details : dict, optional
Model details from an MLFlow model. This dictionary is created by the
read_mlflow_model_file function. The default value is None.
predict_threshold : float, optional
The prediction threshold for normalized probability score_metrics. Values
are expected to be between 0 and 1. The default value is None.
target_values : list of str, optional
A list of target values for the target variable. This argument and the
score_metrics argument dictate the handling of the predicted values from
the prediction method. The order of the target values should reflect the
order of the related probabilities in the model. The default value is None.
overwrite_project_properties : bool, optional
Set whether the project properties should be overwritten when attempting to
import the model. The default value is False.
target_index : int, optional
Sets the index of success for a binary model. If target_values are given, this
index should match the index of the target outcome in target_values. If target_values
are not given, this index should indicate whether the the target probability variable
is the first or second variable returned by the model. The default value is 1.
**kwargs
Other keyword arguments are passed to the following function:
:meth:`.ScoreCode.write_score_code`
Returns
-------
RestObj
JSON response from the POST API call to SAS Model Manager for importing a
zipped model
model_files : dict, str, or pathlib.Path
Dictionary representation of all files or the path the model files were
generated from.
"""
# For mlflow models, overwrite the provided or default pickle_type
if mlflow_details:
pickle_type = mlflow_details["serialization_format"]
# Import model without generating score code (SAS Viya version invariant)
if input_data is None or not predict_method or not score_metrics:
warn(
"The following arguments are required for the automatic generation of "
"score code: input_data, predict_method, score_metrics."
)
if isinstance(model_files, dict):
zip_io_file = zm.zip_files(model_files, model_prefix, is_viya4=False)
else:
zip_io_file = zm.zip_files(
Path(model_files), model_prefix, is_viya4=False
)
if cls.notebook_output:
print(f"All model files were zipped to {Path(model_files)}.")
# Check if project name provided exists and raise an error or create a
# new project
project_response = mr.get_project(project)
project = project_exists(
project,
project_response,
target_values,
model_files,
overwrite_project_properties,
)
# Check if model with same name already exists in project.
model_exists(
project, model_prefix, overwrite_model, version_name=project_version
)
model = mr.import_model_from_zip(
model_prefix, project, zip_io_file, version=project_version
)
return model, model_files
# For SAS Viya 4, the score code can be written beforehand and imported with
# all the model files
elif current_session().version_info() == 4:
score_code_dict = sc().write_score_code(
model_prefix,
input_data,
predict_method,
score_metrics=score_metrics,
pickle_type=pickle_type,
predict_threshold=predict_threshold,
score_code_path=None if isinstance(model_files, dict) else model_files,
target_values=target_values,
missing_values=missing_values,
score_cas=score_cas,
target_index=target_index,
preprocess_function=preprocess_function,
**kwargs,
)
if score_code_dict:
model_files.update(score_code_dict)
zip_io_file = zm.zip_files(model_files, model_prefix, is_viya4=True)
else:
score_path = Path(model_files) / f"score_{model_prefix}.py"
if cls.notebook_output:
print(
f"Model score code was written successfully to {score_path} and"
f" uploaded to SAS Model Manager."
)
zip_io_file = zm.zip_files(
Path(model_files), model_prefix, is_viya4=True
)
if cls.notebook_output:
print(f"All model files were zipped to {Path(model_files)}.")
# Check if project name provided exists and raise an error or create a
# new project
project_response = mr.get_project(project)
project = project_exists(
project,
project_response,
target_values,
model_files,
overwrite_project_properties,
)
# Check if model with same name already exists in project.
model_exists(
project, model_prefix, overwrite_model, version_name=project_version
)
model = mr.import_model_from_zip(
model_prefix, project, zip_io_file, version=project_version
)
if cls.notebook_output:
try:
print(
f"Model was successfully imported into SAS Model Manager as "
f"{model.name} with the following UUID: {model.id}."
)
except AttributeError:
print("Model failed to import to SAS Model Manager.")
return model, model_files
# For SAS Viya 3.5, the score code is written after upload in order to know
# the model UUID
else:
if isinstance(model_files, dict):
zip_io_file = zm.zip_files(model_files, model_prefix, is_viya4=False)
else:
zip_io_file = zm.zip_files(
Path(model_files), model_prefix, is_viya4=False
)
if cls.notebook_output:
print(f"All model files were zipped to {Path(model_files)}.")
# Check if project name provided exists and raise an error or create a
# new project
project_response = mr.get_project(project)
project = project_exists(
project,
project_response,
target_values,
model_files,
overwrite_project_properties,
)
# Check if model with same name already exists in project.
model_exists(
project, model_prefix, overwrite_model, version_name=project_version
)
model = mr.import_model_from_zip(
model_prefix, project, zip_io_file, version=project_version
)
if cls.notebook_output:
try:
print(
f"Model was successfully imported into SAS Model Manager as "
f"{model.name} with the following UUID: {model.id}."
)
except AttributeError:
print("Model failed to import to SAS Model Manager.")
score_code_dict = sc().write_score_code(
model_prefix,
input_data,
predict_method,
score_metrics=score_metrics,
model=model,
pickle_type=pickle_type,
predict_threshold=predict_threshold,
score_code_path=None if isinstance(model_files, dict) else model_files,
target_values=target_values,
missing_values=missing_values,
score_cas=score_cas,
target_index=target_index,
preprocess_function=preprocess_function,
**kwargs,
)
if score_code_dict:
model_files.update(score_code_dict)
return mr.get_model(model), model_files
else:
score_path = Path(model_files) / f"score_{model_prefix}.py"
if cls.notebook_output:
print(
f"Model score code was written successfully to {score_path} and"
f" uploaded to SAS Model Manager."
)
return mr.get_model(model), model_files