Source code for sasctl._services.text_categorization
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright © 2019, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
from ..core import current_session, uri_as_str
from .service import Service
[docs]
class TextCategorization(Service):
    """Categorizes natural language text documents according to a prebuilt or
    user-defined model.

    """

    _SERVICE_ROOT = "/categorization"

    @classmethod
    def categorize(
        cls,
        documents,
        model,
        caslib=None,
        id_column=None,
        text_column=None,
        description=None,
        output_postfix=None,
    ):
        """Submit a job that categorizes a collection of documents.

        Parameters
        ----------
        documents : str or dict or Iterable
            Documents to parse.  May be either the URI to a CAS table where the
            documents are currently stored, or an iterable of strings
            containing the documents' text.  Iterables other than lists and
            tuples (e.g. generators) are consumed and converted to a list
            before the request is sent.
        model : str or dict
            URI of a CAS table that contains one or more category model
            binaries.
        caslib : str or dict, optional
            URI of a caslib in which the documents will be stored.  Required if
            `documents` is a list of strings.
        id_column : str, optional
            The column in `documents` that contains a unique id for each
            document.  Required if `documents` is a CAS table URI.
        text_column : str, optional
            The column in `documents` that contains the document text to
            categorize.  Required if `documents` is a CAS table URI.
        description : str, optional
            Description to add to the text categorization job.
        output_postfix : str, optional
            Text to be added to the end of all output table names.

        Returns
        -------
        RestObj
            The submitted job

        Raises
        ------
        RuntimeError
            If the current session reports a version of 4 or later.
        TypeError
            If `documents` is None.

        See Also
        --------
        .cas_management.CASManagement.get_caslib
        .cas_management.CASManagement.get_table

        """
        if current_session().version_info() >= 4:
            raise RuntimeError(
                "The Text Categorization service was removed from Viya 4."
            )

        if documents is None:
            raise TypeError("`documents` cannot be None.")

        url = "/jobs"
        accept = "application/vnd.sas.text.categorization.job+json"

        if isinstance(documents, (dict, str)):
            # A single str/dict is treated as a reference to a CAS table that
            # already holds the documents.
            data = {
                "inputUri": uri_as_str(documents),
                "documentIdVariable": id_column,
                "textVariable": text_column,
                "version": 1,
            }
            content_type = (
                "application/vnd.sas.text.categorization.job.request+json"
            )
        else:
            # Anything else is treated as inline document text.  Lists and
            # tuples are sent as-is; other iterables (generators, sets, ...)
            # are not JSON serializable, so materialize them first.
            if not isinstance(documents, (list, tuple)):
                documents = list(documents)

            data = {
                "caslibUri": uri_as_str(caslib),
                "documents": documents,
                "version": 1,
            }
            # NOTE(review): the "#data" fragment presumably identifies the
            # inline-documents variant of the endpoint -- confirm against the
            # service's API documentation.
            url += "#data"
            content_type = (
                "application/vnd.sas.text.categorization.job.request.documents+json"
            )

        headers = {"Content-Type": content_type, "Accept": accept}

        data.update(
            {
                "description": description,
                "modelUri": uri_as_str(model),
                "outputTableNamePostfix": output_postfix,
            }
        )

        # Optional fields are not ignored if None.  Explicitly remove before sending
        data = {key: value for key, value in data.items() if value is not None}

        return cls.post(url, json=data, headers=headers)