# Source code for sasctl._services.concepts

#!/usr/bin/env python
# encoding: utf-8
#
# Copyright © 2019, SAS Institute Inc., Cary, NC, USA.  All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from ..core import uri_as_str
from .service import Service



# [docs]
class Concepts(Service):
    """Assigns concepts to natural language text documents according to a
    prebuilt or user-defined model.
    """

    _SERVICE_ROOT = "/concepts"

    @classmethod
    def assign_concepts(
        cls,
        documents,
        caslib=None,
        id_column=None,
        text_column=None,
        description=None,
        model=None,
        output_postfix=None,
        match_type=None,
        enable_facts=False,
        language="en",
    ):
        """Submit a job that assigns concepts to the input documents.

        Creates a concept assignment job that executes asynchronously.  There
        are two different interactions: analyzing documents that already
        reside in a CAS table and analyzing documents that are uploaded
        directly as part of the request.

        Parameters
        ----------
        documents : str or dict or iterable of str
            Documents to analyze.  May be either the URI to a CAS table where the
            documents are currently stored (a string, or a dict that
            :func:`uri_as_str` can resolve to a URI), or an iterable of strings
            containing the documents' text.  Any other iterable is materialized
            into a list before being sent.
        caslib : str or dict, optional
            URI of a caslib in which the documents will be stored.  Required if
            `documents` is an iterable of strings.
        id_column : str, optional
            The column in `documents` that contains a unique id for each
            document.  Required if `documents` is a CAS table URI.
        text_column : str, optional
            The column in `documents` that contains the document text to
            analyze.  Required if `documents` is a CAS table URI.
        description : str, optional
            Description to add to the concept assignment job.
        model : str or dict, optional
            The concepts model to use, given as a URI or as an object that
            :func:`uri_as_str` can resolve to a URI.  Sent as ``modelUri``.
            If omitted, no model is included in the request (presumably the
            service then falls back to a prebuilt model -- confirm against
            the service documentation).
        output_postfix : str, optional
            Text to be added to the end of all output table names.
        match_type : str, optional
            Choose from ``{'all', 'longest', 'best'}``.
            Type of matches to return.  When omitted the field is not sent
            and the service default (documented as 'all') applies.
        enable_facts : bool, optional
            Whether to enable facts in the results.  Defaults to False.
        language : str, optional
            Two letter
            `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639>`_
            code indicating the source language.  Defaults to 'en'.

        Returns
        -------
        RestObj
            The submitted job

        Raises
        ------
        TypeError
            If `documents` is None.

        See Also
        --------
        .cas_management.CASManagement.get_caslib
        .cas_management.CASManagement.get_table

        """
        if documents is None:
            raise TypeError("`documents` cannot be None.")

        # A str/dict is a reference to an existing CAS table; anything else is
        # treated as the raw text of the documents themselves.
        is_table_reference = isinstance(documents, (dict, str))

        if is_table_reference:
            data = {
                "inputUri": uri_as_str(documents),
                "documentIdVariable": id_column,
                "textVariable": text_column,
                "version": 1,
            }
            url = "/jobs"
            content_type = "application/vnd.sas.text.concepts.job.request+json"
        else:
            data = {
                "caslibUri": uri_as_str(caslib),
                # Materialize the iterable: generators, sets and similar
                # objects cannot be encoded as a JSON array directly.
                "documents": list(documents),
                "version": 1,
            }
            # Raw documents are posted to the "#data" variant of the endpoint
            # with a dedicated request media type.
            url = "/jobs#data"
            content_type = (
                "application/vnd.sas.text.concepts.job.request.documents+json"
            )

        data.update(
            {
                "description": description,
                "language": language,
                "modelUri": uri_as_str(model),
                "outputTableNamePostfix": output_postfix,
                "matchType": match_type,
                "enableFacts": enable_facts,
            }
        )

        # Optional fields are not ignored if None. Explicitly remove before sending
        data = {key: value for key, value in data.items() if value is not None}

        headers = {
            "Content-Type": content_type,
            "Accept": "application/vnd.sas.text.concepts.job+json",
        }

        return cls.post(url, json=data, headers=headers)