Source code for sphinxcontrib.argdoc.ext

#!/usr/bin/env python
"""Functions that constitute the :data:`sphinxcontrib.argdoc` extension for `Sphinx`_.

User functions
--------------
:func:`noargdoc`
    Function decorator that forces :data:`sphinxcontrib.argdoc` to skip a :term:`main-like function`
    it would normally process
    
Developer functions
-------------------

:func:`format_argparser_as_docstring`
    Extract tables of arguments from an :class:`~argparse.ArgumentParser`
    and from all of its subprograms, then format their descriptions and
    help text.

:func:`get_subcommand_tables`
    Extract tables from all subcommand
    :class:`ArgumentParsers <argparse.ArgumentParser>`
    contained by an enclosing :class:`~argparse.ArgumentParser`

:func:`post_process_automodule`
    Event handler that activates :data:`sphinxcontrib.argdoc` upon `autodoc-process-docstring`
    events

:func:`setup`
    Register :data:`sphinxcontrib.argdoc` with the running `Sphinx`_ instance
"""
import sys
import re
import shlex
import subprocess
import os
import codecs

import sphinx
from sphinx.errors import ConfigError

__author__  = "Joshua Griffin Dunn"

#===============================================================================
# INDEX: various constants
#===============================================================================
_REQUIRED = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
]
"""Other `Sphinx`_ extensions required by :data:`sphinxcontrib.argdoc`"""



_HEADERS = "=-~._\"'^;"
_INDENT_SIZE = 4


[docs]def safeunicode(inp):
    """Convert a string to unicode in a Python 2.7/3.x-safe way

    Parameters
    ----------
    inp : str
        Input string

    Returns
    -------
    unicode (Python 2.7) or string (Python 3.x)
        utf-8 encoded representation of `inp`
    """
    if sys.version_info[0] == 2 and isinstance(inp,str):
        return unicode(inp,"utf-8")
    else:
        return inp


_PLACEHOLDER_CONSTANT = "ARGDOCPOSITIONALARGUMENT "
_OTHER_HEADER_LINES = safeunicode("""Script contents
---------------""").split("\n")

_SEPARATOR = safeunicode("\n------------\n\n").split("\n")

#===============================================================================
# INDEX: helper functions for token parsing and text formatting
#===============================================================================

[docs]def get_patterns(prefix_chars="-"):
    """Retrieve a dictionary of regular expressions that separate argument names
    from their values and descriptions
    
    Parameters
    ----------
    prefix_chars : str, optional
        String of prefix characters that the :class:`~argparse.ArgumentParser`
        uses (Default: `'-'`)
    
    Returns
    -------
    dict
        Dictionary of regular expression patterns
    """
    all_patterns = {}
    esc_prefix_chars = prefix_chars
    for char in "-+*?[]{}()":
        esc_prefix_chars = esc_prefix_chars.replace(char,"\%s" % char)
    for char in prefix_chars:
        if char in "-+*?[]{}()":
            esc_char = "\%s" % char
        else:
            esc_char = char
        patterns = { "section_title"      : r"^(\w+.*):$",
                     "positional_arg"     : r"^  (?P<arg1>[^{}\sALL]+)(?:\s\s+(?P<desc>\w+.*))?$".replace("ALL",esc_prefix_chars),
                     "arg_only"           : r"^  (?P<arg1>-+[^\s,]+)(?:, (?P<arg2>--[^\s]+))?$".replace("-",esc_char),
                     "arg_plus_val"       : r"^  (?P<arg1>-+[^\s]+)(?P<val1>(?: [^ALL\s]+)+)(?:(?:, (?P<arg2>--[^\s]+))(?P<val2>(?: [^\s]+)+))?$".replace("-",esc_char).replace("ALL",esc_prefix_chars),
                     "arg_plus_desc"      : r"^  (?P<arg1>-+[^\s]+)(?:,\s(?P<arg2>--[^\s]+))?\s\s+(?P<desc>.*)".replace("-",esc_char),
                     "arg_plus_val_desc"  : r"^  (?P<arg1>-+[^\s]+)(?P<val1>(?: [^ALL\s]+)+)(?:(?:, (?P<arg2>--[^\s]+))(?P<val2>(?: [^\s]+)+))?  +(?P<desc>\w+.*)$".replace("-",esc_char).replace("ALL",esc_prefix_chars),
                     "continue_desc"      : r"^ {12,24}(.*)",
                     "section_desc"       : r"^  ((?:[^ALL\s][^\s]*)(?:\s[^\s]+)+)$".replace("ALL",esc_prefix_chars),
                     "subcommand_names"   : r"^  {((?:\w+)(?:(?:,(?:\w+))+)?)}$",
                     "subcommand_name"    : r"^    (?P<arg1>[^{}\sALL]+)(?:\s\s+(?P<desc>\w+.*))?$".replace("ALL",esc_prefix_chars), # same as positional arg, but with more leading space
                    }
        
        all_patterns[char] = { K : re.compile(V) for K,V in patterns.items() }

    return all_patterns

[docs]def get_col1_text(matchdict):
    """Format argument name(s) and value(s) for column 1 of argument tables

    Parameters
    ----------
    matchdict : dict
        Dictionary of values

    Returns
    -------
    str (unicode if Python 2.7)
    """
    if "val1" in matchdict:
        tmpstr = "``%s %s``" % (matchdict["arg1"],matchdict["val1"])
        if matchdict.get("arg2") is not None:
            tmpstr += (", ``%s %s``" % (matchdict["arg2"],matchdict["val2"]))
    else:
        tmpstr = "``%s``" % matchdict["arg1"]
        if matchdict.get("arg2") is not None:
            tmpstr += (", ``%s``" % matchdict["arg2"])
        
    return safeunicode(tmpstr)

[docs]def get_col2_text(matchdict):
    """Format argument descriptions, if present, for column 2 of argument tables

    Parameters
    ----------
    matchdict : dict
        Dictionary of values

    Returns
    -------
    str (unicode if Python 2.7)
    """
    # line below looks weird- but coming out of regex matches,
    # often 'desc' *is* defined with value `None`
    tmpstr =  matchdict.get("desc","") if matchdict.get("desc") is not None else ""
    return safeunicode(tmpstr)

[docs]def make_rest_table(rows,title=False,indent=0):
    """Make a reStructuredText table from a list of rows of items

    Parameters
    ----------
    rows : list of tuples
        A row of text to put in the table, each tuple item a cell

    title : bool, optional
        If `True`, the first pair is assumed to contain column headings
        (Default: `False`)

    indent_size : int, optional
        Number of spaces prepend to each line of output (Default: `0`)

    Returns
    -------
    list
        List of strings, corresponding to multi-line `reStructuredText`_ table
    """
    columns = list(zip(*rows))
    lengths = [1 + max([len(X) for X in Y]) for Y in columns]
    if title == True:
        lengths = [X+4 for X in lengths]

    border   = []
    template = []
    for n, my_length in enumerate(lengths):
        border.append(safeunicode("="*my_length))
        template.append(safeunicode("{%s: <%ss}" % (n,my_length)))

    border   = safeunicode("    ").join(border)
    template = safeunicode("    ").join(template)

    lines = [border]
    n = 0
    if title == True:
        title_row = [safeunicode("**%s**") % X for X in rows[0]]
        lines.append(template.format(*tuple(title_row)))
        lines.append(border.replace("=","-"))
        n = 1

    for items in rows[n:]:
        lines.append(template.format(*items))

    lines.append(border)
    lines.append(safeunicode(""))
    if indent > 0:
        tmp = safeunicode(" "*indent)
        lines = [tmp+X if len(X) > 0 else X for X in lines]

    return lines

[docs]def format_warning(topline,details):
    """Format warning text clearly

    Parameters
    ----------
    topline : str
        One-line description of warning

    details : str
        Multiline, detailed description of warning (e.g. exception info)


    Returns
    -------
    str
        Multiline warning message, formatted
    """
    border = "-"*75 + "\n"
    out = border
    out += ("[argdoc] %s\n" % topline)
    out += details
    out += border
    return out


#===============================================================================
# INDEX: function decorator
#===============================================================================

[docs]def noargdoc(func):
    """Decorator that forces :data:`sphinxcontrib.argdoc` to skip processing of `func` 
    
    Parameters
    ----------
    func : function
        :term:`main-like function` of a script

    
    Returns
    -------
    func
        wrapped function
    """
    func.__dict__["noargdoc"] = True
    return func

#===============================================================================
# INDEX: documentation generation functions
#===============================================================================

[docs]def get_subcommand_tables(app,obj,help_lines,patterns,start_line,command_chain="",section_head=True,header_level=1):
    """Process help output from an :py:class:`~argparse.ArgumentParser`
    that includes one or more subcommands.  Called by :func:`format_argparser_as_docstring`
    
    Parameters
    ----------
    app
        Sphinx application instance
    
    obj : object
        Object (e.g. module, class, function) to document
            
    help_lines : list
        List of strings, each corresponding to a line of output from having
        passed ``--help`` as an argument to the :term:`main-like function`

    
    patterns : dict
        Dictionary names of line types in argparse output to regular expression
        patterns that process those line types

    start_line : int
        Line in argparse help output containing subcommand header

    section_head : bool, optional
        If `True`, a section header for "Command-line arguments" will be included
        in the output. (Default: `True`)

    pre_args : int, optional
        Number of arguments required to be supplied before subcommand help
        can be retrieved (Default: `0`)
        
    header_level : int, optional
        Level of header to use for `section_name`. Lower numbers are higher
        precedence. (Default: `1`)        
    
    Returns
    -------
    list
        List of strings encoding reStructuredText table of command-line
        arguments for all subprograms in the containing argparser
    """
    out_lines = []
    base = list(patterns.values())[0]
    for line in help_lines[start_line:]:
        match = base["subcommand_names"].search(line.strip("\n")) 
        if match is not None:
            subcommands = match.groups()[0].split(",")
            break
    
    app.debug("[argdoc] %s subcommands: %s" % (obj.__name__,", ".join(subcommands)))
    for subcommand in subcommands:
        try:
            newname = command_chain.replace(_PLACEHOLDER_CONSTANT,"").replace("  "," ").split()
            newname.append(subcommand)
            newname ="-".join(newname)
            callstr = "%s -m %s %s %s %s%shelp" % (sys.executable,
                                                   obj.__name__,
                                                   command_chain,
                                                   subcommand,
                                                   app.config.argdoc_prefix_chars[0],
                                                   app.config.argdoc_prefix_chars[0])
            app.debug("[argdoc] Parsing subcommand %s with as `%s`" % (subcommand,callstr))
            call = shlex.split(callstr)
            out = subprocess.check_output(call,env=os.environ.copy())
            if sys.version_info[0] == 2:
                out = unicode(out,"utf-8")
            elif sys.version_info[0] == 3:
                out = out.decode("utf-8")
            sub_help_lines = out.split("\n")
            new_command_chain = command_chain + (" %s " % subcommand)
            out_lines.extend(format_argparser_as_docstring(app,
                                                           obj,
                                                           sub_help_lines,
                                                           patterns,
                                                           section_head=section_head,
                                                           header_level=header_level+1,
                                                           section_name=safeunicode("``%s`` subcommand" % newname),
                                                           _is_subcommand=True,
                                                           command_chain=new_command_chain)) 
        except subprocess.CalledProcessError as e:
            note = "Could not call module %s as '%s'. Output:\n"% (obj.__name__, " ".join(e.cmd))
            msg = format_warning(note,e.output)
            app.warn(msg)

    return out_lines

[docs]def format_argparser_as_docstring(app,obj,help_lines,patterns,
                                  section_head=True,section_name=safeunicode("Command-line arguments"),
                                  header_level=1,
                                  _is_subcommand=False,
                                  command_chain="",
                                  ):
    """Process help output from an :py:class:`argparse.ArgumentParser`.
    Called by :func:`post_process_automodule` and :func:`get_subcommand_tables`
    
    Parameters
    ----------
    app
        Sphinx application instance
    
    obj : object
        Object (e.g. module, class, function) to document
            
    help_lines : list
        List of strings, each corresponding to a line of output from having
        passed ``--help`` as an argument to the :term:`main-like function`

    patterns : dict
        Dictionary names of line types in argparse output to regular expression
        patterns that process those line types
    
    section_head : bool, optional
        If `True`, a section header for "Command-line arguments" will be included.
        This messes up parsing for function docstrings, but is fine for module
        docstrings (Default: `False`).
    
    section_name : str, optional
        A name or title for the current program or subcommand.
        (Default: `'Command-line arguments'`)
    
    header_level : int, optional
        Level of header to use for `section_name`. Lower numbers are higher
        precedence. (Default: `1`)
    
    _is_subcommand : bool, optional
        If `True`, include module docstring in output. Required for subcommands
        whose help won't be included by in the module docstring found by 
        autodoc. (Default: `False`) 
        
    
    Returns
    -------
    list
        List of strings encoding reStructuredText table of arguments
        for program or subprogram
    """
    base = list(patterns.values())[0]
    started          = False
    has_subcommands  = False
    subcommand_start = 0

    out_lines = []  # lines we will output
    positional_args = 0

    # markers for beginning and end of subcommand docstring descriptions
    desc_start = None
    desc_end   = None

    # the following are wiped & re-initialized for each section
    col1      = ["Argument"]  # holder for column 1 contents: argument names
    col2      = ["Description"]  # holder for column 2 contents: argument descriptions
    section_title = [] # title of current section
    section_desc  = [] # description of current section
    unmatched = []
    
    for n,line in enumerate(help_lines):
        line = line.rstrip()
        if _is_subcommand == True and desc_start is None:
            # subcommand descriptions are not automatically added by autodoc,
            # so we need to track them ourselves
            if line.strip() == "":
                desc_start = n+1
        if started == True:
            if len(line.strip()) == 0 and len(col1) > 1 and len(col2) > 1:
                # if current argument group is finished, format table of arguments for export
                # and append it to `out_lines`
                if len(col1) != len(col2):
                    app.warn("[argdoc] Column mismatch in section '%s'. col1 %s, col2 %s rows." % (section_title,len(col1),len(col2)))

                out_lines.append(safeunicode(""))
                out_lines.extend(section_title)
                out_lines.extend(section_desc)
                out_lines.append(safeunicode(""))
                out_lines.extend(make_rest_table(list(zip(col1,col2)),title=True,indent=_INDENT_SIZE))        
                out_lines.extend(unmatched)

                # reset section-specific variables
                section_title = []
                section_desc  = []
                col1 = ["Argument"]
                col2 = ["Description"]
                unmatched = []
            else:
                matchdict = None
                match = None
                for pat in ["section_title",
                            "section_desc",
                            "positional_arg",
                            "arg_only",
                            "arg_plus_val",
                            "continue_desc",
                            "arg_plus_desc",
                            "arg_plus_val_desc",
                            "subcommand_names",
                            "subcommand_name",
                            ]:
                    for char in patterns.keys():
                        if match is None:
                            match = patterns[char][pat].match(line)
                            if match is not None:
                                app.debug2("[argdoc] %s\n    %s\n" % (pat,line))
                                if pat == "section_title":
                                    match = base["section_title"].match(line)
                                    section_title = [match.groups()[0].capitalize(),
                                                     _HEADERS[header_level+1]*len(match.groups()[0]),
                                                    ]
                                    break
                                elif pat == "section_desc":
                                    section_desc.append(line)
                                    break
                                elif pat == "continue_desc":
                                    try:
                                        col2[-1] = safeunicode("%s %s" % (col2[-1],match.groups()[0].strip("\n")))
                                    except IndexError as e:
                                        app.warn("[argdoc] continuing description with no prior description on line %s: \n    %s" % (n,line))
                                        assert False
                                    break
                                elif pat == "positional_arg":
                                    matchdict = match.groupdict()
                                    col1.append(get_col1_text(matchdict))
                                    col2.append(get_col2_text(matchdict))
                                    positional_args += 1
                                    break
                                elif pat == "subcommand_names":
                                    has_subcommands = True
                                    subcommand_start = n
                                    break
                                elif pat == "subcommand_name":
                                    matchdict = match.groupdict()
                                    col1.append(get_col1_text(matchdict))
                                    col2.append(get_col2_text(matchdict))
                                    if has_subcommands == False:
                                        app.warn("[argdoc] found subcommand-like line but no subcommands at line %s:\n    %s" % (n,line))
                                    break
                                else:
                                    matchdict = match.groupdict()
                                    col1.append(get_col1_text(matchdict))
                                    col2.append(get_col2_text(matchdict))
                                    break
                if match is None:
                    # triggered if epilog, or if other unmatched lines
                    if len(line.strip()) > 0:
                        app.debug2("[argdoc] No match. Epilog?\n%s\n" % line.strip())
                        line = safeunicode(line)
                        out_lines.append(line)
                    else:
                        app.debug2("[argdoc] blank line")
        # FIXME: 
        # this is how we test where argument descriptions begin in ``--help`` text
        # at present we look for an explicit 'arguments:' token, which allows argdoc
        # to deal with lines of helptext that have trailing colons but which don't start
        # argument sections (which a regex would fail at)
        #
        # BUT, if an argument parser has no line that says "arguments:" in its helptext,
        # argdoc will fail
        #
        # we need a better test, which will be more portable
        elif line.endswith("arguments:"):
            # Found first argument section. Create command-line argument heading
            started = True
            desc_end = n
            if section_head == True:
                stmp1 = section_name
                stmp2 = _HEADERS[header_level]*len(section_name)
                out_lines.extend(_SEPARATOR)
                out_lines.append(safeunicode(stmp1))
                out_lines.append(safeunicode(stmp2))
                # if is a subcommand, put cached description under heading
                if _is_subcommand == True:
                    out_lines.extend(help_lines[desc_start:desc_end])
            
            # Create paragraph header for the argument section
            match = base["section_title"].match(line)
            section_title = [match.groups()[0].capitalize(),
                             _HEADERS[header_level+1]*len(match.groups()[0]),
                            ]
   
    if has_subcommands == True:
        # parse subcommand argparsers after main, and append output below
        command_chain = command_chain + (_PLACEHOLDER_CONSTANT*positional_args)
        new_lines = get_subcommand_tables(app,
                                          obj,
                                          help_lines,
                                          patterns,
                                          subcommand_start,
                                          section_head=section_head,
                                          header_level=header_level+2,
                                          command_chain=command_chain,
                                          )
        out_lines.extend(new_lines)

    return out_lines

[docs]def post_process_automodule(app,what,name,obj,options,lines):
    """Insert a table listing and describing an executable script's command-line
    arguments into its ``:automodule:`` documentation.
    
    Any :term:`main-like function` decorated with the :func:`noargdoc` decorator
    will be skipped. A function is determined to be a :term:`main-like function`
    if its name matches the name set in the configuration option
    `argdoc_main_func` inside ``conf.py``. The default value for
    `argdoc_main_func` is `main`.
    
    
    Notes
    -----
    Per the `autodoc`_ spec, this function modifies `lines` in place.
    
    
    Parameters
    ----------
    app
        Sphinx application instance
    
    what : str
        Type of object (e.g. "module", "function", "class")
    
    name : str
        Fully-qualified name of object
    
    obj : object
        Object (e.g. module, class, function) to document
    
    options : object
        Options given to the directive, whose boolean properties are set to `True`
        if their corresponding flag was given in the directive

    lines : list
        List of strings encoding the module docstrings after `Sphinx`_ processing

    Raises
    ------
    :class:`~sphinx.errors.ConfigError`
       If `argdoc_main_func` is defined in ``conf.py`` and is not a `str`
    """
    funcname     = app.config.argdoc_main_func
    prefix_chars = app.config.argdoc_prefix_chars
    patterns = get_patterns(prefix_chars)

    errmsg = ""
    if not isinstance(funcname,str):
        errmsg += "[argdoc] Incorrect type for `argdoc_main_func. Expected `str`, found, `%s` with value `%s`)\n" % (type(funcname),funcname)
    if len(prefix_chars) == 0:
        errmsg += "[argdoc] Expected at least one prefix character (e.g. '-'). Found empty string.\n"

    if len(errmsg) > 0:
        raise ConfigError(errmsg)

    if what == "module" and obj.__dict__.get(funcname,None) is not None:
        if obj.__dict__.get(funcname).__dict__.get("noargdoc",False) == False:
            app.debug2("[argdoc] Processing module '%s'" % obj.__name__)
            call = shlex.split("%s -m %s --help".replace("-",prefix_chars[0]) % (sys.executable,name))
            try:
                out = subprocess.check_output(call,env=os.environ.copy()) 
                if sys.version_info[0] == 2:
                    out = unicode(out,"utf-8")
                elif sys.version_info[0] == 3:
                    out = out.decode("utf-8")
                help_lines = out.split("\n")
                out_lines = format_argparser_as_docstring(app,obj,help_lines,section_head=True,header_level=1,patterns=patterns)
                out_lines += _SEPARATOR
                lines.extend(out_lines)
                lines.extend(_OTHER_HEADER_LINES)
            except subprocess.CalledProcessError as e:
                note = "Could not call module %s as '%s'. Output:\n"% (name," ".join(e.cmd))
                app.warn(format_warning(note,e.output))
            except IndexError as e:
                note = "Error processing argparser into docstring for module %s: \n%s" % (name,e.message)
                details = "\n\n%s\n\n%s" % (e.args,e)
                app.warn(format_warning(note,details))

        if app.config.argdoc_save_rst == True:
            filename = os.path.join(app.outdir,"%s_postargdoc.rst" % name)
            with codecs.open(filename,encoding="utf-8",mode="wb") as fout:
                for n,line in enumerate(lines):
                    try:
                        line = safeunicode(line)
                        fout.write(line)
                        fout.write(safeunicode("\n"))
                    except Exception as e:
                        app.warn("[argdoc] Could not write out line %s of file %s." % (n,name))
    
            fout.close()
                
        app.emit("argdoc-process-docstring",what,name,obj,options,lines)