# Source code for jams.util

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
r"""
Utility functions
-----------------

.. autosummary::
    :toctree: generated/

    import_lab
    expand_filepaths
    smkdirs
    filebase
    find_with_extension
"""

import os
import glob
import pandas as pd

from . import core


def import_lab(namespace, filename, infer_duration=True, **parse_options):
    r'''Load a .lab file as an Annotation object.

    .lab files are assumed to have the following format:

        ``TIME_START\tTIME_END\tANNOTATION``

    By default, .lab files are assumed to have columns separated by one
    or more white-space characters, and have no header or index column
    information.

    If the .lab file contains only two columns, they are treated as
    ``(time, value)`` and a duration column is synthesized (see
    `infer_duration`).

    If the .lab file contains more than three columns, each row's
    annotation value is assigned the contents of the last non-empty
    column of that row.


    Parameters
    ----------
    namespace : str
        The namespace for the new annotation

    filename : str
        Path to the .lab file

    infer_duration : bool
        If `True`, interval durations are inferred from `(start, end)` columns,
        or, for two-column files, from the difference between successive
        times (the final observation gets duration 0).

        If `False`, interval durations are assumed to be explicitly coded as
        `(start, duration)` columns.  For two-column `(time, value)` files,
        durations are set to 0.

        For instantaneous event annotations (e.g., beats or onsets), this
        should be set to `False`.

    parse_options : additional keyword arguments
        Passed to ``pandas.read_csv``

    Returns
    -------
    annotation : Annotation
        The newly constructed annotation object

    See Also
    --------
    pandas.read_csv
    '''

    # Create a new annotation object
    annotation = core.Annotation(namespace)

    parse_options.setdefault('sep', r'\s+')
    parse_options.setdefault('engine', 'python')
    parse_options.setdefault('header', None)
    parse_options.setdefault('index_col', False)

    # This is a hack to handle potentially ragged .lab data: over-provision
    # the column names so rows of differing width parse into one frame.
    parse_options.setdefault('names', list(range(20)))

    data = pd.read_csv(filename, **parse_options)

    # Drop all-nan columns (i.e., the unused over-provisioned ones)
    data = data.dropna(how='all', axis=1)

    # Do we need to add a duration column?
    # This only applies to event annotations
    if len(data.columns) == 2:
        if infer_duration:
            # Duration of each event is the gap to the next one; the last
            # event has no successor and keeps a duration of zero.
            # Built as a standalone float series to avoid chained assignment
            # into an integer column.
            durations = pd.Series(0.0, index=data.index)
            durations.iloc[:-1] = data.iloc[:, 0].diff().iloc[1:].values
        else:
            durations = 0

        # Insert the duration column right after the timing
        data.insert(1, 'duration', durations)

    elif infer_duration:
        # Convert from (start, end) to (start, duration).  Columns are
        # addressed by position so caller-supplied `names` also work.
        start_col, end_col = data.columns[:2]
        data[end_col] = data[end_col] - data[start_col]

    for row in data.itertuples(index=False, name=None):
        time, duration = row[:2]
        cells = row[2:]

        # Missing cells of ragged rows are NaN (not None) after parsing, so
        # scan from the right for the last populated cell.  If every cell is
        # missing, fall back to the raw trailing cell.
        value = next((cell for cell in reversed(cells) if pd.notnull(cell)),
                     cells[-1])

        annotation.append(time=time,
                          duration=duration,
                          confidence=1.0,
                          value=value)

    return annotation


def expand_filepaths(base_dir, rel_paths):
    """Root a collection of relative paths at a given base directory.

    Each relative path is normalized (redundant separators and up-level
    references collapsed) before being joined onto `base_dir`.

    Parameters
    ----------
    base_dir : str
        The target base directory

    rel_paths : list (or list-like)
        Collection of relative path strings

    Returns
    -------
    expanded_paths : list
        `rel_paths` rooted at `base_dir`, in the same order

    Examples
    --------
    >>> jams.util.expand_filepaths('/data', ['audio', 'beat', 'seglab'])
    ['/data/audio', '/data/beat', '/data/seglab']

    """
    expanded_paths = []
    for relative in rel_paths:
        cleaned = os.path.normpath(relative)
        expanded_paths.append(os.path.join(base_dir, cleaned))
    return expanded_paths


def smkdirs(dpath, mode=0o777):
    """Safely make a full directory path if it doesn't exist.

    If `dpath` already exists, this is a no-op.  The directory is created
    first and the existence check happens only on failure, so a concurrent
    creation of the same path by another process does not raise.

    Parameters
    ----------
    dpath : str
        Path of directory/directories to create

    mode : int [default=0o777]
        Permissions for the new directories

    Raises
    ------
    OSError
        If the path could not be created and does not already exist

    See also
    --------
    os.makedirs
    """
    try:
        os.makedirs(dpath, mode=mode)
    except OSError:
        # Only swallow the error when the path is already there; any other
        # failure (permissions, invalid path, ...) is propagated unchanged.
        if not os.path.exists(dpath):
            raise


def filebase(filepath):
    """Strip the directory and the final extension from a file path.

    Parameters
    ----------
    filepath : str
        Path to a file

    Returns
    -------
    base : str
        The name of the file, with directory and extension removed.
        Only the last extension is dropped (``'a.tar.gz'`` -> ``'a.tar'``).

    Examples
    --------
    >>> jams.util.filebase('my_song.mp3')
    'my_song'

    """
    filename = os.path.basename(filepath)
    base, _extension = os.path.splitext(filename)
    return base


def find_with_extension(in_dir, ext, depth=3, sort=True):
    """Naive depth-search into a directory for files with a given extension.

    One glob pattern is issued per directory level, from files directly
    inside `in_dir` (level 1) down to `depth` levels.

    Parameters
    ----------
    in_dir : str
        Path to search.
    ext : str
        File extension to match; leading/trailing extension separators
        are ignored (``'wav'`` and ``'.wav'`` are equivalent).
    depth : int
        Depth of directories to search. Must be at least 1.
    sort : bool
        Sort the list alphabetically

    Returns
    -------
    matched : list
        Collection of matching file paths.

    Examples
    --------
    >>> jams.util.find_with_extension('Audio', 'wav')
    ['Audio/LizNelson_Rainfall/LizNelson_Rainfall_MIX.wav',
     'Audio/LizNelson_Rainfall/LizNelson_Rainfall_RAW/LizNelson_Rainfall_RAW_01_01.wav',
     'Audio/LizNelson_Rainfall/LizNelson_Rainfall_RAW/LizNelson_Rainfall_RAW_02_01.wav',
     ...
     'Audio/Phoenix_ScotchMorris/Phoenix_ScotchMorris_STEMS/Phoenix_ScotchMorris_STEM_02.wav',
     'Audio/Phoenix_ScotchMorris/Phoenix_ScotchMorris_STEMS/Phoenix_ScotchMorris_STEM_03.wav',
     'Audio/Phoenix_ScotchMorris/Phoenix_ScotchMorris_STEMS/Phoenix_ScotchMorris_STEM_04.wav']

    """
    assert depth >= 1
    suffix = ext.strip(os.extsep)

    # One pattern per level: "*.<ext>", "*/*.<ext>", "*/*/*.<ext>", ...
    patterns = [
        os.path.join(in_dir,
                     os.path.sep.join(["*"] * level) + os.extsep + suffix)
        for level in range(1, depth + 1)
    ]

    matched = []
    for pattern in patterns:
        matched.extend(glob.glob(pattern))

    if sort:
        matched.sort()
    return matched