Source code for jams.util

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
Utility functions
-----------------

.. autosummary::
    :toctree: generated/

    import_lab
    expand_filepaths
    smkdirs
    filebase
    find_with_extension
"""


import os
import glob
import pandas as pd

from . import core

def import_lab(namespace, filename, infer_duration=True, **parse_options):
    r'''Load a .lab file as an Annotation object.

    .lab files are assumed to have the following format:

        ``TIME_START\tTIME_END\tANNOTATION``

    By default, .lab files are assumed to have columns separated by one
    or more white-space characters, and have no header or index column
    information.

    If the .lab file contains only two columns, a duration column is
    inserted after the time column.

    If the .lab file contains more than three columns, each row's
    annotation value is assigned the contents of the last non-empty
    column.  Rows whose label cells are all empty get a value of `None`.

    Parameters
    ----------
    namespace : str
        The namespace for the new annotation

    filename : str
        Path to the .lab file

    infer_duration : bool
        If `True`, interval durations are inferred from `(start, end)`
        columns, or, for two-column files, from the difference between
        successive times (the final row gets a duration of 0).

        If `False`, interval durations are assumed to be explicitly coded
        as `(start, duration)` columns; for two-column files all
        durations are set to 0.

        For instantaneous event annotations (e.g., beats or onsets), this
        should be set to `False`.

    parse_options : additional keyword arguments
        Passed to ``pandas.read_csv``.  Defaults are filled in for
        ``sep``, ``engine``, ``header``, ``index_col`` and ``names`` when
        the caller does not supply them.

    Returns
    -------
    annotation : Annotation
        The newly constructed annotation object

    See Also
    --------
    pandas.read_csv
    '''
    # Create a new annotation object
    annotation = core.Annotation(namespace)

    parse_options.setdefault('sep', r'\s+')
    parse_options.setdefault('engine', 'python')
    parse_options.setdefault('header', None)
    parse_options.setdefault('index_col', False)

    # This is a hack to handle potentially ragged .lab data:
    # over-provision the column names, then discard the unused ones below.
    parse_options.setdefault('names', range(20))

    data = pd.read_csv(filename, **parse_options)

    # Drop all-nan columns
    data = data.dropna(how='all', axis=1)

    # Do we need to add a duration column?
    # This only applies to event annotations
    if len(data.columns) == 2:
        if infer_duration:
            # duration[i] = time[i + 1] - time[i]; the last row has no
            # successor, so its duration is 0.  The series is built up
            # front and inserted once, rather than written through a
            # chained `data['duration'][:-1] = ...` assignment, which
            # pandas does not guarantee will reach the frame.
            durations = data.loc[:, 0].diff().shift(-1)
            durations.iloc[-1:] = 0.0
            durations = durations.values
        else:
            durations = 0

        # Insert the duration column right after the timing column
        data.insert(1, 'duration', durations)

    elif infer_duration:
        # Convert from (start, end) to (start, duration)
        data.loc[:, 1] -= data[0]

    for row in data.itertuples():
        # row[0] is the index; the first two fields are time and duration
        time, duration = row[1:3]

        # Short rows in ragged files are padded with NaN (not None) by
        # pandas, so filter with notnull to find the last real label.
        labels = [x for x in row[3:] if pd.notnull(x)]
        value = labels[-1] if labels else None

        annotation.append(time=time,
                          duration=duration,
                          confidence=1.0,
                          value=value)

    return annotation
def expand_filepaths(base_dir, rel_paths):
    """Root a collection of relative paths at a base directory.

    Each relative path is normalized with ``os.path.normpath`` and then
    joined onto `base_dir` with ``os.path.join``.

    Parameters
    ----------
    base_dir : str
        The target base directory

    rel_paths : list (or list-like)
        Collection of relative path strings

    Returns
    -------
    expanded_paths : list
        `rel_paths` rooted at `base_dir`, in the same order

    Examples
    --------
    >>> jams.util.expand_filepaths('/data', ['audio', 'beat', 'seglab'])
    ['/data/audio', '/data/beat', '/data/seglab']
    """
    expanded_paths = []
    for rel_path in rel_paths:
        normalized = os.path.normpath(rel_path)
        expanded_paths.append(os.path.join(base_dir, normalized))
    return expanded_paths
def smkdirs(dpath, mode=0o777):
    """Create a directory path, including parents, unless it already exists.

    If anything already exists at `dpath`, nothing is done; otherwise the
    full path is created with ``os.makedirs``.

    Parameters
    ----------
    dpath : str
        Path of directory/directories to create

    mode : int [default=0o777]
        Permissions for the new directories

    See also
    --------
    os.makedirs
    """
    # Nothing to do when the path is already present
    if os.path.exists(dpath):
        return

    os.makedirs(dpath, mode=mode)
def filebase(filepath):
    """Strip the directory and the final extension from a file path.

    Parameters
    ----------
    filepath : str
        Path to a file

    Returns
    -------
    base : str
        The name of the file, with directory and extension removed

    Examples
    --------
    >>> jams.util.filebase('my_song.mp3')
    'my_song'
    """
    # Drop the directory part first, then split off the last extension
    name = os.path.basename(filepath)
    base, _extension = os.path.splitext(name)
    return base
def find_with_extension(in_dir, ext, depth=3, sort=True):
    """Glob a directory tree, level by level, for files with an extension.

    For each level ``n`` from 1 to `depth`, a pattern made of ``n``
    ``*`` components followed by the extension is globbed beneath
    `in_dir`, and all matches are collected.

    Parameters
    ----------
    in_dir : str
        Path to search.

    ext : str
        File extension to match. Leading or trailing extension
        separators are stripped before matching.

    depth : int
        Depth of directories to search. Must be at least 1.

    sort : bool
        Sort the list alphabetically

    Returns
    -------
    matched : list
        Collection of matching file paths.

    Examples
    --------
    >>> jams.util.find_with_extension('Audio', 'wav')
    ['Audio/LizNelson_Rainfall/LizNelson_Rainfall_MIX.wav',
     'Audio/LizNelson_Rainfall/LizNelson_Rainfall_RAW/LizNelson_Rainfall_RAW_01_01.wav',
     'Audio/LizNelson_Rainfall/LizNelson_Rainfall_RAW/LizNelson_Rainfall_RAW_02_01.wav',
     ...
     'Audio/Phoenix_ScotchMorris/Phoenix_ScotchMorris_STEMS/Phoenix_ScotchMorris_STEM_02.wav',
     'Audio/Phoenix_ScotchMorris/Phoenix_ScotchMorris_STEMS/Phoenix_ScotchMorris_STEM_03.wav',
     'Audio/Phoenix_ScotchMorris/Phoenix_ScotchMorris_STEMS/Phoenix_ScotchMorris_STEM_04.wav']
    """
    assert depth >= 1

    suffix = ext.strip(os.extsep)
    found = []

    for level in range(1, depth + 1):
        # One '*' per directory level, e.g. '*/*/*' for level 3
        stars = os.path.sep.join("*" for _ in range(level))
        pattern = os.path.join(in_dir, stars + os.extsep + suffix)
        found.extend(glob.glob(pattern))

    return sorted(found) if sort else found