XML cutter and midi formatter

This script parses a folder full of XML files, cuts them to a certain length if desired (according to a specific measure criterion), and then outputs a MIDI file with this new length.

Afterwards, we can process these new MIDI files with the jSymbolic GUI if we want to include its features in our analyses. This notebook also provides code for that purpose.

To install musif:

  1. Download this notebook.

  2. Start jupyter in your Anaconda environment.

  3. Open this script.

  4. Run the following cell by clicking on it and pressing Ctrl+Enter.

MuseScore is also required, because the MIDI conversion below calls its `mscore` command: https://musescore.org/es/download

%pip install musif
import os
import subprocess
import sys
from math import floor
from os import path
from pathlib import Path

from music21.stream.base import Measure, Score
from musif.common._utils import read_dicts_from_csv

import musif.extract.constants as C
from musif.extract.extract import parse_filename
from musif.logs import perr, pinfo, pwarn

sys.path.append(os.path.abspath('.'))
from feature_extraction.custom_conf import CustomConf
from musif.config import ExtractConfiguration
import glob


class CustomConf(ExtractConfiguration):
    """Extraction configuration that also loads score metadata from CSV files.

    After the regular ``ExtractConfiguration`` initialisation, every ``*.csv``
    file found directly inside ``self.metadata_dir`` is read and stored in
    ``self.scores_metadata``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``ExtractConfiguration`` and load metadata."""
        super().__init__(*args, **kwargs)
        self._load_metadata()

    def _load_metadata(self) -> None:
        """Fill ``self.scores_metadata`` with one entry per metadata CSV.

        Keys are the CSV base names (e.g. ``"themes.csv"``); values are
        whatever ``read_dicts_from_csv`` returns for that file. A message is
        printed when no CSV file could be found.
        """
        # ``glob`` is imported as a module, so the function has to be reached
        # as ``glob.glob``; calling the module itself raises TypeError.
        csv_pattern = path.join(self.metadata_dir, "*.csv")
        self.scores_metadata = {
            path.basename(file): read_dicts_from_csv(file)
            for file in glob.glob(csv_pattern)
        }
        if not self.scores_metadata:
            print(
                "\nMetadata could not be loaded properly!! Check metadata path in config file.\n"
            )

Define specific functions for our script

def cut_by_measures_by_measure(cfg, data):
    """Cut the score in ``data`` at the last measure given by the metadata.

    The metadata tables in ``cfg.scores_metadata`` are searched for a row
    whose ``"FileName"`` equals the name of the file being processed. The
    value stored in that row under the column named by ``cfg.end_of_theme_a``
    is rounded down and used as the last measure to keep. When no row or
    value is found, or the value is 0, the whole score is kept.

    Args:
        cfg: configuration exposing ``scores_metadata`` (mapping of CSV name
            to a list of row dicts) and ``end_of_theme_a`` (column name).
        data: dict holding the score under ``C.DATA_SCORE`` and the source
            file path under ``C.DATA_FILE``. The score is modified in place.
    """
    score: Score = data[C.DATA_SCORE]
    # Sentinel meaning "keep everything": larger than any real measure count.
    whole_score = 1000000
    last_measure = whole_score
    file_name = data[C.DATA_FILE].name

    # ``scores_metadata`` is keyed by CSV base name, so every loaded table is
    # scanned instead of indexing the dict with the measure sentinel.
    matching_rows = (
        row
        for rows in cfg.scores_metadata.values()
        for row in rows
        if row.get("FileName") == file_name
    )
    metadata = next(matching_rows, None)
    if metadata is not None:
        last_measure = floor(float(metadata.get(cfg.end_of_theme_a, whole_score)))
        if last_measure == 0:
            pwarn(f'Last measure for {file_name} fil was found to be 0! Remember to update metadata before extraction ;) Setting last measure to the end of the score.\n')
            last_measure = whole_score

    remove_everything_after_measure(score, last_measure)

def remove_everything_after_measure(score, last_measure):
    """Drop, in every part of ``score``, the measures beyond ``last_measure``.

    Measures are counted by their position within each part (starting at 1),
    and those whose position exceeds ``last_measure`` are removed in place.
    """
    for part in score.parts:
        surplus = [
            measure
            for position, measure in enumerate(part.getElementsByClass(Measure), start=1)
            if position > last_measure
        ]
        part.remove(targetOrList=surplus)

def save_xml(data, new_filename):
    """Write the score stored in ``data`` as MusicXML to ``new_filename`` + '.xml'."""
    target = f'{new_filename}.xml'
    score = data[C.DATA_SCORE]
    score.write('musicxml', fp=target)
    
def save_to_midi(filename):
    """Convert ``filename`` + '.xml' to ``filename`` + '.mid' with ``mscore``.

    Nothing is done when the MIDI file already exists. The conversion is run
    as an external ``mscore`` process with a 120-second timeout. A timeout or
    a non-zero exit status is reported as a warning instead of raising.

    Args:
        filename: path of the score without extension (str or path-like).
    """
    filename = str(filename)
    new_filename = filename + '.mid'
    if path.exists(new_filename):
        pinfo(f"{filename} already exists as MIDI, skipping it!")
        return
    cmd = ["mscore", "-fo", new_filename, filename + '.xml']
    pinfo(f"Converting {filename} to MIDI")
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        # Join with spaces so the suggested command can be copied and run.
        pwarn(
            f"Continuing because time expired for file {filename}! Try running:\n"
            + " ".join(cmd)
        )
        return
    if result.returncode != 0:
        # Surface failed conversions that would otherwise go unnoticed.
        pwarn(
            f"mscore exited with code {result.returncode} while converting {filename}; "
            "the MIDI file may be missing or incomplete."
        )
from musif.config import ExtractConfiguration

custom_config = "config.yml"

# Build the configuration; replace the metadata directory with your own.
cfg = CustomConf(
    None,
    metadata_dir="your/metadata/dir",
    expand_repeats=False,
    remove_unpitched_objects=True,
)
data_path = 'data/xml/'
data_path_cutted = Path('data/xml/cutted_themeA/')
# Create the output folder (and any missing parents) once, before the loop.
data_path_cutted.mkdir(parents=True, exist_ok=True)

# For every XML score: parse it, cut it, then save it as XML and as MIDI.
for filename in sorted(Path(data_path).glob("*.xml")):
    data = {}
    new_filename = data_path_cutted / Path(filename.stem + '_cutted')
    if path.exists(str(new_filename) + '.xml'):
        pinfo(f"{filename} already exists as cutted xml, skipping it!")
        continue
    score = parse_filename(
        filename,
        None,
        expand_repeats=cfg.expand_repeats,
        export_dfs_to=None,
        remove_unpitched_objects=cfg.remove_unpitched_objects,
    )
    data[C.DATA_SCORE] = score
    data[C.DATA_FILE] = filename

    cut_by_measures_by_measure(cfg, data)
    try:
        save_xml(data, new_filename)
    except Exception as e:
        perr(f'There was an error saving score {filename} to xml: {e}. Skipping it!')
        # Without the cut XML there is nothing to convert to MIDI.
        continue
    try:
        save_to_midi(new_filename)
    except Exception as e:
        perr(f'There was an error saving score {filename} to midi: {e}. Skipping it!')

— In this part, we are free to use the jSymbolic GUI to extract features from our recently created MIDI files. Afterwards, just run the following cell to join the jSymbolic data to musif’s extraction —

Download: https://sourceforge.net/projects/jmir/files/jSymbolic/

Merging musif data with jSymbolic extracted csv

Now, if we want to merge jSymbolic data with our extracted musif df:

import os

import pandas as pd


def _midi_path_to_xml_name(midi_path):
    """Return the bare file name of ``midi_path`` with its extension set to '.xml'."""
    # Accept both '/' and '\\' separators so the CSV can come from any OS.
    base_name = str(midi_path).replace('\\', '/').rsplit('/', 1)[-1]
    stem, _extension = os.path.splitext(base_name)
    return stem + '.xml'


# Path to the CSV file produced by musif's extraction. It must be a file:
# a directory such as '.' makes pd.read_csv fail (PermissionError on Windows).
path_to_musif_df = 'your/musif/extraction.csv'
df_musif = pd.read_csv(path_to_musif_df, low_memory=False)

path_to_jsymbollic_extracted_csv = 'extracted_feature_values.csv'
df_jSymbolic = pd.read_csv(path_to_jsymbollic_extracted_csv, low_memory=False)

# The unnamed first column is used as the merge key. ``rename`` returns a new
# frame, so its result has to be assigned back.
df_jSymbolic = df_jSymbolic.rename(columns={'Unnamed: 0': 'FileName'})
# Prefix every jSymbolic feature so it cannot clash with musif's columns.
df_jSymbolic.columns = [
    column if column == 'FileName' else 'js_' + column
    for column in df_jSymbolic.columns
]
# Reduce each MIDI path to '<name>.xml' so it matches the FileName values in
# musif's dataframe, wherever the MIDI files were stored.
df_jSymbolic['FileName'] = [
    _midi_path_to_xml_name(midi_path) for midi_path in df_jSymbolic['FileName']
]

df_total = pd.merge(df_musif, df_jSymbolic, on='FileName')
if df_total.empty:
    # The merge keeps only matching FileName values; an empty result means
    # the names in both tables never coincide.
    print("Warning: no FileName matched between musif and jSymbolic data.")

df_total.to_csv('total.csv', index=False)
---------------------------------------------------------------------------
PermissionError                           Traceback (most recent call last)
Cell In[2], line 4
      1 import pandas as pd
      3 path_to_musif_df = '.'
----> 4 df_musif = pd.read_csv(path_to_musif_df, low_memory=False)
      6 path_to_jsymbollic_extracted_csv = 'extracted_feature_values.csv'
      7 df_jSymbolic = pd.read_csv(path_to_jsymbollic_extracted_csv, low_memory=False)

File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:948, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
    935 kwds_defaults = _refine_defaults_read(
    936     dialect,
    937     delimiter,
   (...)
    944     dtype_backend=dtype_backend,
    945 )
    946 kwds.update(kwds_defaults)
--> 948 return _read(filepath_or_buffer, kwds)

File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:611, in _read(filepath_or_buffer, kwds)
    608 _validate_names(kwds.get("names", None))
    610 # Create the parser.
--> 611 parser = TextFileReader(filepath_or_buffer, **kwds)
    613 if chunksize or iterator:
    614     return parser

File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:1448, in TextFileReader.__init__(self, f, engine, **kwds)
   1445     self.options["has_index_names"] = kwds["has_index_names"]
   1447 self.handles: IOHandles | None = None
-> 1448 self._engine = self._make_engine(f, self.engine)

File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:1705, in TextFileReader._make_engine(self, f, engine)
   1703     if "b" not in mode:
   1704         mode += "b"
-> 1705 self.handles = get_handle(
   1706     f,
   1707     mode,
   1708     encoding=self.options.get("encoding", None),
   1709     compression=self.options.get("compression", None),
   1710     memory_map=self.options.get("memory_map", False),
   1711     is_text=is_text,
   1712     errors=self.options.get("encoding_errors", "strict"),
   1713     storage_options=self.options.get("storage_options", None),
   1714 )
   1715 assert self.handles is not None
   1716 f = self.handles.handle

File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\common.py:863, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    858 elif isinstance(handle, str):
    859     # Check whether the filename is to be opened in binary mode.
    860     # Binary mode does not support 'encoding' and 'newline'.
    861     if ioargs.encoding and "b" not in ioargs.mode:
    862         # Encoding
--> 863         handle = open(
    864             handle,
    865             ioargs.mode,
    866             encoding=ioargs.encoding,
    867             errors=errors,
    868             newline="",
    869         )
    870     else:
    871         # Binary mode
    872         handle = open(handle, ioargs.mode)

PermissionError: [Errno 13] Permission denied: '.'