XML cutter and MIDI formatter
This script parses a folder full of XML files, optionally cuts each one to a certain length (according to a specific measure criterion), and then outputs a MIDI file with this new length.
Afterwards, we can process these new MIDI files with the jSymbolic GUI if we want to include its features in our analyses. This notebook also provides code for that purpose.
To install musif:
Download this notebook.
Start jupyter in your Anaconda environment. Open this script.
Run the following cell by clicking on it and pressing Ctrl+Enter.
MuseScore (its `mscore` command is used below for the MIDI conversion) can be downloaded from: https://musescore.org/es/download
%pip install musif
import os
import subprocess
import sys
from math import floor
from os import path
from pathlib import Path
from music21.stream.base import Measure, Score
from musif.common._utils import read_dicts_from_csv
import musif.extract.constants as C
from musif.extract.extract import parse_filename
from musif.logs import perr, pinfo, pwarn
sys.path.append(os.path.abspath('.'))
from feature_extraction.custom_conf import CustomConf
from musif.config import ExtractConfiguration
import glob
class CustomConf(ExtractConfiguration):
    """Extraction configuration that also loads score metadata from CSV files.

    After the regular ``ExtractConfiguration`` initialisation, every ``*.csv``
    file found directly inside ``self.metadata_dir`` is read with
    ``read_dicts_from_csv`` and stored in ``self.scores_metadata``, keyed by the
    base name of the CSV file.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._load_metadata()

    def _load_metadata(self) -> None:
        """Fill ``self.scores_metadata`` from the CSV files in ``metadata_dir``.

        Prints a notice (and leaves an empty dict) when no CSV file is found.
        """
        # ``glob`` is imported as a module in this file, so the function must be
        # reached as ``glob.glob``; calling the module itself raises TypeError.
        self.scores_metadata = {
            path.basename(file): read_dicts_from_csv(file)
            for file in glob.glob(path.join(self.metadata_dir, "*.csv"))
        }
        if not self.scores_metadata:
            print(
                "\nMetadata could not be loaded properly!! Check metadata path in config file.\n"
            )
Define specific functions for our script
def cut_by_measures_by_measure(cfg, data):
    """Trim the score in ``data`` to the last measure listed for it in the metadata.

    Every metadata table in ``cfg.scores_metadata`` is searched for the first
    row whose ``"FileName"`` matches the current file. The value stored under
    the column named by ``cfg.end_of_theme_a`` is floored to an integer and
    used as the last measure to keep; later measures are removed from the score
    in place. When no row matches, or the stored value is 0, a very large
    sentinel is used instead so that effectively nothing is removed.

    Args:
        cfg: configuration exposing ``scores_metadata`` (a dict whose values
            are iterables of row dicts) and ``end_of_theme_a`` (a column name).
        data: dict holding the parsed score under ``C.DATA_SCORE``. The file
            name to match is taken from ``data["FileName"]`` when present,
            otherwise from the ``name`` of the entry under ``C.DATA_FILE``.
    """
    keep_everything = 1000000  # larger than any realistic number of measures
    score: Score = data[C.DATA_SCORE]

    source_file = data.get(C.DATA_FILE)
    # NOTE(review): assumes the metadata "FileName" column stores the bare file
    # name including its extension -- confirm against your CSV files.
    file_name = data.get("FileName", getattr(source_file, "name", source_file))

    last_measure = keep_everything
    # The previous code indexed ``scores_metadata`` with the integer sentinel,
    # which can never be a key (keys are CSV base names). Look through every
    # loaded table instead.
    all_rows = (row for rows in cfg.scores_metadata.values() for row in rows)
    for metadata in all_rows:
        if metadata.get("FileName") != file_name:
            continue
        last_measure = floor(float(metadata.get(cfg.end_of_theme_a, last_measure)))
        if last_measure == 0:
            name = file_name
            pwarn(f'Last measure for {name} fil was found to be 0! Remember to update metadata before extraction ;) Setting last measure to the end of the score.\n')
            last_measure = keep_everything
        break  # first matching row wins

    remove_everything_after_measure(score, last_measure)
def remove_everything_after_measure(score, last_measure):
    """Drop, from every part of ``score``, the measures past position ``last_measure``.

    Measures are counted by their order of appearance in each part (starting
    at 1), not by their printed measure number. The score is modified in place.
    """
    for part in score.parts:
        surplus = [
            measure
            for position, measure in enumerate(part.getElementsByClass(Measure), start=1)
            if position > last_measure
        ]
        part.remove(targetOrList=surplus)
def save_xml(data, new_filename):
    """Write the score stored in ``data`` as MusicXML to ``<new_filename>.xml``."""
    target = str(new_filename) + '.xml'
    score = data[C.DATA_SCORE]
    score.write('musicxml', fp=target)
def save_to_midi(filename):
    """Convert ``<filename>.xml`` into ``<filename>.mid`` by running ``mscore``.

    The conversion is skipped when the ``.mid`` file already exists. A timeout
    (120 s) or a non-zero exit code is reported as a warning rather than
    raised, so a batch run can continue with the next file.

    Args:
        filename: path of the score without extension (str or path-like).

    Raises:
        OSError: propagated from ``subprocess.run`` if the ``mscore``
            executable cannot be started.
    """
    filename = str(filename)
    new_filename = filename + '.mid'
    if path.exists(new_filename):
        pinfo(f"{filename} already exists as MIDI, skipping it!")
        return

    cmd = ["mscore", "-fo", new_filename, filename + '.xml']
    pinfo(f"Converting {filename} to MIDI")
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        pwarn(
            f"Continuing because time expired for file {filename}! Try running:\n"
            # Join with spaces so the suggested command can be copy-pasted.
            + " ".join(cmd)
        )
        return

    if result.returncode != 0:
        # Previously a failed conversion went completely unnoticed.
        pwarn(
            f"mscore exited with code {result.returncode} for file {filename}; "
            "the MIDI file may not have been created."
        )
from musif.config import ExtractConfiguration

custom_config = "config.yml"
# Build the configuration; the metadata CSV files are read from ``metadata_dir``.
cfg = CustomConf(
    None,
    metadata_dir="your/metadata/dir",  # the comma after this argument was missing (SyntaxError)
    expand_repeats=False,
    remove_unpitched_objects=True,
)

data_path = 'data/xml/'
data_path_cutted = Path('data/xml/cutted_themeA/')

# For every XML score: parse it, cut it, save the cut XML, then convert it to MIDI.
for filename in sorted(Path(data_path).glob("*.xml")):
    new_filename = data_path_cutted / Path(filename.stem + '_cutted')
    if path.exists(str(new_filename) + '.xml'):
        pinfo(f"{filename} already exists as cutted xml, skipping it!")
        continue

    score = parse_filename(
        filename,
        None,
        expand_repeats=cfg.expand_repeats,
        export_dfs_to=None,
        remove_unpitched_objects=cfg.remove_unpitched_objects,
    )
    data = {C.DATA_SCORE: score, C.DATA_FILE: filename}
    cut_by_measures_by_measure(cfg, data)

    # ``parents=True`` so a missing intermediate directory does not abort the run.
    data_path_cutted.mkdir(parents=True, exist_ok=True)
    try:
        save_xml(data, new_filename)
    except Exception as e:
        perr(f'There was an error saving score {filename} to xml: {e}. Skipping it!')
        continue
    try:
        save_to_midi(new_filename)
    except Exception as e:
        perr(f'There was an error saving score {filename} to midi: {e}. Skipping it!')
— At this point, we are free to use the jSymbolic GUI to extract features from our recently created MIDI files. Afterwards, just run the following cell to join the jSymbolic data with musif's extraction —
Download: https://sourceforge.net/projects/jmir/files/jSymbolic/
Merging musif data with jSymbolic extracted csv
Now, if we want to merge jSymbolic data with our extracted musif df:
import pandas as pd

# Path to the CSV file produced by musif's extraction. It must point to a
# *file*: a directory such as '.' makes ``read_csv`` fail (PermissionError on
# Windows). Replace the placeholder with your own file.
path_to_musif_df = 'path/to/your/musif_extraction.csv'
df_musif = pd.read_csv(path_to_musif_df, low_memory=False)

path_to_jsymbollic_extracted_csv = 'extracted_feature_values.csv'
df_jSymbolic = pd.read_csv(path_to_jsymbollic_extracted_csv, low_memory=False)

# Prefix every jSymbolic column with 'js_', then turn the (unnamed) first column
# into the merge key. The earlier ``rename`` call that discarded its result was
# a no-op and has been dropped; had it taken effect, the key column would have
# ended up as 'js_FileName' and the merge below would have failed.
df_jSymbolic.columns = ['js_' + i for i in df_jSymbolic.columns]
df_jSymbolic.rename(columns={'js_Unnamed: 0': 'FileName'}, inplace=True)

# -- here you must process the FileName column so both values in musif's df and in j_Symbolic match
# Directory prefix that jSymbolic wrote in front of each MIDI file name; adapt it
# to the folder your MIDI files were read from.
midi_dir_prefix = '/Users/carlosvaquero/Downloads/midi_partial/'
df_jSymbolic['FileName'] = [
    i.replace(midi_dir_prefix, '').replace('.mid', '.xml')
    for i in df_jSymbolic['FileName']
]

# Keeps only the files present in both tables (pandas merges with how='inner' by default).
df_total = pd.merge(df_musif, df_jSymbolic, on='FileName')
df_total.to_csv('total.csv', index=False)
---------------------------------------------------------------------------
PermissionError Traceback (most recent call last)
Cell In[2], line 4
1 import pandas as pd
3 path_to_musif_df = '.'
----> 4 df_musif = pd.read_csv(path_to_musif_df, low_memory=False)
6 path_to_jsymbollic_extracted_csv = 'extracted_feature_values.csv'
7 df_jSymbolic = pd.read_csv(path_to_jsymbollic_extracted_csv, low_memory=False)
File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:948, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
944 dtype_backend=dtype_backend,
945 )
946 kwds.update(kwds_defaults)
--> 948 return _read(filepath_or_buffer, kwds)
File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:611, in _read(filepath_or_buffer, kwds)
608 _validate_names(kwds.get("names", None))
610 # Create the parser.
--> 611 parser = TextFileReader(filepath_or_buffer, **kwds)
613 if chunksize or iterator:
614 return parser
File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:1448, in TextFileReader.__init__(self, f, engine, **kwds)
1445 self.options["has_index_names"] = kwds["has_index_names"]
1447 self.handles: IOHandles | None = None
-> 1448 self._engine = self._make_engine(f, self.engine)
File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\parsers\readers.py:1705, in TextFileReader._make_engine(self, f, engine)
1703 if "b" not in mode:
1704 mode += "b"
-> 1705 self.handles = get_handle(
1706 f,
1707 mode,
1708 encoding=self.options.get("encoding", None),
1709 compression=self.options.get("compression", None),
1710 memory_map=self.options.get("memory_map", False),
1711 is_text=is_text,
1712 errors=self.options.get("encoding_errors", "strict"),
1713 storage_options=self.options.get("storage_options", None),
1714 )
1715 assert self.handles is not None
1716 f = self.handles.handle
File c:\Anaconda3\envs\musicai\lib\site-packages\pandas\io\common.py:863, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
858 elif isinstance(handle, str):
859 # Check whether the filename is to be opened in binary mode.
860 # Binary mode does not support 'encoding' and 'newline'.
861 if ioargs.encoding and "b" not in ioargs.mode:
862 # Encoding
--> 863 handle = open(
864 handle,
865 ioargs.mode,
866 encoding=ioargs.encoding,
867 errors=errors,
868 newline="",
869 )
870 else:
871 # Binary mode
872 handle = open(handle, ioargs.mode)
PermissionError: [Errno 13] Permission denied: '.'