Source code for musif.common.sort

import warnings
from logging import getLogger
from typing import List, Dict

from musif.config import LOGGER_NAME
from pandas import DataFrame


def sort_list(list_to_sort: List[str], reference_list: List[str]) -> List[str]:
    """
    Orders the elements of one list following the order of another.

    Elements that appear in the reference list come first, arranged as in the
    reference. Elements missing from the reference list are placed at the end,
    keeping the relative order they had in the input. A new list is returned;
    the input list is not modified.

    Parameters
    ----------
    list_to_sort : list
        List that needs to be sorted according to some criteria
    reference_list : list
        List used as reference to sort the first one.
    """
    rank = {}
    for position, item in enumerate(reference_list):
        # A repeated reference item keeps the position of its last occurrence.
        rank[item] = position

    def sort_key(item):
        # Known items sort before unknown ones; unknown items all share the
        # same key, so the stable sort leaves them in their original order.
        known = item in rank
        return (not known, rank[item] if known else 0)

    return sorted(list_to_sort, key=sort_key)
def sort_dict(dict_to_sort: dict, reference_list: list) -> dict:
    """
    Sorts dictionary keys according to a reference list.

    Keys found in the reference list come first, in reference order. Keys that
    are not in the reference list are appended afterwards in their original
    order, and a warning is logged for each of them. A new dictionary is
    returned; the input dictionary is not modified.

    Parameters
    ----------
    dict_to_sort : dict
        Dictionary that needs to be sorted according to some criteria
    reference_list : list
        List used as reference to sort the keys. Its entries must be hashable,
        as dictionary keys are.
    """
    # Map every reference entry to its first position (the value list.index()
    # would return) so each key is resolved with one lookup instead of
    # scanning the reference list twice per key.
    positions = {}
    for position, entry in enumerate(reference_list):
        positions.setdefault(entry, position)

    found = {}
    orphans = {}
    for key, value in dict_to_sort.items():
        position = positions.get(key)
        if position is None:
            orphans[key] = value
            getLogger(LOGGER_NAME).warning(
                "We do not have the appropiate sorting for {}".format(key)
            )
        else:
            found[position] = value

    dict_sorted = {
        reference_list[position]: found[position] for position in sorted(found)
    }
    dict_sorted.update(orphans)
    return dict_sorted
def sort_columns(data: DataFrame, sorting_list: list) -> DataFrame:
    """
    Rearranges the columns of a DataFrame following a reference list.

    The ordering itself is delegated to sort_list, which is called with the
    DataFrame's column names and a copy of the reference list.

    Parameters
    ----------
    data : DataFrame
        DataFrame whose columns need to be re-ordered according to some criteria
    sorting_list : list
        List used as reference to sort columns.
    """
    current_columns = data.columns.tolist()
    ordered_columns = sort_list(current_columns, list(sorting_list))
    return data[ordered_columns]
def _sorting_rank(value, form_list: list, fallback: int, lowercase: bool) -> int:
    """Return the position of ``value`` in ``form_list``, or ``fallback`` if it has none."""
    # Missing-value markers always go last, whatever the reference list holds.
    if str(value).lower().strip() in ("nan", "nd"):
        return fallback
    # Only strings are normalised; other values (numbers, None, ...) are
    # looked up unchanged instead of failing on a missing .strip().
    if isinstance(value, str):
        value = value.strip()
        if lowercase:
            value = value.lower()
    try:
        return form_list.index(value)
    except ValueError:
        return fallback


def sort_dataframe(
    data: DataFrame, column: str, sorting_lists: Dict[str, list], key_to_sort: str
) -> DataFrame:
    """
    Sorts Dataframe's rows by a column using a list as a reference.

    Rows are ordered by the position of their value in the reference list.
    Rows whose value is "nan" or "nd" (ignoring case and surrounding
    whitespace), or is not present in the reference list, are placed at the
    end. String values are stripped before the lookup, and additionally
    lower-cased when key_to_sort is "FormSorting" or "CharacterSorting".
    The input DataFrame is not modified.

    Parameters
    ----------
    data: DataFrame
        DataFrame to be re-ordered according to some criteria.
    column: str
        Column of the Dataframe used as key.
    sorting_lists: Dict[str, list]
        Dictionary containing lists used as reference to sort values.
    key_to_sort: str
        Key from sorting_lists that contains the desired list to be used as
        reference. The special value "Alphabetic" sorts by the column's own
        values and does not read sorting_lists.

    Returns
    -------
    DataFrame
        A sorted copy of ``data``. A temporary column named "Ranks" is used
        while sorting and is dropped from the result.

    Raises
    ------
    KeyError
        If key_to_sort is not "Alphabetic" and is not a key of sorting_lists.
    """
    if key_to_sort == "Alphabetic":
        return data.sort_values(by=[column])

    form_list = sorting_lists[key_to_sort]
    lowercase = key_to_sort in ("FormSorting", "CharacterSorting")
    # One past the last valid position, so unknown values always sort after
    # every known one regardless of how long the reference list is.
    fallback = len(form_list)
    ranks = [
        _sorting_rank(value, form_list, fallback, lowercase) for value in data[column]
    ]

    # assign() works on a copy, so the caller's DataFrame never gains the
    # temporary column; drop(columns=...) avoids the positional ``axis``
    # argument that recent pandas versions no longer accept.
    ranked = data.assign(Ranks=ranks)
    return ranked.sort_values(by=["Ranks"]).drop(columns="Ranks")