pre-release

angelfaraldo · Dec 11, 2017 · fdde624 · fdde624
1 parent d2e5d91
commit fdde624
Show file tree

Hide file tree

Showing 2 changed files with 266 additions and 67 deletions.
diff --git a/miran/key.py b/miran/key.py
@@ -7,10 +7,17 @@
 import madmom as md
 
 from miran.vector import *
+from miran.format import int_to_key
 from miran.utils import bin_to_pc
 from miran.defs import KEY_SETTINGS, KEY_LABELS
 
 
+# TODO: we should try to separate tonic from mode id...
+# TODO: if the confidence is relatively high but there are many candidates, the input is possibly atonal
+# TODO: if the confidence is low and there are few candidates, the input is possibly unpitched
+
+
+
 def _select_profile_type(profile, templates_dict):
     try:
         return templates_dict[profile]
@@ -50,7 +57,9 @@ def _dur_to_endtime(**kwargs):
         return None
 
 
-def _key2(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
+def _key2(pcp, profile_type='bgate', interpolation='linear', candidates=4, conf_thres=0.5):
+    # for some reason, increasing the number of candidates breaks the process
+
     key_templates = {
 
         'bgate': np.array([[1., 0.00, 0.42, 0.00, 0.53, 0.37, 0.00, 0.77, 0.00, 0.38, 0.21, 0.30],
@@ -125,68 +134,180 @@ def _key2(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
     if (pcp.size < 12) or (pcp.size % 12 != 0):
         raise IndexError("Input PCP size is not a positive multiple of 12")
 
-    _major, _minor = _select_profile_type(profile_type, key_templates)
-
-    if _major.size > pcp.size:
-        pcp = resize_vector(pcp, _major.size)
-
-    if _major.size < pcp.size:
-        _major = resize_vector(_major, pcp.size, interpolation)
-        _minor = resize_vector(_minor, pcp.size, interpolation)
-
-    first_max_major = -1
-    second_max_major = -1
-    key_index_major = -1
-
-    first_max_minor = -1
-    second_max_minor = -1
-    key_index_minor = -1
-
-    for shift in np.arange(pcp.size):
-        correlation_major = crosscorrelation(pcp, np.roll(_major, shift))
-        if correlation_major > first_max_major:
-            second_max_major = first_max_major
-            first_max_major = correlation_major
-            key_index_major = shift
-
-        correlation_minor = crosscorrelation(pcp, np.roll(_minor, shift))
-        if correlation_minor > first_max_minor:
-            second_max_minor = first_max_minor
-            first_max_minor = correlation_minor
-            key_index_minor = shift
-
-    if first_max_major > first_max_minor:
-        key_index = key_index_major
-        scale = 'major'
-        first_max = first_max_major
-        second_max = second_max_major
-    elif first_max_minor > first_max_major:
-        key_index = key_index_minor
-        scale = 'minor'
-        first_max = first_max_minor
-        second_max = second_max_minor
-    else:
-        key_index = -1
-        first_max = -1
-        second_max = -1
-        scale = 'unknown'
-
-    key_index /= pcp.size / 12.
-    key_index = int(np.round(key_index)) % 12
-
-    if key_index < 0:
-        raise IndexError("key_index smaller than zero. Could not find key.")
-    else:
-        first_to_second_ratio = (first_max - second_max) / first_max
-        if first_max < conf_thres:
-            #return 'NoTonic', 'NoMode', first_max, first_to_second_ratio
-            return 'NoTonic', 'NoMode', first_max, first_to_second_ratio
-            # TODO: we should try to separate tonic from mode id...
-            # TODO: si la confidencia es relativaente alta pero hay muchos candidatos posiblemente sea atonical
-            # TODO: si la confidencia es baja y hay pocos candidatos posiblemente sea unpitched
-        else:
-            return KEY_LABELS[key_index], scale, first_max, first_to_second_ratio
-
+    _key_profiles = _select_profile_type(profile_type, key_templates)
+
+    # TODO: IMPORTANT, fix this! The PCP/profile resizing below is currently disabled.
+    # if _key_profiles[0].size > pcp.size:
+    #    pcp = resize_vector(pcp, _key_profiles[0].size)
+
+    #    if _key_profiles[0].size < pcp.size:
+    #        _major = resize_vector(_major, pcp.size, interpolation)
+    #        _minor = resize_vector(_minor, pcp.size, interpolation)
+
+    corr_values = []
+
+    for profile in _key_profiles:
+        for shift in np.arange(pcp.size):
+            corr_values.append(crosscorrelation(pcp, np.roll(profile, shift)))
+
+    corr_indexes = np.argpartition(corr_values, -candidates)[-candidates:]
+
+    keys = []
+    keys_confidences = []
+
+    for index in corr_indexes[::-1]:
+        keys.append(int_to_key(index))
+        keys_confidences.append(corr_values[index])
+
+    first_to_second_ratio = (corr_values[0] - corr_values[1])  # / corr_values[0]
+
+    if keys_confidences[0] < conf_thres:
+         return 'X', keys_confidences[0], first_to_second_ratio
+
+    if np.mean(keys_confidences - corr_values[0]) < 0.1:
+        print("Too many key Candidates, random result")
+
+    return keys[0], keys, keys_confidences, first_to_second_ratio
+
+
+
+# def _key2(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
+#     key_templates = {
+#
+#         'bgate': np.array([[1., 0.00, 0.42, 0.00, 0.53, 0.37, 0.00, 0.77, 0.00, 0.38, 0.21, 0.30],
+#                            [1., 0.00, 0.36, 0.39, 0.00, 0.38, 0.00, 0.74, 0.27, 0.00, 0.42, 0.23]]),
+#
+#         # almost identical to bgate. kept for backwards compatibility
+#         'bmtg3': np.array([[1.00, 0.00, 0.42, 0.00, 0.53, 0.37, 0.00, 0.76, 0.00, 0.38, 0.21, 0.30],
+#                            [1.00, 0.00, 0.36, 0.39, 0.10, 0.37, 0.00, 0.76, 0.27, 0.00, 0.42, 0.23]]),
+#
+#         'bmtg2': np.array([[1.00, 0.10, 0.42, 0.10, 0.53, 0.37, 0.10, 0.77, 0.10, 0.38, 0.21, 0.30],
+#                            [1.00, 0.10, 0.36, 0.39, 0.29, 0.38, 0.10, 0.74, 0.27, 0.10, 0.42, 0.23]]),
+#
+#         # was originally bmtg1
+#         'braw': np.array([[1., 0.1573, 0.4200, 0.1570, 0.5296, 0.3669, 0.1632, 0.7711, 0.1676, 0.3827, 0.2113, 0.2965],
+#                           [1., 0.2330, 0.3615, 0.3905, 0.2925, 0.3777, 0.1961, 0.7425, 0.2701, 0.2161, 0.4228, 0.2272]]),
+#
+#         'diatonic': np.array([[1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1],
+#                               [1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1]]),
+#
+#         'monotonic': np.array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+#                                [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]),
+#
+#         'triads': np.array([[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0],
+#                             [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]]),
+#
+#         'edma_ecir': np.array([[0.16519551, 0.04749026, 0.08293076, 0.06687112, 0.09994645, 0.09274123, 0.05294487, 0.13159476, 0.05218986,
+#                                 0.07443653, 0.06940723, 0.0642515],
+#                                [0.17235348, 0.05336489, 0.0761009, 0.10043649, 0.05621498, 0.08527853, 0.0497915, 0.13451001, 0.07458916, 0.05003023,
+#                                 0.09187879, 0.05545106]]),
+#
+#         'edmm_ecir': np.array([[0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083, 0.083],
+#                                [0.17235348, 0.04, 0.0761009, 0.12, 0.05621498, 0.08527853, 0.0497915, 0.13451001, 0.07458916, 0.05003023, 0.09187879,
+#                                 0.05545106]]),
+#
+#         'edma': np.array([[1., 0.2875, 0.5020, 0.4048, 0.6050, 0.5614, 0.3205, 0.7966, 0.3159, 0.4506, 0.4202, 0.3889],
+#                           [1., 0.3096, 0.4415, 0.5827, 0.3262, 0.4948, 0.2889, 0.7804, 0.4328, 0.2903, 0.5331, 0.3217]]),
+#
+#         'edmm': np.array([[1., 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
+#                           [1., 0.2321, 0.4415, 0.6962, 0.3262, 0.4948, 0.2889, 0.7804, 0.4328, 0.2903, 0.5331, 0.3217]]),
+#
+#         'krumhansl': np.array([[6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
+#                                [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]]),
+#
+#         'temperley99': np.array([[5.0, 2.0, 3.5, 2.0, 4.5, 4.0, 2.0, 4.5, 2.0, 3.5, 1.5, 4.0],
+#                                  [5.0, 2.0, 3.5, 4.5, 2.0, 4.0, 2.0, 4.5, 3.5, 2.0, 1.5, 4.0]]),
+#
+#         'temperley05': np.array([[0.748, 0.060, 0.488, 0.082, 0.67, 0.46, 0.096, 0.715, 0.104, 0.366, 0.057, 0.4],
+#                                  [0.712, 0.084, 0.474, 0.618, 0.049, 0.46, 0.105, 0.747, 0.404, 0.067, 0.133, 0.33]]),
+#
+#         'temperley-essen': np.array([[0.184, 0.001, 0.155, 0.003, 0.191, 0.109, 0.005, 0.214, 0.001, 0.078, 0.004, 0.055],
+#                                      [0.192, 0.005, 0.149, 0.179, 0.002, 0.144, 0.002, 0.201, 0.038, 0.012, 0.053, 0.022]]),
+#
+#         'thpcp': np.array([[0.95162, 0.20742, 0.71758, 0.22007, 0.71341, 0.48841, 0.31431, 1.00000, 0.20957, 0.53657, 0.22585, 0.55363],
+#                            [0.94409, 0.21742, 0.64525, 0.63229, 0.27897, 0.57709, 0.26428, 1.0000, 0.26428, 0.30633, 0.45924, 0.35929]]),
+#
+#         'shaath': np.array([[6.6, 2.0, 3.5, 2.3, 4.6, 4.0, 2.5, 5.2, 2.4, 3.7, 2.3, 3.4],
+#                             [6.5, 2.7, 3.5, 5.4, 2.6, 3.5, 2.5, 5.2, 4.0, 2.7, 4.3, 3.2]]),
+#
+#         'gomez': np.array([[0.82, 0.00, 0.55, 0.00, 0.53, 0.30, 0.08, 1.00, 0.00, 0.38, 0.00, 0.47],
+#                            [0.81, 0.00, 0.53, 0.54, 0.00, 0.27, 0.07, 1.00, 0.27, 0.07, 0.10, 0.36]]),
+#
+#         'faraldo': np.array([[7.0, 2.0, 3.8, 2.3, 4.7, 4.1, 2.5, 5.2, 2.0, 3.7, 3.0, 3.4],
+#                              [7.0, 3.0, 3.8, 4.5, 2.6, 3.5, 2.5, 5.2, 4.0, 2.5, 4.5, 3.0]]),
+#
+#         'pentatonic': np.array([[1.0, 0.1, 0.25, 0.1, 0.5, 0.7, 0.1, 0.8, 0.1, 0.25, 0.1, 0.5],
+#                                 [1.0, 0.2, 0.25, 0.5, 0.1, 0.7, 0.1, 0.8, 0.3, 0.2, 0.6, 0.2]]),
+#
+#         'noland': np.array([[0.0629, 0.0146, 0.061, 0.0121, 0.0623, 0.0414, 0.0248, 0.0631, 0.015, 0.0521, 0.0142, 0.0478],
+#                             [0.0682, 0.0138, 0.0543, 0.0519, 0.0234, 0.0544, 0.0176, 0.067, 0.0349, 0.0297, 0.0401, 0.027]])
+#     }
+#
+#     if (pcp.size < 12) or (pcp.size % 12 != 0):
+#         raise IndexError("Input PCP size is not a positive multiple of 12")
+#
+#     _major, _minor = _select_profile_type(profile_type, key_templates)
+#
+#     if _major.size > pcp.size:
+#         pcp = resize_vector(pcp, _major.size)
+#
+#     if _major.size < pcp.size:
+#         _major = resize_vector(_major, pcp.size, interpolation)
+#         _minor = resize_vector(_minor, pcp.size, interpolation)
+#
+#     first_max_major = -1
+#     second_max_major = -1
+#     key_index_major = -1
+#
+#     first_max_minor = -1
+#     second_max_minor = -1
+#     key_index_minor = -1
+#
+#     for shift in np.arange(pcp.size):
+#         correlation_major = crosscorrelation(pcp, np.roll(_major, shift))
+#         if correlation_major > first_max_major:
+#             second_max_major = first_max_major
+#             first_max_major = correlation_major
+#             key_index_major = shift
+#
+#         correlation_minor = crosscorrelation(pcp, np.roll(_minor, shift))
+#         if correlation_minor > first_max_minor:
+#             second_max_minor = first_max_minor
+#             first_max_minor = correlation_minor
+#             key_index_minor = shift
+#
+#     if first_max_major > first_max_minor:
+#         key_index = key_index_major
+#         scale = 'major'
+#         first_max = first_max_major
+#         second_max = second_max_major
+#     elif first_max_minor > first_max_major:
+#         key_index = key_index_minor
+#         scale = 'minor'
+#         first_max = first_max_minor
+#         second_max = second_max_minor
+#     else:
+#         key_index = -1
+#         first_max = -1
+#         second_max = -1
+#         scale = 'unknown'
+#
+#     key_index /= pcp.size / 12.
+#     key_index = int(np.round(key_index)) % 12
+#
+#     if key_index < 0:
+#         raise IndexError("key_index smaller than zero. Could not find key.")
+#     else:
+#         first_to_second_ratio = (first_max - second_max) / first_max
+#         key = '{} {}'.format(KEY_LABELS[key_index], scale)
+#
+#         if first_max < conf_thres:
+#             #return 'NoTonic', 'NoMode', first_max, first_to_second_ratio
+#             return 'X', first_max, first_to_second_ratio
+#         else:
+#             # return KEY_LABELS[key_index], scale, first_max, first_to_second_ratio
+#             return key, first_max, first_to_second_ratio
+#
 
 def _key3(pcp, profile_type='bgate', interpolation='linear', conf_thres=0.5):
     if (pcp.size < 12) or (pcp.size % 12 != 0):
@@ -1015,13 +1136,18 @@ def key_angel(input_audio_file, output_text_file, **kwargs):
     # IMPORTANT! Adjust to essentia's HPCP calculation starting on A...
     chroma = np.roll(chroma, -3 * (kwargs["HPCP_SIZE"] // 12))
 
+    print(list(chroma))
+
+
     if kwargs["USE_THREE_PROFILES"]:
         estimation_1 = _key3(chroma, kwargs["KEY_PROFILE"], kwargs["PROFILE_INTERPOLATION"])
     else:
         estimation_1 = _key2(chroma, kwargs["KEY_PROFILE"], kwargs["PROFILE_INTERPOLATION"])
 
-    key_1 = estimation_1[0] + '\t' + estimation_1[1]
-    correlation_value = estimation_1[2]
+    #key_1 = estimation_1[0] + '\t' + estimation_1[1]
+    #correlation_value = estimation_1[2]
+    key_1 = estimation_1[0]
+    correlation_value = estimation_1[1]
 
     if kwargs["WITH_MODAL_DETAILS"]:
         estimation_2 = _key7(chroma, kwargs["PROFILE_INTERPOLATION"])
@@ -1038,8 +1164,8 @@ def key_angel(input_audio_file, output_text_file, **kwargs):
         key = key_1
 
     textfile = open(output_text_file, 'w')
-    #textfile.write(key + '\t' + str(correlation_value) + '\n')
-    textfile.write(key)
+    textfile.write(key + '\t' + str(correlation_value) + '\n')
+    #textfile.write(key)
     textfile.close()
 
     return key, correlation_value
diff --git a/miran/settings/edmNew.json b/miran/settings/edmNew.json
@@ -0,0 +1,73 @@
+{
+  "DURATION": null,
+  "START_TIME": 0,
+
+  "SAMPLE_RATE": 44100,
+  "WINDOW_SIZE": 32768,
+  "HOP_SIZE": 4096,
+  "WINDOW_SHAPE": "hann",
+
+  "PCP_THRESHOLD": 0.2,
+  "HIGHPASS_CUTOFF": 200,
+  "SPECTRAL_WHITENING": true,
+  "DETUNING_CORRECTION": false,
+  "DETUNING_CORRECTION_SCOPE": "average",
+
+  "MIN_HZ": 25.0,
+  "MAX_HZ": 3500.0,
+  "SPECTRAL_PEAKS_THRESHOLD": 0.0001,
+  "SPECTRAL_PEAKS_MAX": 60,
+
+  "HPCP_BAND_PRESET": false,
+  "HPCP_SPLIT_HZ": 250,
+  "HPCP_HARMONICS": 4,
+  "HPCP_REFERENCE_HZ": 440,
+  "HPCP_NON_LINEAR": false,
+  "HPCP_NORMALIZE": false,
+  "HPCP_SHIFT": false,
+  "HPCP_SIZE": 12,
+  "HPCP_WEIGHT_WINDOW_SEMITONES": 1,
+  "HPCP_WEIGHT_TYPE": "cosine",
+  "PROFILE_INTERPOLATION": "linear",
+
+
+  "KEY_POLYPHONY": false,
+  "KEY_USE_THREE_CHORDS": false,
+  "KEY_HARMONICS": 15,
+  "KEY_SLOPE": 0.2,
+
+  "ANALYSIS_TYPE": "global",
+  "N_WINDOWS": 100,
+  "WINDOW_INCREMENT": 100,
+
+  "KEY_PROFILE": "bgate",
+  "USE_THREE_PROFILES": false,
+  "WITH_MODAL_DETAILS": false,
+
+  "OPTIONS": {
+    "DETUNING_CORRECTION_SCOPE": [
+      "average",
+      "frame"
+    ],
+    "HPCP_NORMALIZE": [
+      "none",
+      "unitSum",
+      "unitMax"
+    ],
+    "HPCP_WEIGHT_TYPE": [
+      "none",
+      "cosine",
+      "squaredCosine"
+    ],
+    "KEY_PROFILE": [
+      "bgate",
+      "braw",
+      "edma",
+      "edmm"
+    ],
+    "ANALYSIS_TYPE": [
+      "global",
+      "local"
+    ]
+  }
+}