DR error measures: Difference between revisions

From Xenharmonic Reference
Line 117: Line 117:
* Naturally deals with the ''x'' > 0 constraint using a log barrier
* Naturally deals with the ''x'' > 0 constraint using a log barrier
==== Python code ====
==== Python code ====
<pre>
import numpy as np
import math
from functools import reduce
class LBFGSSolution:
    """Outcome of an L-BFGS run.

    Attributes:
        x: final iterate.
        fx: objective value at the final iterate.
        iterations: number of iterations performed.
        success: True when the gradient-norm tolerance was reached.
    """

    def __init__(self, x, fx, iterations, success):
        # Record the solver outcome verbatim.
        self.x, self.fx = x, fx
        self.iterations, self.success = iterations, success
def numerical_gradient(f, x, eps = 1e-8):
    """Estimate the gradient of f at x by central finite differences.

    Args:
        f: scalar-valued function of a 1-D numpy array.
        x: point (1-D numpy array) at which to estimate the gradient.
        eps: half-width of the central-difference step.

    Returns:
        1-D numpy array of the same length as x holding the estimated
        partial derivatives.
    """
    grad = np.empty(len(x))
    for i in range(len(x)):
        x_plus = x.copy()
        x_minus = x.copy()
        x_plus[i] += eps
        # Bug fix: was `x_minus[i] += eps`, which made x_plus == x_minus and
        # hence every partial derivative identically zero.
        x_minus[i] -= eps
        grad[i] = (f(x_plus) - f(x_minus)) / (2 * eps)
    return grad
def l_bfgs(f, grad, x0, history_size = 10, max_iterations = 100, tolerance = 1e-8):
    """Minimize f with the limited-memory BFGS quasi-Newton method.

    Args:
        f: objective, mapping a 1-D numpy array to a scalar.
        grad: gradient of f, mapping a 1-D numpy array to a 1-D numpy array.
        x0: starting point (sequence of floats).
        history_size: number of curvature pairs kept for the inverse-Hessian
            approximation.
        max_iterations: iteration cap.
        tolerance: convergence threshold on the Euclidean gradient norm.

    Returns:
        LBFGSSolution with the final point, objective value, iteration count,
        and a success flag (True when the gradient norm fell below tolerance).
    """
    # Cast to float arrays so vector arithmetic works for list inputs too.
    x = np.asarray(x0, dtype=float)
    fx = f(x)
    g = np.asarray(grad(x), dtype=float)

    s_history = []    # displacements s_k = x_{k+1} - x_k
    y_history = []    # gradient differences y_k = g_{k+1} - g_k
    rho_history = []  # 1 / (s_k . y_k)

    for iteration in range(max_iterations):
        # Convergence check on the gradient norm.
        if math.sqrt(float(g @ g)) < tolerance:
            return LBFGSSolution(x, fx, iteration, True)

        # Two-loop recursion: implicitly apply the inverse-Hessian
        # approximation to the current gradient.
        q = g.copy()
        m = len(s_history)
        # Bug fix: alpha must be pre-sized (the original assigned alpha[i] on
        # an empty list), and the reverse range was written `-1. -1` (a single
        # float argument) instead of `-1, -1`.
        alpha = [0.0] * m
        for i in range(m - 1, -1, -1):
            alpha[i] = rho_history[i] * float(s_history[i] @ q)
            q = q - alpha[i] * y_history[i]

        # Initial Hessian scaling gamma = s'y / y'y from the newest pair.
        gamma = 1.0
        if m > 0:
            gamma = float(s_history[-1] @ y_history[-1]) / float(y_history[-1] @ y_history[-1])

        z = gamma * q
        for i in range(m):
            beta = rho_history[i] * float(y_history[i] @ z)
            z = z + (alpha[i] - beta) * s_history[i]

        p = -z  # search direction

        # Backtracking line search enforcing the Armijo sufficient-decrease
        # condition.
        step_size = 1.0
        c1 = 1e-4
        rho_ls = 0.5
        max_line_search = 20
        directional_derivative = float(g @ p)

        # Bug fix: the original built x_new via map() with malformed
        # two-argument lambdas (and once with no iterable at all); plain
        # numpy vector arithmetic replaces all of those calls.
        x_new = x + step_size * p
        fx_new = f(x_new)
        for _ in range(max_line_search):
            if fx_new <= fx + c1 * step_size * directional_derivative:
                break
            step_size *= rho_ls
            x_new = x + step_size * p
            fx_new = f(x_new)

        # Update the curvature-pair history.
        s = x_new - x
        g_new = np.asarray(grad(x_new), dtype=float)
        y = g_new - g
        sy = float(s @ y)
        # Skip the pair when s.y is tiny, otherwise the inverse-Hessian
        # approximation would lose positive definiteness.
        if sy > 1e-10:
            s_history.append(s)
            y_history.append(y)
            rho_history.append(1.0 / sy)
            # Bug fix: the original used the JavaScript-ism
            # `s_history.length` instead of len(s_history).
            if len(s_history) > history_size:
                del s_history[0]
                del y_history[0]
                del rho_history[0]

        x = x_new
        fx = fx_new
        g = g_new

    return LBFGSSolution(x, fx, max_iterations, False)
def l_bfgs_b(f, bounds, x0, history_size = 10, max_iterations = 100, tolerance = 1e-8, barrier_weight = 1e-6):
    """Bound-constrained minimization: L-BFGS on a log-barrier objective.

    Args:
        f: objective, mapping a 1-D numpy array to a scalar.
        bounds: per-coordinate (lower, upper) pairs; either bound may be None.
        x0: strictly feasible starting point.
        history_size, max_iterations, tolerance: passed through to l_bfgs.
        barrier_weight: weight of the log-barrier penalty terms.

    Returns:
        LBFGSSolution whose fx is the *unpenalized* objective at the solution.
    """
    def transformed_f(x):
        # Objective plus log-barrier terms; +inf outside the feasible box.
        penalty = 0.0
        for xi, (lower, upper) in zip(x, bounds):
            if lower is not None and xi <= lower:
                return float("inf")
            if upper is not None and xi >= upper:
                return float("inf")
            if lower is not None:
                penalty -= barrier_weight * math.log(xi - lower)
            if upper is not None:
                penalty -= barrier_weight * math.log(upper - xi)
        return f(x) + penalty

    def barrier_grad(x):
        return numerical_gradient(transformed_f, x)

    result = l_bfgs(transformed_f, barrier_grad, x0, history_size, max_iterations, tolerance)
    # Report the true objective value, not the barrier-augmented one.
    result.fx = f(result.x)
    return result
</pre>


== External links ==
== External links ==
* [https://inthar-raven.github.io/delta/ Inthar's DR chord explorer (includes least-squares error calculation in both domains and multiple error models)]
* [https://inthar-raven.github.io/delta/ Inthar's DR chord explorer (includes least-squares error calculation in both domains and multiple error models)]
[[Category:Atypical ratios]]
[[Category:Atypical ratios]]

Revision as of 04:19, 14 December 2025

This is a technical or mathematical page. While the subject may be of some relevance to music, the page treats the subject in technical language.

This article will describe several least-squares error measures for delta-rational chords. They have the advantage of not fixing a particular interval in the chord when constructing the chord of best fit. However, like any other numerical measure of concordance or error, you should take them with a grain of salt.

Conventions and introduction

The idea motivating least-squares error measures on a chord as an approximation to a given delta signature is the following (for simplicity, let’s talk about the fully DR case first):

We want the error of a chord 1:r1:r2:...:rn (in increasing order), with n > 1, in the linear domain as an approximation to a fully delta-rational chord with signature $+\delta_1\ {+\delta_2}\ \ldots\ {+\delta_n}$, i.e. a chord

$x : x+\delta_1 : \cdots : x+\sum_{l=1}^{n}\delta_l$

with root real-valued harmonic x. Let $D_0 = 0$ and $D_i = \sum_{k=1}^{i}\delta_k$ be the delta signature $+\delta_1\ {+\delta_2}\ \ldots\ {+\delta_n}$ written cumulatively.

We want to measure the error without having to fix any dyad (as one might naively fix a dyad and measure errors in the other deltas in relation to the fixed dyad). To do this we solve a least-squares error problem: use a root-sum-square error function and optimize x (and any free deltas) to minimize that function.

Domain and error model

We have two choices:

  • to measure either the linear (frequency ratio) error or the logarithmic (cents) one (called the domain);
  • the collection of intervals to sum over (which we call the error model):
    • Rooted: Only intervals from the root real-valued harmonic x are chosen.
    • Pairwise: All intervals in the chord are compared.
    • All-steps: Only intervals between adjacent notes are compared.

We arrive at the following general formula: Let $[n]=\{0,1,2,\ldots,n\}$, let $I \subseteq \binom{[n]}{2}$ be the error model, and let $f_D$ represent the domain function (identity or log). Then the error function to be minimized by optimizing x and any free deltas is:

$\sum_{i\lt j,\ \{i,j\}\in I}\left(f_D\!\left(\frac{x+D_j}{x+D_i}\right)-f_D\!\left(\frac{r_j}{r_i}\right)\right)^2.$

Error function for various modes and error models
Domain Error model Error function
Linear Rooted $\sum_{i=1}^{n}\left(\frac{x+D_i}{x}-r_i\right)^2=\sum_{i=1}^{n}\left(1+\frac{D_i}{x}-r_i\right)^2$
Pairwise $\sum_{0\le i\lt j\le n}\left(\frac{x+D_j}{x+D_i}-\frac{r_j}{r_i}\right)^2$
All-steps $\sum_{0\le i\lt n}\left(\frac{x+D_{i+1}}{x+D_i}-\frac{r_{i+1}}{r_i}\right)^2$
Logarithmic
(nepers)
Rooted $\sum_{i=1}^{n}\left(\log\frac{x+D_i}{r_i x}\right)^2$
Pairwise $\sum_{0\le i\lt j\le n}\left(\log\frac{x+D_j}{x+D_i}-\log\frac{r_j}{r_i}\right)^2$
All-steps $\sum_{0\le i\lt n}\left(\log\frac{x+D_{i+1}}{x+D_i}-\log\frac{r_{i+1}}{r_i}\right)^2$

To convert nepers to cents, multiply by $\frac{1200}{\log 2}$.

Solution methods

Analytic

Fully DR, rooted linear

Setting the derivative to 0 gives us the closed-form solution

$x=\frac{\sum_{i=1}^{n}D_i}{\sum_{i=1}^{n}r_i-n},$

which can be plugged back into

$\sum_{i=1}^{n}\left(1+\frac{D_i}{x}-r_i\right)^2$

to obtain the least-squares linear error.

Suppose we wish to approximate a target delta signature of the form +δ1+?+δ3 with the chord 1:r1:r2:r3 (where the +? is free to vary). The least-squares problem is

$\underset{x,\,y}{\text{minimize}}\quad\left(\frac{x+\delta_1}{x}-r_1\right)^2+\left(\frac{x+\delta_1+y}{x}-r_2\right)^2+\left(\frac{x+\delta_1+y+\delta_3}{x}-r_3\right)^2,$

where y represents the free delta +?.

We can set the partial derivatives with respect to x and y of the inner expression equal to zero (since the derivative of sqrt() is never 0) and use SymPy to solve the system symbolically:

import sympy

# Unknowns: the root real-valued harmonic x and the free delta y.
x, y = sympy.symbols("x y", real=True)
# Target deltas and chord ratios are kept symbolic.
d1, d2, d3 = sympy.symbols("\\delta_{1} \\delta_{2} \\delta_{3}", real=True)
r1, r2, r3 = sympy.symbols("r_1 r_2 r_3", real=True)

# Squared linear rooted error of the chord 1 : r1 : r2 : r3 against the
# signature +d1 +y +d3, where y is the free delta.
err_squared = (
    ((x + d1) / x - r1) ** 2
    + ((x + d1 + y) / x - r2) ** 2
    + ((x + d1 + y + d3) / x - r3) ** 2
)
err_squared.expand()

# Stationarity: both partial derivatives of the squared error must vanish.
err_squared_x = sympy.diff(err_squared, x)
err_squared_y = sympy.diff(err_squared, y)
sympy.nonlinsolve([err_squared_x, err_squared_y], [x, y])

The unique solution with x > 0 is

$x=\frac{2\delta_1+\delta_3+2y}{r_2+r_3-2},$

$y=\frac{-2\delta_1^2 r_1+\delta_1^2 r_2+\delta_1^2 r_3-\delta_1\delta_3 r_1+\delta_1\delta_3 r_2-\delta_1\delta_3 r_3+\delta_1\delta_3+\delta_3^2 r_2-\delta_3^2}{2\delta_1 r_1-2\delta_1-\delta_3 r_2+\delta_3 r_3}.$

Grid method (FDR case)

We let x1 = x and include additional free variables x2, ..., xn, one for every additional +?, after coalescing strings of consecutive +?'s into segments and after trimming leading and trailing free delta segments.

L-BFGS-B is a quasi-Newton optimization method particularly suited for this problem:

  • The error function is smooth, allowing use of gradients
  • Fast convergence, requiring at worst 20 iterations for accuracy
  • Naturally deals with the x > 0 constraint using a log barrier

Python code

import numpy as np
import math
from functools import reduce

class LBFGSSolution:
    """Outcome of an L-BFGS run.

    Attributes:
        x: final iterate.
        fx: objective value at the final iterate.
        iterations: number of iterations performed.
        success: True when the gradient-norm tolerance was reached.
    """

    def __init__(self, x, fx, iterations, success):
        # Record the solver outcome verbatim.
        self.x, self.fx = x, fx
        self.iterations, self.success = iterations, success

def numerical_gradient(f, x, eps = 1e-8):
    """Estimate the gradient of f at x by central finite differences.

    Args:
        f: scalar-valued function of a 1-D numpy array.
        x: point (1-D numpy array) at which to estimate the gradient.
        eps: half-width of the central-difference step.

    Returns:
        1-D numpy array of the same length as x holding the estimated
        partial derivatives.
    """
    grad = np.empty(len(x))
    for i in range(len(x)):
        x_plus = x.copy()
        x_minus = x.copy()
        x_plus[i] += eps
        # Bug fix: was `x_minus[i] += eps`, which made x_plus == x_minus and
        # hence every partial derivative identically zero.
        x_minus[i] -= eps
        grad[i] = (f(x_plus) - f(x_minus)) / (2 * eps)
    return grad

def l_bfgs(f, grad, x0, history_size = 10, max_iterations = 100, tolerance = 1e-8):
    """Minimize f with the limited-memory BFGS quasi-Newton method.

    Args:
        f: objective, mapping a 1-D numpy array to a scalar.
        grad: gradient of f, mapping a 1-D numpy array to a 1-D numpy array.
        x0: starting point (sequence of floats).
        history_size: number of curvature pairs kept for the inverse-Hessian
            approximation.
        max_iterations: iteration cap.
        tolerance: convergence threshold on the Euclidean gradient norm.

    Returns:
        LBFGSSolution with the final point, objective value, iteration count,
        and a success flag (True when the gradient norm fell below tolerance).
    """
    # Cast to float arrays so vector arithmetic works for list inputs too.
    x = np.asarray(x0, dtype=float)
    fx = f(x)
    g = np.asarray(grad(x), dtype=float)

    s_history = []    # displacements s_k = x_{k+1} - x_k
    y_history = []    # gradient differences y_k = g_{k+1} - g_k
    rho_history = []  # 1 / (s_k . y_k)

    for iteration in range(max_iterations):
        # Convergence check on the gradient norm.
        if math.sqrt(float(g @ g)) < tolerance:
            return LBFGSSolution(x, fx, iteration, True)

        # Two-loop recursion: implicitly apply the inverse-Hessian
        # approximation to the current gradient.
        q = g.copy()
        m = len(s_history)
        # Bug fix: alpha must be pre-sized (the original assigned alpha[i] on
        # an empty list), and the reverse range was written `-1. -1` (a single
        # float argument) instead of `-1, -1`.
        alpha = [0.0] * m
        for i in range(m - 1, -1, -1):
            alpha[i] = rho_history[i] * float(s_history[i] @ q)
            q = q - alpha[i] * y_history[i]

        # Initial Hessian scaling gamma = s'y / y'y from the newest pair.
        gamma = 1.0
        if m > 0:
            gamma = float(s_history[-1] @ y_history[-1]) / float(y_history[-1] @ y_history[-1])

        z = gamma * q
        for i in range(m):
            beta = rho_history[i] * float(y_history[i] @ z)
            z = z + (alpha[i] - beta) * s_history[i]

        p = -z  # search direction

        # Backtracking line search enforcing the Armijo sufficient-decrease
        # condition.
        step_size = 1.0
        c1 = 1e-4
        rho_ls = 0.5
        max_line_search = 20
        directional_derivative = float(g @ p)

        # Bug fix: the original built x_new via map() with malformed
        # two-argument lambdas (and once with no iterable at all); plain
        # numpy vector arithmetic replaces all of those calls.
        x_new = x + step_size * p
        fx_new = f(x_new)
        for _ in range(max_line_search):
            if fx_new <= fx + c1 * step_size * directional_derivative:
                break
            step_size *= rho_ls
            x_new = x + step_size * p
            fx_new = f(x_new)

        # Update the curvature-pair history.
        s = x_new - x
        g_new = np.asarray(grad(x_new), dtype=float)
        y = g_new - g
        sy = float(s @ y)
        # Skip the pair when s.y is tiny, otherwise the inverse-Hessian
        # approximation would lose positive definiteness.
        if sy > 1e-10:
            s_history.append(s)
            y_history.append(y)
            rho_history.append(1.0 / sy)
            # Bug fix: the original used the JavaScript-ism
            # `s_history.length` instead of len(s_history).
            if len(s_history) > history_size:
                del s_history[0]
                del y_history[0]
                del rho_history[0]

        x = x_new
        fx = fx_new
        g = g_new

    return LBFGSSolution(x, fx, max_iterations, False)

def l_bfgs_b(f, bounds, x0, history_size = 10, max_iterations = 100, tolerance = 1e-8, barrier_weight = 1e-6):
    """Bound-constrained minimization: L-BFGS on a log-barrier objective.

    Args:
        f: objective, mapping a 1-D numpy array to a scalar.
        bounds: per-coordinate (lower, upper) pairs; either bound may be None.
        x0: strictly feasible starting point.
        history_size, max_iterations, tolerance: passed through to l_bfgs.
        barrier_weight: weight of the log-barrier penalty terms.

    Returns:
        LBFGSSolution whose fx is the *unpenalized* objective at the solution.
    """
    def transformed_f(x):
        # Objective plus log-barrier terms; +inf outside the feasible box.
        penalty = 0.0
        for xi, (lower, upper) in zip(x, bounds):
            if lower is not None and xi <= lower:
                return float("inf")
            if upper is not None and xi >= upper:
                return float("inf")
            if lower is not None:
                penalty -= barrier_weight * math.log(xi - lower)
            if upper is not None:
                penalty -= barrier_weight * math.log(upper - xi)
        return f(x) + penalty

    def barrier_grad(x):
        return numerical_gradient(transformed_f, x)

    result = l_bfgs(transformed_f, barrier_grad, x0, history_size, max_iterations, tolerance)
    # Report the true objective value, not the barrier-augmented one.
    result.fx = f(result.x)
    return result