DR error measures: Difference between revisions
| Line 117: | Line 117: | ||
* Naturally deals with the ''x'' > 0 constraint using a log barrier | * Naturally deals with the ''x'' > 0 constraint using a log barrier | ||
==== Python code ==== | ==== Python code ==== | ||
<pre> | <pre>import numpy as np | ||
import numpy as np | |||
import math | import math | ||
from functools import reduce | from functools import reduce | ||
| Line 129: | Line 128: | ||
self.success = success | self.success = success | ||
def numerical_gradient(f, x, eps = 1e-8): | def numerical_gradient(f, x, eps=1e-8): | ||
grad = np. | grad = np.zeros_like(x) | ||
for i in range(len(x)): | for i in range(len(x)): | ||
x_plus = x.copy() | x_plus = x.copy() | ||
x_minus = x.copy() | x_minus = x.copy() | ||
x_plus[i] += eps | x_plus[i] += eps | ||
x_minus[i] | x_minus[i] -= eps | ||
grad[i] = (f(x_plus) - f(x_minus)) / (2 * eps) | grad[i] = (f(x_plus) - f(x_minus)) / (2 * eps) | ||
return grad | return grad | ||
def l_bfgs(f, grad, x0, history_size = 10, max_iterations = 100, tolerance = 1e- | def l_bfgs(f, grad, x0, history_size=10, max_iterations=100, tolerance=1e-5): | ||
x = np.array(x0, dtype=float) # Ensure float array | |||
x | n = len(x) | ||
fx = f(x) | fx = f(x) | ||
g = grad(x) | g = grad(x) | ||
| Line 149: | Line 148: | ||
rho_history = [] | rho_history = [] | ||
for | for iteration in range(max_iterations): | ||
# Check convergence | # Check convergence using numpy norm | ||
grad_norm = | grad_norm = np.linalg.norm(g) | ||
if grad_norm < tolerance: | if grad_norm < tolerance: | ||
return LBFGSSolution(x, fx, | return LBFGSSolution(x, fx, iteration, True) | ||
# | # --- Two-loop recursion --- | ||
q = g.copy() | q = g.copy() | ||
# | # We need to store alpha to use it in the second loop | ||
m = len(s_history) | |||
alpha = [0.0] * m | |||
gamma = (s_history[ | # First loop (Backward) | ||
for i in range(m - 1, -1, -1): | |||
alpha[i] = rho_history[i] * np.dot(s_history[i], q) | |||
q = q - alpha[i] * y_history[i] # Vectorized update | |||
# Initial Hessian approximation scaling | |||
gamma = 1.0 | |||
if m > 0: | |||
gamma = np.dot(s_history[-1], y_history[-1]) / np.dot(y_history[-1], y_history[-1]) | |||
z = q * gamma | z = q * gamma | ||
for i in range( | # Second loop (Forward) | ||
beta = rho_history[i] * (y_history[i] | for i in range(m): | ||
beta = rho_history[i] * np.dot(y_history[i], z) | |||
z = z + s_history[i] * (alpha[i] - beta) # Vectorized update | |||
p = -z # Search direction | p = -z # Search direction | ||
# Line search ( | # --- Line search (Backtracking) --- | ||
step_size = 1.0 | step_size = 1.0 | ||
c1 = 1e-4 | c1 = 1e-4 | ||
rho_ls = 0.5 | rho_ls = 0.5 | ||
max_line_search = 20 | max_line_search = 20 | ||
# Vectorized check for sufficient decrease (Armijo rule) | |||
# f(x + alpha*p) <= f(x) + c1 * alpha * p^T * g | |||
g_dot_p = np.dot(g, p) | |||
for _ in range(max_line_search): | for _ in range(max_line_search): | ||
if fx_new <= fx + c1 * step_size * | x_new = x + step_size * p | ||
fx_new = f(x_new) | |||
if fx_new <= fx + c1 * step_size * g_dot_p: | |||
break | break | ||
step_size *= rho_ls | step_size *= rho_ls | ||
else: | |||
# If line search fails to find a better point, stop or accept best attempt | |||
# (Here we just accept the last reduced step_size) | |||
pass | |||
# Update | # --- Update History --- | ||
s = | s = x_new - x | ||
g_new = grad(x_new) | g_new = grad(x_new) | ||
y = | y = g_new - g | ||
sy = s | sy = np.dot(s, y) | ||
if sy > 1e-10: | |||
if sy > 1e-10: # Ensure curvature condition | |||
s_history.append(s) | s_history.append(s) | ||
y_history.append(y) | y_history.append(y) | ||
rho_history.append(1 / sy) | rho_history.append(1.0 / sy) | ||
if s_history | # Maintain history size | ||
s_history | if len(s_history) > history_size: | ||
y_history | s_history.pop(0) | ||
rho_history | y_history.pop(0) | ||
rho_history.pop(0) | |||
x = x_new | x = x_new | ||
| Line 216: | Line 227: | ||
return LBFGSSolution(x, fx, max_iterations, False) | return LBFGSSolution(x, fx, max_iterations, False) | ||
def | def l_bfgs_barrier(f, bounds, x0, history_size=10, max_iterations=100, tolerance=1e-5, barrier_weight=1e-4): | ||
""" | |||
Solves bounded optimization using a Log-Barrier method wrapped around L-BFGS. | |||
Note: x0 must be strictly feasible (inside bounds). | |||
""" | |||
def transformed_f(x): | def transformed_f(x): | ||
penalty = 0 | penalty = 0 | ||
for i in | for i, val in enumerate(x): | ||
lower, upper = bounds[i] | |||
if lower is not None and | |||
# Hard cutoff prevents log domain errors during line search exploration | |||
if (lower is not None and val <= lower) or (upper is not None and val >= upper): | |||
return float("inf") | return float("inf") | ||
# | # Log barrier penalties | ||
if lower is not None: | if lower is not None: | ||
penalty -= barrier_weight * math.log( | penalty -= barrier_weight * math.log(val - lower) | ||
if upper is not None: | if upper is not None: | ||
penalty -= barrier_weight * math.log(upper - | penalty -= barrier_weight * math.log(upper - val) | ||
return f(x) + penalty | return f(x) + penalty | ||
# Use the transformed function for gradients as well | |||
grad = lambda x: numerical_gradient(transformed_f, x) | grad = lambda x: numerical_gradient(transformed_f, x) | ||
result = l_bfgs(transformed_f, grad, x0, history_size, max_iterations, tolerance) | result = l_bfgs(transformed_f, grad, x0, history_size, max_iterations, tolerance) | ||
# | |||
# Restore actual function value | |||
result.fx = f(result.x) | result.fx = f(result.x) | ||
return result | return result | ||
Revision as of 04:24, 14 December 2025
This article will describe several least-squares error measures for delta-rational chords. They have the advantage of not fixing a particular interval in the chord when constructing the chord of best fit. However, like any other numerical measure of concordance or error, you should take them with a grain of salt.
Conventions and introduction
The idea motivating least-squares error measures on a chord as an approximation to a given delta signature is the following (for simplicity, let’s talk about the fully DR case first):
We want the error of a chord 1:r1:r2:...:rn (in increasing order), with n > 1, in the linear domain as an approximation to a fully delta-rational chord with signature +δ1 +δ2 ... +δn, i.e. a chord
with root real-valued harmonic x. Let $\Delta_i = \delta_1 + \delta_2 + \cdots + \delta_i$ be the delta signature +δ1 +δ2 ... +δn written cumulatively.
We want to measure the error without having to fix any dyad (as one might naively fix a dyad and measure errors in the other deltas in relation to the fixed dyad). To do this we solve a least-squares error problem: use a root-sum-square error function and optimize x (and any free deltas) to minimize that function.
Domain and error model
We have two choices:
- to measure either the linear (frequency ratio) error or the logarithmic (cents) one (called the domain);
- the collection of intervals to sum over (which we call the error model):
- Rooted: Only intervals from the root real-valued harmonic x are chosen.
- Pairwise: All intervals in the chord are compared.
- All-steps: Only intervals between adjacent notes are compared.
We arrive at the following general formula: let $M$ be the error model (the collection of index pairs summed over), and let $D$ represent the domain function (the identity for the linear domain, or $\log$ for the logarithmic one). Then the error function to be minimized by optimizing $x$ and any free deltas is:
| Domain | Error model | Error function |
|---|---|---|
| Linear | Rooted | |
| Pairwise | ||
| All-steps | ||
| Logarithmic (nepers) |
Rooted | |
| Pairwise | ||
| All-steps |
To convert nepers to cents, multiply by $1200/\ln 2 \approx 1731.234$.
Solution methods
Analytic
Fully DR, rooted linear
Setting the derivative to 0 gives us the closed-form solution
which can be plugged back into
to obtain the least-squares linear error.
Partially DR (one related delta set, one free variable), rooted linear
Suppose we wish to approximate a target delta signature of the form with the chord (where the +? is free to vary). The least-squares problem is
where y represents the free delta +?.
We can set the partial derivatives with respect to x and y of the inner expression equal to zero (since the derivative of sqrt() is never 0) and use SymPy to solve the system symbolically:
import sympy
# Symbols for the root harmonic x and the free delta y (both real-valued)
x = sympy.Symbol("x", real=True)
y = sympy.Symbol("y", real=True)
# Delta-signature entries; named with LaTeX subscripts for pretty printing
d1 = sympy.Symbol("\\delta_{1}", real=True)
d2 = sympy.Symbol("\\delta_{2}", real=True)  # NOTE(review): defined but unused below — the free delta y takes its place
d3 = sympy.Symbol("\\delta_{3}", real=True)
# Target chord ratios 1 : r1 : r2 : r3
r1 = sympy.Symbol("r_1", real=True)
r2 = sympy.Symbol("r_2", real=True)
r3 = sympy.Symbol("r_3", real=True)
# Rooted linear squared error: each chord tone (x + cumulative deltas) / x
# is compared against its target ratio; y stands in for the free delta +?
err_squared = ((x + d1) / x - r1) ** 2 + ((x + d1 + y) / x - r2) ** 2 + ((x + d1 + y + d3) / x - r3) ** 2
err_squared.expand()
# Stationarity conditions: both partial derivatives must vanish at the optimum
err_squared_x = sympy.diff(err_squared, x)
err_squared_y = sympy.diff(err_squared, y)
sympy.nonlinsolve([err_squared_x, err_squared_y], [x, y])
The unique solution with x > 0 is
Grid method (FDR case)
L-BFGS-B (one related delta set, arbitrary free deltas)
We let x1 = x and include additional free variables x2, ..., xn, one for every additional +?, after coalescing strings of consecutive +?'s into segments and after trimming leading and trailing free delta segments.
L-BFGS-B is a quasi-Newton optimization method particularly suited for this problem:
- The error function is smooth, allowing use of gradients
- Fast convergence, requiring at worst 20 iterations for accuracy
- Naturally deals with the x > 0 constraint using a log barrier
Python code
import numpy as np
import math
from functools import reduce
class LBFGSSolution:
    """Result container for an L-BFGS run.

    Attributes:
        x: Final iterate (numpy array).
        fx: Objective value at ``x``.
        iterations: Number of iterations performed before termination.
        success: True if the gradient-norm tolerance was met.
    """

    def __init__(self, x, fx, iterations, success):
        self.x = x
        self.fx = fx
        self.iterations = iterations
        self.success = success

    def __repr__(self):
        # Debug-friendly representation mirroring the constructor signature.
        return (f"{type(self).__name__}(x={self.x!r}, fx={self.fx!r}, "
                f"iterations={self.iterations!r}, success={self.success!r})")
def numerical_gradient(f, x, eps=1e-8):
    """Estimate the gradient of ``f`` at ``x`` via central differences.

    Args:
        f: Scalar-valued function of a 1-D numpy array.
        x: Point at which to differentiate (any numeric array-like).
        eps: Half-width of the central-difference stencil.

    Returns:
        numpy float array, same length as ``x``, holding the estimate.
    """
    # Coerce to float first: with an integer input array, zeros_like would
    # produce an int gradient buffer and x_plus[i] += eps would truncate the
    # perturbation to zero, yielding a garbage (zero) derivative.
    x = np.asarray(x, dtype=float)
    grad = np.zeros_like(x)
    for i in range(len(x)):
        x_plus = x.copy()
        x_minus = x.copy()
        x_plus[i] += eps
        x_minus[i] -= eps
        # Central difference: O(eps^2) truncation error
        grad[i] = (f(x_plus) - f(x_minus)) / (2 * eps)
    return grad
def l_bfgs(f, grad, x0, history_size=10, max_iterations=100, tolerance=1e-5):
    """Minimize ``f`` with limited-memory BFGS (two-loop recursion).

    Args:
        f: Objective function mapping a 1-D numpy array to a float.
        grad: Gradient function mapping the same array to an array.
        x0: Starting point (array-like; copied into a float numpy array).
        history_size: Number of (s, y) correction pairs to retain.
        max_iterations: Iteration cap before giving up.
        tolerance: Stop when the Euclidean gradient norm falls below this.

    Returns:
        LBFGSSolution; ``success`` is True only when the gradient tolerance
        was reached within ``max_iterations``.
    """
    x = np.array(x0, dtype=float)  # Ensure float array
    fx = f(x)
    g = grad(x)
    s_history = []
    y_history = []
    rho_history = []
    for iteration in range(max_iterations):
        # Check convergence on the gradient norm
        if np.linalg.norm(g) < tolerance:
            return LBFGSSolution(x, fx, iteration, True)
        # --- Two-loop recursion: compute z ~ H_k^{-1} g ---
        q = g.copy()
        m = len(s_history)
        alpha = [0.0] * m  # saved by the first loop for use in the second
        # First loop (backward through the history)
        for i in range(m - 1, -1, -1):
            alpha[i] = rho_history[i] * np.dot(s_history[i], q)
            q = q - alpha[i] * y_history[i]
        # Initial Hessian approximation scaling from the latest pair
        gamma = 1.0
        if m > 0:
            gamma = np.dot(s_history[-1], y_history[-1]) / np.dot(y_history[-1], y_history[-1])
        z = q * gamma
        # Second loop (forward through the history)
        for i in range(m):
            beta = rho_history[i] * np.dot(y_history[i], z)
            z = z + s_history[i] * (alpha[i] - beta)
        p = -z  # Search direction
        # --- Backtracking line search (Armijo sufficient decrease) ---
        # Accepts step when f(x + a*p) <= f(x) + c1 * a * p^T g.
        step_size = 1.0
        c1 = 1e-4
        rho_ls = 0.5
        max_line_search = 20
        g_dot_p = np.dot(g, p)
        for _ in range(max_line_search):
            x_new = x + step_size * p
            fx_new = f(x_new)
            if fx_new <= fx + c1 * step_size * g_dot_p:
                break
            step_size *= rho_ls
        # If the search budget runs out, the last (smallest-step) trial
        # point is accepted as-is.
        # --- Update the correction-pair history ---
        s = x_new - x
        g_new = grad(x_new)
        y = g_new - g
        sy = np.dot(s, y)
        if sy > 1e-10:  # Curvature condition keeps the inverse-Hessian PD
            s_history.append(s)
            y_history.append(y)
            rho_history.append(1.0 / sy)
            # Maintain the bounded history size
            if len(s_history) > history_size:
                s_history.pop(0)
                y_history.pop(0)
                rho_history.pop(0)
        x = x_new
        fx = fx_new
        g = g_new
    return LBFGSSolution(x, fx, max_iterations, False)
def l_bfgs_barrier(f, bounds, x0, history_size=10, max_iterations=100, tolerance=1e-5, barrier_weight=1e-4):
    """
    Solve a bound-constrained minimization by wrapping L-BFGS around a
    log-barrier transformation of the objective.

    ``bounds`` holds one (lower, upper) pair per variable; either end may be
    None for an unbounded side. x0 must be strictly feasible (strictly
    inside every bound).
    """
    def penalized(point):
        total = 0
        for i, value in enumerate(point):
            lower, upper = bounds[i]
            # Reject any point on or outside the feasible region outright;
            # this also shields math.log from non-positive arguments while
            # the line search probes aggressive steps.
            below = lower is not None and value <= lower
            above = upper is not None and value >= upper
            if below or above:
                return float("inf")
            # Logarithmic barrier contribution for each finite bound
            if lower is not None:
                total -= barrier_weight * math.log(value - lower)
            if upper is not None:
                total -= barrier_weight * math.log(upper - value)
        return f(point) + total

    # Differentiate the barrier-augmented objective, not the raw one
    def penalized_grad(point):
        return numerical_gradient(penalized, point)

    result = l_bfgs(penalized, penalized_grad, x0, history_size, max_iterations, tolerance)
    # Report the unpenalized objective value at the solution
    result.fx = f(result.x)
    return result
