# In this example we find a minimum of the Rosenbrock function without bounds
# on the independent variables.
import numpy as np

def fun_rosenbrock(x):
    return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

# Notice that we only provide the vector of the residuals. The algorithm
# constructs the cost function as a sum of squares of the residuals, which
# gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.
from scipy.optimize import least_squares

x0_rosenbrock = np.array([2, 2])
res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
res_1.x           # array([ 1., 1.])
res_1.cost        # 9.8669242910846867e-30
res_1.optimality  # 8.8928864934219529e-14

# We now constrain the variables in such a way that the previous solution
# becomes infeasible. Specifically, we require that ``x[1] >= 1.5`` and leave
# ``x[0]`` unconstrained. To this end, we specify the `bounds` parameter
# to `least_squares` in the form ``bounds=([-np.inf, 1.5], np.inf)``.
# We also provide the analytic Jacobian:

def jac_rosenbrock(x):
    return np.array([
        [-20 * x[0], 10],
        [-1, 0]])

# Putting this all together, we see that the new solution lies on the bound:
res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
                      bounds=([-np.inf, 1.5], np.inf))
res_2.x           # array([ 1.22437075, 1.5 ])
res_2.cost        # 0.025213093946805685
res_2.optimality  # 1.5885401433157753e-07

# Now we solve a system of equations (i.e., the cost function should be zero
# at a minimum) for a Broyden tridiagonal vector-valued function of 100000
# variables:

def fun_broyden(x):
    f = (3 - x) * x + 1
    f[1:] -= x[:-1]
    f[:-1] -= 2 * x[1:]
    return f

# The corresponding Jacobian matrix is sparse. We tell the algorithm to
# estimate it by finite differences and provide the sparsity structure of
# the Jacobian to significantly speed up this process.
from scipy.sparse import lil_matrix

def sparsity_broyden(n):
    sparsity = lil_matrix((n, n), dtype=int)
    i = np.arange(n)
    sparsity[i, i] = 1
    i = np.arange(1, n)
    sparsity[i, i - 1] = 1
    i = np.arange(n - 1)
    sparsity[i, i + 1] = 1
    return sparsity

n = 100000
x0_broyden = -np.ones(n)

res_3 = least_squares(fun_broyden, x0_broyden,
                      jac_sparsity=sparsity_broyden(n))
res_3.cost        # 4.5687069299604613e-23
res_3.optimality  # 1.1650454296851518e-11

# Let's also solve a curve-fitting problem using a robust loss function to
# take care of outliers in the data. Define the model function as
# ``y = a + b * exp(c * t)``, where ``t`` is a predictor variable, ``y`` is
# an observation, and ``a``, ``b``, ``c`` are parameters to estimate.
# First, define the function which generates the data with noise and
# outliers, define the model parameters, and generate the data:

def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    y = a + b * np.exp(t * c)

    rnd = np.random.RandomState(random_state)
    error = noise * rnd.randn(t.size)
    outliers = rnd.randint(0, t.size, n_outliers)
    error[outliers] *= 10

    return y + error

a = 0.5
b = 2.0
c = -1
t_min = 0
t_max = 10
n_points = 15

t_train = np.linspace(t_min, t_max, n_points)
y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

# Define the function for computing residuals and the initial estimate of
# the parameters.

def fun(x, t, y):
    return x[0] + x[1] * np.exp(x[2] * t) - y

x0 = np.array([1.0, 1.0, 0.0])

# Compute a standard least-squares solution:
res_lsq = least_squares(fun, x0, args=(t_train, y_train))
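# As a quick, optional check (a sketch that only uses the variables defined
# above), we can look at the largest absolute residuals of this plain fit;
# they should correspond to the injected outliers and noticeably exceed the
# 0.1 noise level used when generating the data.
np.sort(np.abs(fun(res_lsq.x, t_train, y_train)))[-3:]  # three largest absolute residuals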
# Now compute two solutions with two different robust loss functions. The
# parameter `f_scale` is set to 0.1, meaning that inlier residuals should
# not significantly exceed 0.1 (the noise level used).
res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
                            args=(t_train, y_train))
res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
                        args=(t_train, y_train))

# And finally plot all the curves. We see that by selecting an appropriate
# `loss` we can get estimates close to optimal even in the presence of
# strong outliers. But keep in mind that it is generally recommended to try
# 'soft_l1' or 'huber' losses first (if at all necessary), as the other two
# options may cause difficulties in the optimization process.
t_test = np.linspace(t_min, t_max, n_points * 10)
y_true = gen_data(t_test, a, b, c)
y_lsq = gen_data(t_test, *res_lsq.x)
y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
y_log = gen_data(t_test, *res_log.x)

import matplotlib.pyplot as plt

plt.plot(t_train, y_train, 'o')
plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
plt.plot(t_test, y_lsq, label='linear loss')
plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
plt.plot(t_test, y_log, label='cauchy loss')
plt.xlabel("t")
plt.ylabel("y")
plt.legend()
plt.show()
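# As a final, optional comparison (again a sketch using only the variables
# defined above), the recovered parameters can be placed next to the true
# values (a, b, c) = (0.5, 2.0, -1); the robust fits are expected to land
# closer to them than the plain least-squares fit.
print("true   :", np.array([a, b, c]))
print("linear :", res_lsq.x)
print("soft_l1:", res_soft_l1.x)
print("cauchy :", res_log.x)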