Source code for double_pendulum.simulator

#!/usr/bin/env python3

# Enable basic compatibility features to work with either Python 2 or 3.
from __future__ import print_function, absolute_import, unicode_literals

# Standard library modules.
import math, time, argparse, threading, queue

# Third-party library modules.
import numpy as np

# Use the scipy fmin optimizer if available.
# ref: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin.html
try:
    import scipy.optimize
    have_optimizer = True
except ImportError:
    print("Unable to load scipy.optimize.")
    have_optimizer = False

# This uses python-osc to communicate with a Max/MSP patch.
#   installation:      pip3 install python-osc
#   source code:       https://github.com/attwad/python-osc
#   pypi description:  https://pypi.org/project/python-osc/
from pythonosc import udp_client
from pythonosc import dispatcher
from pythonosc import osc_server

################################################################
[docs]class DoublePendulumController(object):
    """Prototype for a double-pendulum controller.  This is where you should
    customize the underlying control functions, optimization strategy, and
    reward functions.

    :param args: namespace of command-line arguments returned from argparse
    """
    def __init__(self, args):

        # initialize mode and state variables for controller
        self.mode = 'dissipate'
        self.cycle_start = 0.0
        self.torque_cost = 0.0

        # fixed controller parameters
        self.friction_damping = -0.2
        self.hand_up_target = np.array((1.0, 0.5*math.pi, 0.0, 0.0))
        self.kp    = np.array((16.0, 8.0))
        self.ki    = np.array((4.0, 2.0))
        self.kd    = np.array((4.0, 2.0))

        # integrated error
        self.ierr  = np.array((0.0, 0.0, 0.0, 0.0))

        # keep track of the worst case cost to know when to cancel a trial
        self.worst_cost = None

        # Construct default parameter vector for the optimization problem.  This
        # builds a single array with all the default parameter values.  See the
        # set_optimization_params() method to update the mapping from parameter
        # vector back to controller parameters.
        self.default_params = np.hstack((self.kp, self.ki, self.kd))

        # launch a thread to run the optimizer as a co-routine
        self.evaluation_queue = queue.Queue() # next parameter array to evaluate
        self.result_queue     = queue.Queue() # next computed cost value
        self.optimizer_thread = threading.Thread(target=self.optimizer_task)
        self.optimizer_thread.daemon = True
        self.optimizer_thread.start()
        return

    #================================================================

[docs]    def message(self, msgaddr, *args):
        """Process messages from the Max/MSP display received via OSC over UDP.

        :param msgaddr: the address string of the OSC message, eg '/control/mode'
        :param args: tuple of OSC message arguments
        """

        print("Controller received message %s: %s" % (msgaddr, args))
        if msgaddr == "/control/kp1":
            self.kp[0] = args[0]
        elif msgaddr == "/control/kd1":
            self.kd[0] = args[0]
        elif msgaddr == "/control/mode":
            self.mode = args[0]

    #================================================================
[docs]    def optimizer_task(self):
        """Entry point for starting the optimizer on a background thread.  It communicates with the simulator using a pair of queues."""
        print("Starting optimizer thread.")
        if have_optimizer:
            result = scipy.optimize.fmin(func=self.optimizer_eval, x0=self.default_params)
            print("Optimizer finished:", result)
        else:
            print("Optimizer not available, evaluating default parameters.")
            result = self.optimizer_eval(self.default_params)
        return

[docs]    def optimizer_eval(self, params):
        """Optimizer callback function to evaluate a parameter vector.  This pushes the
        vector into a queue and blocks the optimizer thread waiting for a cost result.

        :param params: numpy ndarray for the optimization problem parameters
        :returns: a scalar cost value

        """

        print("Optimizer requesting evaluation of:", params)
        self.evaluation_queue.put(params)

        # wait for result
        cost = self.result_queue.get()
        print("Trial yielded cost:", cost)
        return cost

[docs]    def set_optimization_params(self, params):
        """Configure controller gains for a single trial of the optimization problem.
        Called from within the controller state machine when a new parameter
        vector is received for evaluation.

        :param params: numpy ndarray of optimization parameters
        """
        self.kp = params[0:2]
        self.ki = params[2:4]
        self.kd = params[4:6]
        print("Set optimization params: kp is %s, ki is %s, kd is %s" % (self.kp, self.ki, self.kd))
        return

    #================================================================
[docs]    def compute_control(self, t, dt, state, tau):
        """Method called from simulator to calculate the next step of applied torques.

        :param t: time in seconds since simulation began
        :param state: four element ndarray of joint positions q and joint velocities qd as [q1, q2, qd1, qd2], expressed in radians and radians/sec
        :param tau: two element ndarray to fill in with joint torques to apply
        """
        # convenience variables to notate the state variables
        q1  = state[0]  # 'shoulder' angle in radians
        q2  = state[1]  # 'elbow' angle in radians
        qd1 = state[2]  # 'shoulder' velocity in radians/second
        qd2 = state[3]  # 'elbow' velocity in radians/second

        # select action based on mode
        if self.mode == 'dissipate':
            # add viscous damping to dissipate energy
            tau[0] = self.friction_damping * qd1
            tau[1] = self.friction_damping * qd2

        elif self.mode == 'pose':
            # calculate position and velocity error as difference from reference state
            qerr = self.hand_up_target - state

            # apply PD control to reach the pose (no integral term)
            tau[0] = (self.kp[0] * qerr[0]) + (self.kd[0] * qerr[2])
            tau[1] = (self.kp[1] * qerr[1]) + (self.kd[1] * qerr[3])

        #--------------------------------------------------------------------------------
        # The following states alternate between driving the pendulum to a
        # 'hand-up' pose and relaxing back to the bottom.  The 'hand-up' pose
        # has the first link horizontal, with the second link pointing straight
        # up.  These can be used in an optimization process to find the most
        # efficient transition between the two poses.
        elif self.mode == 'hand-init' or self.mode == 'optimize':
            # wait for the next optimization trial to begin
            tau[0] = 16.0 * self.friction_damping * qd1
            tau[1] = 16.0 * self.friction_damping * qd2
            if not self.evaluation_queue.empty():
                params = self.evaluation_queue.get()
                self.set_optimization_params(params)
                self.torque_cost = 0.0
                self.mode = 'hand-up'

        elif self.mode == 'hand-up':
            # calculate position and velocity error as difference from reference state
            qerr = self.hand_up_target - state

            # apply PID control to reach the pose
            tau[0] = (self.kp[0] * qerr[0]) + (self.kd[0] * qerr[2]) + (self.ki[0] * self.ierr[0])
            tau[1] = (self.kp[1] * qerr[1]) + (self.kd[1] * qerr[3]) + (self.ki[1] * self.ierr[1])

            # integrate the error
            self.ierr = self.ierr + qerr * dt

            # once at the target, relax, and finish the trial
            if max(abs(qerr[0:2])) < 0.01 and max(abs(qerr[2:4])) < 0.1:
                self.mode = 'hand-down'
                self.ierr  = np.array((0.0, 0.0, 0.0, 0.0)) # reset integrator
                self.result_queue.put(self.torque_cost)
                if self.worst_cost is None: self.worst_cost = self.torque_cost
                else: self.worst_cost = max(self.worst_cost, self.torque_cost)

            # if the trial isn't converging, quit it early
            elif self.worst_cost is not None and self.torque_cost > 2*self.worst_cost:
                print("Cancelling trial at cost: ", self.torque_cost)
                self.mode = 'hand-down'
                self.ierr  = np.array((0.0, 0.0, 0.0, 0.0)) # reset integrator
                self.result_queue.put(self.torque_cost)

        elif self.mode == 'hand-down':
            # relax and let the arm fall back down to the bottom
            tau[0] = 16.0 * self.friction_damping * qd1
            tau[1] = 16.0 * self.friction_damping * qd2

            # if completely at rest, restart the process
            if abs(qd1) < 0.01 and abs(q1) < 0.01:
                self.mode = 'hand-init'

        #----------------------------------------
        elif self.mode == 'swingup':
            tau[1] = self.friction_damping * qd2 # assume unpowered elbow

            # if completely at rest, add a slight perturbation to kick-start the process
            if abs(qd1) < 0.01 and abs(q1) < 0.01:
                tau[0] = 1.0

            # unstable positive velocity feedback near bottom
            elif abs(q1) < 0.5:
                tau[0] = 2.0 * qd1

            else: # else coast
                tau[0] = self.friction_damping * qd1
        #----------------------------------------
        else: # free-swinging, no torques
            tau[0] = 0.0
            tau[1] = 0.0

        # accumulate sum of squared torques cost
        self.torque_cost += tau.dot(tau)
        return

################################################################
[docs]class DoublePendulumSimulator(object):
    """Dynamic simulation of a frictionless double pendulum.  This object can send
    state updates to an external server for rendering.  It communicates with a
    user-supplied control object to compute applied joint torques.

    :param args: namespace of command-line arguments returned from argparse
    :param cartoon: python-osc UDP client object for sending messages to GUI
    :param control: instance of DoublePendulumControl for computing applied torques
    """

    def __init__(self, args, cartoon, control):
        # save the OSC client object used to update the display
        self.cartoon = cartoon

        # save the object used to calculate joint torques
        self.control = control

        # save selected command line flags
        self.verbose = args.verbose
        self.fast = args.fast

        # set default dynamics
        self.set_default_dynamic_parameters()

        # configure transient state
        self.reset()
        return

[docs]    def reset(self):
        """Reset or initialize all simulator state variables."""
        self.t     = 0.0
        self.dt    = 0.001
        self.state = np.array([0.0, 0.0, 0.0, 0.0])
        self.tau   = np.array([0.0, 0.0])
        self.dydt  = np.ndarray((4,))
        return

[docs]    def set_default_dynamic_parameters(self):
        """Set the default dynamics coefficients defining the rigid-body model physics."""
        self.l1   = 1.0    # proximal link length, link1
        self.l2   = 1.0    # distal link length, link2
        self.lc1  = 0.5    # distance from proximal joint to link1 COM
        self.lc2  = 0.5    # distance from distal joint to link2 COM
        self.m1   = 1.0    # link1 mass
        self.m2   = 1.0    # link2 mass
        self.I1   = (self.m1 * self.l1**2) / 12  # link1 moment of inertia
        self.I2   = (self.m2 * self.l2**2) / 12  # link2 moment of inertia
        self.gravity  = -9.81
        return

    #================================================================
[docs]    def deriv(self):
        """Calculate the accelerations for a rigid body double-pendulum dynamics model.
        :returns: system derivative vector as a numpy ndarray
        """
        q1  = self.state[0]
        q2  = self.state[1]
        qd1 = self.state[2]
        qd2 = self.state[3]
        LC1 = self.lc1
        LC2 = self.lc2
        L1 = self.l1
        M1 = self.m1
        M2 = self.m2

        d11 = M1*LC1*LC1  + M2*(L1*L1 + LC2*LC2 + 2*L1*LC2*math.cos(q2)) + self.I1 + self.I2
        d12 = M2*(LC2*LC2 + L1*LC2*math.cos(q2)) + self.I2
        d21 = d12
        d22 = M2*LC2*LC2  + self.I2

        h1 = -M2*L1*LC2*math.sin(q2)*qd2*qd2 - 2*M2*L1*LC2*math.sin(q2)*qd2*qd1
        h2 = M2*L1*LC2*math.sin(q2)*qd1*qd1

        phi1 = -M2*LC2*self.gravity*math.sin(q1+q2)  - (M1*LC1 + M2*L1) * self.gravity * math.sin(q1)
        phi2 = -M2*LC2*self.gravity*math.sin(q1+q2)

        # now solve the equations for qdd:
        #  d11 qdd1 + d12 qdd2 + h1 + phi1 = tau1
        #  d21 qdd1 + d22 qdd2 + h2 + phi2 = tau2

        rhs1 = self.tau[0] - h1 - phi1
        rhs2 = self.tau[1] - h2 - phi2

        # Apply Cramer's Rule to compute the accelerations using
        # determinants by solving D qdd = rhs.  First compute the
        # denominator as the determinant of D:
        denom = (d11 * d22) - (d21 * d12)

        # the derivative of the position is trivially the current velocity
        self.dydt[0] = qd1
        self.dydt[1] = qd2

        # the derivative of the velocity is the acceleration.
        # the numerator of qdd[n] is the determinant of the matrix in
        # which the nth column of D is replaced by RHS
        self.dydt[2] = ((rhs1 * d22 ) - (rhs2 * d12)) / denom
        self.dydt[3] = (( d11 * rhs2) - (d21  * rhs1)) / denom
        return self.dydt

    #================================================================
[docs]    def timer_tick(self, delta_t):
        """Run the simulation for an interval.

        :param delta_t: length of interval in simulated time seconds
        """
        while delta_t > 0:

            # calculate next control outputs
            self.control.compute_control(self.t, self.dt, self.state, self.tau)

            # calculate dynamics model
            qd = self.deriv()

            # Euler integration
            self.state = self.state + self.dt * qd
            delta_t -= self.dt
            self.t += self.dt

[docs]    def update_display(self):
        """Send the current state to the GUI using a UDP OSC message."""
        radian_to_degrees = 180.0 / math.pi
        pose = (self.state[0]*radian_to_degrees, self.state[1]*radian_to_degrees)
        self.cartoon.send_message("/q", pose)
        if self.verbose: print("Position: ", pose)
        return

[docs]    def event_loop(self):
        """Simulator run loop; never exits."""
        frame_interval = 0.040 if not self.fast else 10.0
        while True:
            self.timer_tick(frame_interval)
            self.update_display()
            if not self.fast: time.sleep(frame_interval)

    #================================================================
    # Methods to process messages from the Max/MSP display received via OSC over UDP.
[docs]    def message(self, msgaddr, *args):
        """Process messages from the Max/MSP display received via OSC over UDP.

        :param msgaddr: the address string of the OSC message, eg '/sim/verbose'
        :param args: tuple of OSC message arguments
        """

        print("Simulator received message %s: %s" % (msgaddr, args))
        if msgaddr == "/sim/verbose":
            self.verbose = (args[0] != 0)

[docs]    def unknown_message(self, msgaddr, *args):
        """Default handler for unrecognized OSC messages."""
        print("Simulator received unmapped message %s: %s" % (msgaddr, args))

################################################################
def start_osc_server(args, sim, control):
    """Start a background thread running an OSC server listening for messages on an UDP socket."""

    # Initialize the OSC message dispatch system.
    dispatch = dispatcher.Dispatcher()
    dispatch.map("/sim/*", sim.message)
    dispatch.map("/control/*", control.message)
    dispatch.set_default_handler(sim.unknown_message)

    # Start and run the server.
    server = osc_server.ThreadingOSCUDPServer((args.simaddr, args.simport), dispatch)
    server_thread = threading.Thread(target=server.serve_forever)
    server_thread.daemon = True
    server_thread.start()

################################################################
# Script entry point.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Double-pendulum or two-link arm simulator.")
    parser.add_argument('--verbose', action='store_true', help='Enable debugging output.')
    parser.add_argument('--maxport', type=int, default=16375, help='UDP port for Max/MSP patch (default 16375).')
    parser.add_argument('--maxaddr', default="127.0.0.1", help='IP address for the Max/MSP patch (default localhost).')
    parser.add_argument('--simport', type=int, default=54375, help='UDP port for Python simulator (default 54375).')
    parser.add_argument('--simaddr', default="127.0.0.1", help='IP address for the simulator (default localhost).')
    parser.add_argument('--fast', action='store_true', help='Run as fast as possible.')
    args = parser.parse_args()

    # create an OSC client endpoint to send display updates to Max/MSP
    cartoon = udp_client.SimpleUDPClient(args.maxaddr, args.maxport)

    # initializes the simulator
    control = DoublePendulumController(args)
    sim = DoublePendulumSimulator(args, cartoon, control)

    # set up the OSC message server to receive parameters from Max/MSP
    start_osc_server(args, sim, control)

    # begin the simulation
    sim.event_loop()