target/aarch64: Use 'bool' data type
[openocd.git] / src / target / aarch64.c
index ce4a8f6ae9a522afa2e193b22172a7cb9b94a18e..1fafcfd45de679e0b3bc067c3b4447a91f409540 100644 (file)
 #include "target_type.h"
 #include "armv8_opcodes.h"
 #include "armv8_cache.h"
+#include "arm_semihosting.h"
 #include <helper/time_support.h>
 
+enum restart_mode {
+       RESTART_LAZY,
+       RESTART_SYNC,
+};
+
+enum halt_mode {
+       HALT_LAZY,
+       HALT_SYNC,
+};
+
+struct aarch64_private_config {
+       struct adiv5_private_config adiv5_config;
+       struct arm_cti *cti;
+};
+
 static int aarch64_poll(struct target *target);
 static int aarch64_debug_entry(struct target *target);
 static int aarch64_restore_context(struct target *target, bool bpwp);
@@ -44,12 +60,17 @@ static int aarch64_unset_breakpoint(struct target *target,
 static int aarch64_mmu(struct target *target, int *enabled);
 static int aarch64_virt2phys(struct target *target,
        target_addr_t virt, target_addr_t *phys);
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        uint64_t address, uint32_t size, uint32_t count, uint8_t *buffer);
 
+#define foreach_smp_target(pos, head) \
+       for (pos = head; (pos != NULL); pos = pos->next)
+
 static int aarch64_restore_system_control_reg(struct target *target)
 {
+       enum arm_mode target_mode = ARM_MODE_ANY;
        int retval = ERROR_OK;
+       uint32_t instr;
 
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
@@ -59,51 +80,49 @@ static int aarch64_restore_system_control_reg(struct target *target)
                /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_v8->cp15_control_reg); */
 
                switch (armv8->arm.core_mode) {
-                       case ARMV8_64_EL0T:
-                       case ARMV8_64_EL1T:
-                       case ARMV8_64_EL1H:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               0, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL0T:
+                       target_mode = ARMV8_64_EL1H;
+                       /* fall through */
+               case ARMV8_64_EL1T:
+               case ARMV8_64_EL1H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+                       break;
+               case ARMV8_64_EL2T:
+               case ARMV8_64_EL2H:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
                        break;
-                       case ARMV8_64_EL2T:
-                       case ARMV8_64_EL2H:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               4, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+               case ARMV8_64_EL3H:
+               case ARMV8_64_EL3T:
+                       instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
                        break;
-                       case ARMV8_64_EL3H:
-                       case ARMV8_64_EL3T:
-                               retval = armv8->arm.msr(target, 3, /*op 0*/
-                                               6, 1,   /* op1, op2 */
-                                               0, 0,   /* CRn, CRm */
-                                               aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
+
+               case ARM_MODE_SVC:
+               case ARM_MODE_ABT:
+               case ARM_MODE_FIQ:
+               case ARM_MODE_IRQ:
+               case ARM_MODE_SYS:
+                       instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
                        break;
-                       default:
-                               retval = armv8->arm.mcr(target, 15, 0, 0, 1, 0, aarch64->system_control_reg);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               break;
-                       }
+
+               default:
+                       LOG_INFO("cannot read system control register in this mode");
+                       return ERROR_FAIL;
+               }
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+               retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr, aarch64->system_control_reg);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (target_mode != ARM_MODE_ANY)
+                       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
        }
+
        return retval;
 }
 
-/*  check address before aarch64_apb read write access with mmu on
- *  remove apb predictible data abort */
-static int aarch64_check_address(struct target *target, uint32_t address)
-{
-       /* TODO */
-       return ERROR_OK;
-}
 /*  modify system_control_reg in order to enable or disable mmu for :
  *  - virt2phys address conversion
  *  - read or write memory in phys or virt address */
@@ -112,6 +131,7 @@ static int aarch64_mmu_modify(struct target *target, int enable)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval = ERROR_OK;
+       uint32_t instr = 0;
 
        if (enable) {
                /*      if mmu enabled at target stop and mmu not enable */
@@ -119,86 +139,51 @@ static int aarch64_mmu_modify(struct target *target, int enable)
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
-               if (!(aarch64->system_control_reg_curr & 0x1U)) {
+               if (!(aarch64->system_control_reg_curr & 0x1U))
                        aarch64->system_control_reg_curr |= 0x1U;
-                       switch (armv8->arm.core_mode) {
-                               case ARMV8_64_EL0T:
-                               case ARMV8_64_EL1T:
-                               case ARMV8_64_EL1H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       0, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               case ARMV8_64_EL2T:
-                               case ARMV8_64_EL2H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       4, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               case ARMV8_64_EL3H:
-                               case ARMV8_64_EL3T:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       6, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               break;
-                               default:
-                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
-                       }
-               }
        } else {
                if (aarch64->system_control_reg_curr & 0x4U) {
                        /*  data cache is active */
                        aarch64->system_control_reg_curr &= ~0x4U;
-                       /* flush data cache armv7 function to be called */
+                       /* flush the data cache via the armv8 hook, if one is installed */
                        if (armv8->armv8_mmu.armv8_cache.flush_all_data_cache)
                                armv8->armv8_mmu.armv8_cache.flush_all_data_cache(target);
                }
                if ((aarch64->system_control_reg_curr & 0x1U)) {
                        aarch64->system_control_reg_curr &= ~0x1U;
-                       switch (armv8->arm.core_mode) {
-                               case ARMV8_64_EL0T:
-                               case ARMV8_64_EL1T:
-                               case ARMV8_64_EL1H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       0, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               case ARMV8_64_EL2T:
-                               case ARMV8_64_EL2H:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       4, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               case ARMV8_64_EL3H:
-                               case ARMV8_64_EL3T:
-                                       retval = armv8->arm.msr(target, 3, /*op 0*/
-                                                       6, 0,   /* op1, op2 */
-                                                       1, 0,   /* CRn, CRm */
-                                                       aarch64->system_control_reg_curr);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                                       break;
-                               default:
-                                       LOG_DEBUG("unknow cpu state 0x%x" PRIx32, armv8->arm.core_state);
-                                       break;
-                       }
                }
        }
+
+       switch (armv8->arm.core_mode) {
+       case ARMV8_64_EL0T:
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL1, 0);
+               break;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL2, 0);
+               break;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MSR_GP(SYSTEM_SCTLR_EL3, 0);
+               break;
+
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+       case ARM_MODE_SYS:
+               instr = ARMV4_5_MCR(15, 0, 0, 1, 0, 0);
+               break;
+
+       default:
+               LOG_DEBUG("unknown cpu state 0x%" PRIx32, armv8->arm.core_mode);
+               break;
+       }
+
+       retval = armv8->dpm.instr_write_data_r0(&armv8->dpm, instr,
+                               aarch64->system_control_reg_curr);
        return retval;
 }
 
@@ -211,7 +196,14 @@ static int aarch64_init_debug_access(struct target *target)
        int retval;
        uint32_t dummy;
 
-       LOG_DEBUG(" ");
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_OSLAR, 0);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("Examine %s failed", "oslock");
+               return retval;
+       }
 
        /* Clear Sticky Power Down status Bit in PRSR to enable access to
           the registers in the Core Power Domain */
@@ -228,27 +220,22 @@ static int aarch64_init_debug_access(struct target *target)
         */
 
        /* Enable CTI */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_CTR, 1);
-       /* By default, gate all channel triggers to and from the CTM */
+       retval = arm_cti_enable(armv8->cti, true);
+       /* By default, gate all channel events to and from the CTM */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_GATE, 0);
-       /* output halt requests to PE on channel 0 trigger */
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* output halt requests to PE on channel 0 event */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_OUTEN0, CTI_CHNL(0));
-       /* output restart requests to PE on channel 1 trigger */
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN0, CTI_CHNL(0));
+       /* output restart requests to PE on channel 1 event */
        if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_OUTEN1, CTI_CHNL(1));
+               retval = arm_cti_write_reg(armv8->cti, CTI_OUTEN1, CTI_CHNL(1));
        if (retval != ERROR_OK)
                return retval;
 
        /* Resync breakpoint registers */
 
-       /* Since this is likely called from init or reset, update target state information*/
-       return aarch64_poll(target);
+       return ERROR_OK;
 }
 
 /* Write to memory mapped registers directly with no cache or mmu handling */
@@ -279,121 +266,279 @@ static int aarch64_dpm_setup(struct aarch64_common *a8, uint64_t debug)
        return retval;
 }
 
-static struct target *get_aarch64(struct target *target, int32_t coreid)
+static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
 {
-       struct target_list *head;
-       struct target *curr;
+       struct armv8_common *armv8 = target_to_armv8(target);
+       return armv8_set_dbgreg_bits(armv8, CPUV8_DBG_DSCR, bit_mask, value);
+}
 
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr->coreid == coreid) && (curr->state == TARGET_HALTED))
-                       return curr;
-               head = head->next;
+static int aarch64_check_state_one(struct target *target,
+               uint32_t mask, uint32_t val, int *p_result, uint32_t *p_prsr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       uint32_t prsr;
+       int retval;
+
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_PRSR, &prsr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (p_prsr)
+               *p_prsr = prsr;
+
+       if (p_result)
+               *p_result = (prsr & mask) == (val & mask);
+
+       return ERROR_OK;
+}
+
+static int aarch64_wait_halt_one(struct target *target)
+{
+       int retval = ERROR_OK;
+       uint32_t prsr;
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               int halted;
+
+               retval = aarch64_check_state_one(target, PRSR_HALT, PRSR_HALT, &halted, &prsr);
+               if (retval != ERROR_OK || halted)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
+                       LOG_DEBUG("target %s timeout, prsr=0x%08"PRIx32, target_name(target), prsr);
+                       break;
+               }
        }
-       return target;
+       return retval;
 }
-static int aarch64_halt(struct target *target);
 
-static int aarch64_halt_smp(struct target *target)
+static int aarch64_prepare_halt_smp(struct target *target, bool exc_target, struct target **p_first)
 {
        int retval = ERROR_OK;
        struct target_list *head = target->head;
+       struct target *first = NULL;
 
-       while (head != (struct target_list *)NULL) {
+       LOG_DEBUG("target %s exc %i", target_name(target), exc_target);
+
+       while (head != NULL) {
                struct target *curr = head->target;
                struct armv8_common *armv8 = target_to_armv8(curr);
+               head = head->next;
+
+               if (exc_target && curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_RUNNING)
+                       continue;
+
+               /* HACK: mark this target as prepared for halting */
+               curr->debug_reason = DBG_REASON_DBGRQ;
 
                /* open the gate for channel 0 to let HALT requests pass to the CTM */
-               if (curr->smp)
-                       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->cti_base + CTI_GATE, CTI_CHNL(0));
+               retval = arm_cti_ungate_channel(armv8->cti, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_set_dscr_bits(curr, DSCR_HDE, DSCR_HDE);
                if (retval != ERROR_OK)
                        break;
 
-               head = head->next;
+               LOG_DEBUG("target %s prepared", target_name(curr));
+
+               if (first == NULL)
+                       first = curr;
        }
 
+       if (p_first) {
+               if (exc_target && first)
+                       *p_first = first;
+               else
+                       *p_first = target;
+       }
+
+       return retval;
+}
+
+static int aarch64_halt_one(struct target *target, enum halt_mode mode)
+{
+       int retval = ERROR_OK;
+       struct armv8_common *armv8 = target_to_armv8(target);
+
+       LOG_DEBUG("%s", target_name(target));
+
+       /* allow Halting Debug Mode */
+       retval = aarch64_set_dscr_bits(target, DSCR_HDE, DSCR_HDE);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* trigger an event on channel 0, this outputs a halt request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 0);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (mode == HALT_SYNC) {
+               retval = aarch64_wait_halt_one(target);
+               if (retval != ERROR_OK) {
+                       if (retval == ERROR_TARGET_TIMEOUT)
+                               LOG_ERROR("Timeout waiting for target %s halt", target_name(target));
+                       return retval;
+               }
+       }
+
+       return ERROR_OK;
+}
+
+static int aarch64_halt_smp(struct target *target, bool exc_target)
+{
+       struct target *next = target;
+       int retval;
+
+       /* prepare halt on all PEs of the group */
+       retval = aarch64_prepare_halt_smp(target, exc_target, &next);
+
+       if (exc_target && next == target)
+               return retval;
+
        /* halt the target PE */
        if (retval == ERROR_OK)
-               retval = aarch64_halt(target);
+               retval = aarch64_halt_one(next, HALT_LAZY);
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* wait for all PEs to halt */
+       int64_t then = timeval_ms();
+       for (;;) {
+               bool all_halted = true;
+               struct target_list *head;
+               struct target *curr;
+
+               foreach_smp_target(head, target->head) {
+                       int halted;
+
+                       curr = head->target;
+
+                       if (!target_was_examined(curr))
+                               continue;
+
+                       retval = aarch64_check_state_one(curr, PRSR_HALT, PRSR_HALT, &halted, NULL);
+                       if (retval != ERROR_OK || !halted) {
+                               all_halted = false;
+                               break;
+                       }
+               }
+
+               if (all_halted)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
+
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTIs are not connected by a common
+                * trigger matrix. It seems that we need to halt one core in each
+                * cluster explicitly. So if we find that a core has not halted
+                * yet, we explicitly halt that core to reach the second cluster.
+                */
+               retval = aarch64_halt_one(curr, HALT_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+       }
 
        return retval;
 }
 
-static int update_halt_gdb(struct target *target)
+static int update_halt_gdb(struct target *target, enum target_debug_reason debug_reason)
 {
-       int retval = 0;
-       if (target->gdb_service && target->gdb_service->core[0] == -1) {
-               target->gdb_service->target = target;
-               target->gdb_service->core[0] = target->coreid;
-               retval += aarch64_halt_smp(target);
+       struct target *gdb_target = NULL;
+       struct target_list *head;
+       struct target *curr;
+
+       if (debug_reason == DBG_REASON_NOTHALTED) {
+               LOG_DEBUG("Halting remaining targets in SMP group");
+               aarch64_halt_smp(target, true);
        }
-       return retval;
+
+       /* poll all targets in the group, but skip the target that serves GDB */
+       foreach_smp_target(head, target->head) {
+               curr = head->target;
+               /* skip calling context */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* skip targets that were already halted */
+               if (curr->state == TARGET_HALTED)
+                       continue;
+               /* remember the gdb_service->target */
+               if (curr->gdb_service != NULL)
+                       gdb_target = curr->gdb_service->target;
+               /* skip it */
+               if (curr == gdb_target)
+                       continue;
+
+               /* avoid recursion in aarch64_poll() */
+               curr->smp = 0;
+               aarch64_poll(curr);
+               curr->smp = 1;
+       }
+
+       /* after all targets were updated, poll the gdb serving target */
+       if (gdb_target != NULL && gdb_target != target)
+               aarch64_poll(gdb_target);
+
+       return ERROR_OK;
 }
 
 /*
- * Cortex-A8 Run control
+ * Aarch64 Run control
  */
 
 static int aarch64_poll(struct target *target)
 {
+       enum target_state prev_target_state;
        int retval = ERROR_OK;
-       uint32_t dscr;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
-       struct armv8_common *armv8 = &aarch64->armv8_common;
-       enum target_state prev_target_state = target->state;
-       /*  toggle to another core is done by gdb as follow */
-       /*  maint packet J core_id */
-       /*  continue */
-       /*  the next polling trigger an halt event sent to gdb */
-       if ((target->state == TARGET_HALTED) && (target->smp) &&
-               (target->gdb_service) &&
-               (target->gdb_service->target == NULL)) {
-               target->gdb_service->target =
-                       get_aarch64(target, target->gdb_service->core[1]);
-               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
-               return retval;
-       }
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       int halted;
+
+       retval = aarch64_check_state_one(target,
+                               PRSR_HALT, PRSR_HALT, &halted, NULL);
        if (retval != ERROR_OK)
                return retval;
-       aarch64->cpudbg_dscr = dscr;
 
-       if (DSCR_RUN_MODE(dscr) == 0x3) {
+       if (halted) {
+               prev_target_state = target->state;
                if (prev_target_state != TARGET_HALTED) {
+                       enum target_debug_reason debug_reason = target->debug_reason;
+
                        /* We have a halting debug event */
-                       LOG_DEBUG("Target halted");
                        target->state = TARGET_HALTED;
-                       if ((prev_target_state == TARGET_RUNNING)
-                               || (prev_target_state == TARGET_UNKNOWN)
-                               || (prev_target_state == TARGET_RESET)) {
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_HALTED);
-                       }
-                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
-                               LOG_DEBUG(" ");
-
-                               retval = aarch64_debug_entry(target);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
+                       LOG_DEBUG("Target %s halted", target_name(target));
+                       retval = aarch64_debug_entry(target);
+                       if (retval != ERROR_OK)
+                               return retval;
+
+                       if (target->smp)
+                               update_halt_gdb(target, debug_reason);
 
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_DEBUG_HALTED);
+                       if (arm_semihosting(target, &retval) != 0)
+                               return retval;
+
+                       switch (prev_target_state) {
+                       case TARGET_RUNNING:
+                       case TARGET_UNKNOWN:
+                       case TARGET_RESET:
+                               target_call_event_callbacks(target, TARGET_EVENT_HALTED);
+                               break;
+                       case TARGET_DEBUG_RUNNING:
+                               target_call_event_callbacks(target, TARGET_EVENT_DEBUG_HALTED);
+                               break;
+                       default:
+                               break;
                        }
                }
        } else
@@ -404,47 +549,16 @@ static int aarch64_poll(struct target *target)
 
 static int aarch64_halt(struct target *target)
 {
-       int retval = ERROR_OK;
-       uint32_t dscr;
        struct armv8_common *armv8 = target_to_armv8(target);
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_HALT;
 
-       /*
-        * add HDE in halting debug mode
-        */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (retval == ERROR_OK)
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr | DSCR_HDE);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* trigger an event on channel 0, this outputs a halt request to the PE */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_APPPULSE, CTI_CHNL(0));
-       if (retval != ERROR_OK)
-               return retval;
+       if (target->smp)
+               return aarch64_halt_smp(target, false);
 
-       long long then = timeval_ms();
-       for (;; ) {
-               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if ((dscr & DSCRV8_HALT_MASK) != 0)
-                       break;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for halt");
-                       return ERROR_FAIL;
-               }
-       }
-
-       target->debug_reason = DBG_REASON_DBGRQ;
-
-       return ERROR_OK;
+       return aarch64_halt_one(target, HALT_SYNC);
 }
 
-static int aarch64_internal_restore(struct target *target, int current,
+static int aarch64_restore_one(struct target *target, int current,
        uint64_t *address, int handle_breakpoints, int debug_execution)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
@@ -452,6 +566,8 @@ static int aarch64_internal_restore(struct target *target, int current,
        int retval;
        uint64_t resume_pc;
 
+       LOG_DEBUG("%s", target_name(target));
+
        if (!debug_execution)
                target_free_all_working_areas(target);
 
@@ -483,42 +599,33 @@ static int aarch64_internal_restore(struct target *target, int current,
                        LOG_ERROR("How do I resume into Jazelle state??");
                        return ERROR_FAIL;
        }
-       LOG_DEBUG("resume pc = 0x%16" PRIx64, resume_pc);
+       LOG_DEBUG("resume pc = 0x%016" PRIx64, resume_pc);
        buf_set_u64(arm->pc->value, 0, 64, resume_pc);
-       arm->pc->dirty = 1;
-       arm->pc->valid = 1;
-       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
+       arm->pc->dirty = true;
+       arm->pc->valid = true;
 
        /* called it now before restoring context because it uses cpu
         * register r0 for restoring system control register */
        retval = aarch64_restore_system_control_reg(target);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = aarch64_restore_context(target, handle_breakpoints);
-       if (retval != ERROR_OK)
-               return retval;
-       target->debug_reason = DBG_REASON_NOTHALTED;
-       target->state = TARGET_RUNNING;
-
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
+       if (retval == ERROR_OK)
+               retval = aarch64_restore_context(target, handle_breakpoints);
 
        return retval;
 }
 
-static int aarch64_internal_restart(struct target *target, bool slave_pe)
+/**
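+ * Prepare a single target for restart: acknowledge a pending CTI halt
+ * event, ungate CTI channel 1 (restart) and gate channel 0 (halt), make
+ * sure DSCR.HDE is set, then read PRSR to clear its sticky bits.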
+ */
+static int aarch64_prepare_restart_one(struct target *target)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
-       struct arm *arm = &armv8->arm;
        int retval;
        uint32_t dscr;
-       /*
-        * * Restart core and wait for it to be started.  Clear ITRen and sticky
-        * * exception flags: see ARMv7 ARM, C5.9.
-        *
-        * REVISIT: for single stepping, we probably want to
-        * disable IRQs by default, with optional override...
-        */
+       uint32_t tmp;
+
+       LOG_DEBUG("%s", target_name(target));
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
@@ -526,71 +633,204 @@ static int aarch64_internal_restart(struct target *target, bool slave_pe)
                return retval;
 
        if ((dscr & DSCR_ITE) == 0)
-               LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
-
-       /* make sure to acknowledge the halt event before resuming */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->cti_base + CTI_INACK, CTI_TRIG(HALT));
+               LOG_ERROR("DSCR.ITE must be set before leaving debug!");
+       if ((dscr & DSCR_ERR) != 0)
+               LOG_ERROR("DSCR.ERR must be cleared before leaving debug!");
 
+       /* acknowledge a pending CTI halt event */
+       retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
        /*
         * open the CTI gate for channel 1 so that the restart events
-        * get passed along to all PEs
+        * get passed along to all PEs. Also close gate for channel 0
+        * to isolate the PE from halt events.
         */
        if (retval == ERROR_OK)
+               retval = arm_cti_ungate_channel(armv8->cti, 1);
+       if (retval == ERROR_OK)
+               retval = arm_cti_gate_channel(armv8->cti, 0);
+
+       /* make sure that DSCR.HDE is set */
+       if (retval == ERROR_OK) {
+               dscr |= DSCR_HDE;
                retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_GATE, CTI_CHNL(1));
+                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
+
+       if (retval == ERROR_OK) {
+               /* clear sticky bits in PRSR, SDR is now 0 */
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_PRSR, &tmp);
+       }
+
+       return retval;
+}
+
+static int aarch64_do_restart_one(struct target *target, enum restart_mode mode)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       /* trigger an event on channel 1, generates a restart request to the PE */
+       retval = arm_cti_pulse_channel(armv8->cti, 1);
        if (retval != ERROR_OK)
                return retval;
 
-       if (!slave_pe) {
-               /* trigger an event on channel 1, generates a restart request to the PE */
-               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->cti_base + CTI_APPPULSE, CTI_CHNL(1));
-               if (retval != ERROR_OK)
-                       return retval;
-
-               long long then = timeval_ms();
-               for (;; ) {
-                       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       if ((dscr & DSCR_HDE) != 0)
+       if (mode == RESTART_SYNC) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       int resumed;
+                       /*
+                        * if PRSR.SDR is set now, the target did restart, even
+                        * if it's now already halted again (e.g. due to breakpoint)
+                        */
+                       retval = aarch64_check_state_one(target,
+                                               PRSR_SDR, PRSR_SDR, &resumed, NULL);
+                       if (retval != ERROR_OK || resumed)
                                break;
+
                        if (timeval_ms() > then + 1000) {
-                               LOG_ERROR("Timeout waiting for resume");
-                               return ERROR_FAIL;
+                               LOG_ERROR("%s: Timeout waiting for resume"PRIx32, target_name(target));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
                        }
                }
        }
 
+       if (retval != ERROR_OK)
+               return retval;
+
        target->debug_reason = DBG_REASON_NOTHALTED;
        target->state = TARGET_RUNNING;
 
-       /* registers are now invalid */
-       register_cache_invalidate(arm->core_cache);
-
        return ERROR_OK;
 }
 
-static int aarch64_restore_smp(struct target *target, int handle_breakpoints)
+static int aarch64_restart_one(struct target *target, enum restart_mode mode)
 {
-       int retval = 0;
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = aarch64_prepare_restart_one(target);
+       if (retval == ERROR_OK)
+               retval = aarch64_do_restart_one(target, mode);
+
+       return retval;
+}
+
+/*
+ * prepare all but the current target for restart
+ */
+static int aarch64_prep_restart_smp(struct target *target, int handle_breakpoints, struct target **p_first)
+{
+       int retval = ERROR_OK;
        struct target_list *head;
-       struct target *curr;
+       struct target *first = NULL;
        uint64_t address;
-       head = target->head;
-       while (head != (struct target_list *)NULL) {
-               curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_RUNNING)) {
-                       /*  resume current address , not in step mode */
-                       retval += aarch64_internal_restore(curr, 1, &address,
-                                       handle_breakpoints, 0);
-                       retval += aarch64_internal_restart(curr, true);
+
+       foreach_smp_target(head, target->head) {
+               struct target *curr = head->target;
+
+               /* skip calling target */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               if (curr->state != TARGET_HALTED)
+                       continue;
+
+               /*  resume at current address, not in step mode */
+               retval = aarch64_restore_one(curr, 1, &address, handle_breakpoints, 0);
+               if (retval == ERROR_OK)
+                       retval = aarch64_prepare_restart_one(curr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("failed to restore target %s", target_name(curr));
+                       break;
                }
-               head = head->next;
+               /* remember the first valid target in the group */
+               if (first == NULL)
+                       first = curr;
+       }
+
+       if (p_first)
+               *p_first = first;
+
+       return retval;
+}
+
+
+static int aarch64_step_restart_smp(struct target *target)
+{
+       int retval = ERROR_OK;
+       struct target_list *head;
+       struct target *first = NULL;
+
+       LOG_DEBUG("%s", target_name(target));
+
+       retval = aarch64_prep_restart_smp(target, 0, &first);
+       if (retval != ERROR_OK)
+               return retval;
 
+       if (first != NULL)
+               retval = aarch64_do_restart_one(first, RESTART_LAZY);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("error restarting target %s", target_name(first));
+               return retval;
        }
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               struct target *curr = target;
+               bool all_resumed = true;
+
+               foreach_smp_target(head, target->head) {
+                       uint32_t prsr;
+                       int resumed;
+
+                       curr = head->target;
+
+                       if (curr == target)
+                               continue;
+
+                       if (!target_was_examined(curr))
+                               continue;
+
+                       retval = aarch64_check_state_one(curr,
+                                       PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                       if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                               all_resumed = false;
+                               break;
+                       }
+
+                       if (curr->state != TARGET_RUNNING) {
+                               curr->state = TARGET_RUNNING;
+                               curr->debug_reason = DBG_REASON_NOTHALTED;
+                               target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                       }
+               }
+
+               if (all_resumed)
+                       break;
+
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("%s: timeout waiting for target resume", __func__);
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
+               }
+               /*
+                * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                * and it looks like the CTIs are not connected by a common
+                * trigger matrix. It seems that we need to resume one core in each
+                * cluster explicitly. So if we find that a core has not resumed
+                * yet, we trigger an explicit resume for the second cluster.
+                */
+               retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+               if (retval != ERROR_OK)
+                       break;
+}
+
        return retval;
 }
 
@@ -600,24 +840,89 @@ static int aarch64_resume(struct target *target, int current,
        int retval = 0;
        uint64_t addr = address;
 
-       /* dummy resume for smp toggle in order to reduce gdb impact  */
-       if ((target->smp) && (target->gdb_service->core[1] != -1)) {
-               /*   simulate a start and halt of target */
-               target->gdb_service->target = NULL;
-               target->gdb_service->core[0] = target->gdb_service->core[1];
-               /*  fake resume at next poll we play the  target core[1], see poll*/
-               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               return 0;
-       }
-       aarch64_internal_restore(target, current, &addr, handle_breakpoints,
-                                debug_execution);
+       struct armv8_common *armv8 = target_to_armv8(target);
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_RESUME;
+
+       if (target->state != TARGET_HALTED)
+               return ERROR_TARGET_NOT_HALTED;
+
+       /*
+        * If this target is part of a SMP group, prepare the other
+        * targets for resuming. This involves restoring the complete
+        * target register context and setting up CTI gates to accept
+        * resume events from the trigger matrix.
+        */
        if (target->smp) {
-               target->gdb_service->core[0] = -1;
-               retval = aarch64_restore_smp(target, handle_breakpoints);
+               retval = aarch64_prep_restart_smp(target, handle_breakpoints, NULL);
                if (retval != ERROR_OK)
                        return retval;
        }
-       aarch64_internal_restart(target, false);
+
+       /* all targets prepared, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &addr, handle_breakpoints,
+                                debug_execution);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_SYNC);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target->smp) {
+               int64_t then = timeval_ms();
+               for (;;) {
+                       struct target *curr = target;
+                       struct target_list *head;
+                       bool all_resumed = true;
+
+                       foreach_smp_target(head, target->head) {
+                               uint32_t prsr;
+                               int resumed;
+
+                               curr = head->target;
+                               if (curr == target)
+                                       continue;
+                               if (!target_was_examined(curr))
+                                       continue;
+
+                               retval = aarch64_check_state_one(curr,
+                                               PRSR_SDR, PRSR_SDR, &resumed, &prsr);
+                               if (retval != ERROR_OK || (!resumed && (prsr & PRSR_HALT))) {
+                                       all_resumed = false;
+                                       break;
+                               }
+
+                               if (curr->state != TARGET_RUNNING) {
+                                       curr->state = TARGET_RUNNING;
+                                       curr->debug_reason = DBG_REASON_NOTHALTED;
+                                       target_call_event_callbacks(curr, TARGET_EVENT_RESUMED);
+                               }
+                       }
+
+                       if (all_resumed)
+                               break;
+
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("%s: timeout waiting for target %s to resume", __func__, target_name(curr));
+                               retval = ERROR_TARGET_TIMEOUT;
+                               break;
+                       }
+
+                       /*
+                        * HACK: on Hi6220 there are 8 cores organized in 2 clusters
+                        * and it looks like the CTIs are not connected by a common
+                        * trigger matrix. It seems that we need to resume one core in each
+                        * cluster explicitly. So if we find that a core has not resumed
+                        * yet, we trigger an explicit resume for the second cluster.
+                        */
+                       retval = aarch64_do_restart_one(curr, RESTART_LAZY);
+                       if (retval != ERROR_OK)
+                               break;
+               }
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       target->debug_reason = DBG_REASON_NOTHALTED;
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
@@ -635,26 +940,41 @@ static int aarch64_resume(struct target *target, int current,
 static int aarch64_debug_entry(struct target *target)
 {
        int retval = ERROR_OK;
-       struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = target_to_armv8(target);
        struct arm_dpm *dpm = &armv8->dpm;
        enum arm_state core_state;
+       uint32_t dscr;
+
+       /* make sure to clear all sticky errors */
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       if (retval == ERROR_OK)
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval == ERROR_OK)
+               retval = arm_cti_ack_events(armv8->cti, CTI_TRIG(HALT));
+
+       if (retval != ERROR_OK)
+               return retval;
 
-       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), aarch64->cpudbg_dscr);
+       LOG_DEBUG("%s dscr = 0x%08" PRIx32, target_name(target), dscr);
 
-       dpm->dscr = aarch64->cpudbg_dscr;
+       dpm->dscr = dscr;
        core_state = armv8_dpm_get_core_state(dpm);
        armv8_select_opcodes(armv8, core_state == ARM_STATE_AARCH64);
        armv8_select_reg_access(armv8, core_state == ARM_STATE_AARCH64);
 
-       /* make sure to clear all sticky errors */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       /* close the CTI gate for all events */
+       if (retval == ERROR_OK)
+               retval = arm_cti_write_reg(armv8->cti, CTI_GATE, 0);
+       /* discard async exceptions */
+       if (retval == ERROR_OK)
+               retval = dpm->instr_cpsr_sync(dpm);
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       armv8_dpm_report_dscr(&armv8->dpm, aarch64->cpudbg_dscr);
+       armv8_dpm_report_dscr(dpm, dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
@@ -690,55 +1010,48 @@ static int aarch64_post_debug_entry(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        int retval;
-
-       /* clear sticky errors */
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                                   armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
+       enum arm_mode target_mode = ARM_MODE_ANY;
+       uint32_t instr;
 
        switch (armv8->arm.core_mode) {
-               case ARMV8_64_EL0T:
-                       armv8_dpm_modeswitch(&armv8->dpm, ARMV8_64_EL1H);
-                       /* fall through */
-               case ARMV8_64_EL1T:
-               case ARMV8_64_EL1H:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       0, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL0T:
+               target_mode = ARMV8_64_EL1H;
+               /* fall through */
+       case ARMV8_64_EL1T:
+       case ARMV8_64_EL1H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL1, 0);
                break;
-               case ARMV8_64_EL2T:
-               case ARMV8_64_EL2H:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       4, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL2T:
+       case ARMV8_64_EL2H:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL2, 0);
                break;
-               case ARMV8_64_EL3H:
-               case ARMV8_64_EL3T:
-                       retval = armv8->arm.mrs(target, 3, /*op 0*/
-                                       6, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
+       case ARMV8_64_EL3H:
+       case ARMV8_64_EL3T:
+               instr = ARMV8_MRS(SYSTEM_SCTLR_EL3, 0);
                break;
 
-               case ARM_MODE_SVC:
-                       retval = armv8->arm.mrc(target, 15, 0, 0, 1, 0, &aarch64->system_control_reg);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       break;
+       case ARM_MODE_SVC:
+       case ARM_MODE_ABT:
+       case ARM_MODE_FIQ:
+       case ARM_MODE_IRQ:
+       case ARM_MODE_SYS:
+               instr = ARMV4_5_MRC(15, 0, 0, 1, 0, 0);
+               break;
 
-               default:
-                       LOG_INFO("cannot read system control register in this mode");
-                       break;
+       default:
+               LOG_INFO("cannot read system control register in this mode");
+               return ERROR_FAIL;
        }
 
-       armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, target_mode);
+
+       retval = armv8->dpm.instr_read_data_r0(&armv8->dpm, instr, &aarch64->system_control_reg);
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (target_mode != ARM_MODE_ANY)
+               armv8_dpm_modeswitch(&armv8->dpm, ARM_MODE_ANY);
 
        LOG_DEBUG("System_register: %8.8" PRIx32, aarch64->system_control_reg);
        aarch64->system_control_reg_curr = aarch64->system_control_reg;
@@ -754,39 +1067,23 @@ static int aarch64_post_debug_entry(struct target *target)
                (aarch64->system_control_reg & 0x4U) ? 1 : 0;
        armv8->armv8_mmu.armv8_cache.i_cache_enabled =
                (aarch64->system_control_reg & 0x1000U) ? 1 : 0;
-       aarch64->curr_mode = armv8->arm.core_mode;
        return ERROR_OK;
 }
 
-static int aarch64_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
-{
-       struct armv8_common *armv8 = target_to_armv8(target);
-       uint32_t dscr;
-
-       /* Read DSCR */
-       int retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       if (ERROR_OK != retval)
-               return retval;
-
-       /* clear bitfield */
-       dscr &= ~bit_mask;
-       /* put new value */
-       dscr |= value & bit_mask;
-
-       /* write new DSCR */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       return retval;
-}
-
+/*
+ * single-step a target
+ */
 static int aarch64_step(struct target *target, int current, target_addr_t address,
        int handle_breakpoints)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       int saved_retval = ERROR_OK;
        int retval;
        uint32_t edecr;
 
+       armv8->last_run_control_op = ARMV8_RUNCONTROL_STEP;
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
@@ -794,39 +1091,76 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
 
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_EDECR, &edecr);
-       if (retval != ERROR_OK)
-               return retval;
-
        /* make sure EDECR.SS is not set when restoring the register */
-       edecr &= ~0x4;
 
-       /* set EDECR.SS to enter hardware step mode */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       if (retval == ERROR_OK) {
+               edecr &= ~0x4;
+               /* set EDECR.SS to enter hardware step mode */
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_EDECR, (edecr|0x4));
+       }
+       /* disable interrupts while stepping */
+       if (retval == ERROR_OK && aarch64->isrmasking_mode == AARCH64_ISRMASK_ON)
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
+       /* bail out if stepping setup has failed */
        if (retval != ERROR_OK)
                return retval;
 
-       /* disable interrupts while stepping */
-       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0x3 << 22);
-       if (retval != ERROR_OK)
-               return ERROR_OK;
+       if (target->smp && (current == 1)) {
+               /*
+                * isolate current target so that it doesn't get resumed
+                * together with the others
+                */
+               retval = arm_cti_gate_channel(armv8->cti, 1);
+               /* resume all other targets in the group */
+               if (retval == ERROR_OK)
+                       retval = aarch64_step_restart_smp(target);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Failed to restart non-stepping targets in SMP group");
+                       return retval;
+               }
+               LOG_DEBUG("Restarted all non-stepping targets in SMP group");
+       }
+
+       /* all other targets running, restore and restart the current target */
+       retval = aarch64_restore_one(target, current, &address, 0, 0);
+       if (retval == ERROR_OK)
+               retval = aarch64_restart_one(target, RESTART_LAZY);
 
-       /* resume the target */
-       retval = aarch64_resume(target, current, address, 0, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
-       while (target->state != TARGET_HALTED) {
-               retval = aarch64_poll(target);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("timeout waiting for target halt");
-                       return ERROR_FAIL;
+       LOG_DEBUG("target step-resumed at 0x%" PRIx64, address);
+       if (!handle_breakpoints)
+               target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
+
+       int64_t then = timeval_ms();
+       for (;;) {
+               int stepped;
+               uint32_t prsr;
+
+               retval = aarch64_check_state_one(target,
+                                       PRSR_SDR|PRSR_HALT, PRSR_SDR|PRSR_HALT, &stepped, &prsr);
+               if (retval != ERROR_OK || stepped)
+                       break;
+
+               if (timeval_ms() > then + 100) {
+                       LOG_ERROR("timeout waiting for target %s halt after step",
+                                       target_name(target));
+                       retval = ERROR_TARGET_TIMEOUT;
+                       break;
                }
        }
 
+       /*
+        * At least on one SoC (Renesas R8A7795) stepping over a WFI instruction
+        * causes a timeout. The core takes the step but doesn't complete it and so
+        * debug state is never entered. However, you can manually halt the core
+        * as an external debug event is also a WFI wakeup event.
+        */
+       if (retval == ERROR_TARGET_TIMEOUT)
+               saved_retval = aarch64_halt_one(target, HALT_SYNC);
+
        /* restore EDECR */
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_EDECR, edecr);
@@ -834,24 +1168,38 @@ static int aarch64_step(struct target *target, int current, target_addr_t addres
                return retval;
 
        /* restore interrupts */
-       retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
-       if (retval != ERROR_OK)
-               return ERROR_OK;
+       if (aarch64->isrmasking_mode == AARCH64_ISRMASK_ON) {
+               retval = aarch64_set_dscr_bits(target, 0x3 << 22, 0);
+               if (retval != ERROR_OK)
+                       return ERROR_OK;
+       }
 
-       return ERROR_OK;
+       if (saved_retval != ERROR_OK)
+               return saved_retval;
+
+       return aarch64_poll(target);
 }
 
 static int aarch64_restore_context(struct target *target, bool bpwp)
 {
        struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
 
-       LOG_DEBUG(" ");
+       int retval;
+
+       LOG_DEBUG("%s", target_name(target));
 
        if (armv8->pre_restore_context)
                armv8->pre_restore_context(target);
 
-       return armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       retval = armv8_dpm_write_dirty_registers(&armv8->dpm, bpwp);
+       if (retval == ERROR_OK) {
+               /* registers are now invalid */
+               register_cache_invalidate(arm->core_cache);
+               register_cache_invalidate(arm->core_cache->next);
+       }
 
+       return retval;
 }
 
 /*
@@ -918,7 +1266,7 @@ static int aarch64_set_breakpoint(struct target *target,
        } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
 
-               buf_set_u32(code, 0, 32, ARMV8_HLT(0x11));
+               buf_set_u32(code, 0, 32, armv8_opcode(armv8, ARMV8_OPC_HLT));
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFFFFFFFFFE,
                                breakpoint->length, 1,
@@ -1323,7 +1671,10 @@ static int aarch64_assert_reset(struct target *target)
        }
 
        /* registers are now invalid */
-       register_cache_invalidate(armv8->arm.core_cache);
+       if (target_was_examined(target)) {
+               register_cache_invalidate(armv8->arm.core_cache);
+               register_cache_invalidate(armv8->arm.core_cache->next);
+       }
 
        target->state = TARGET_RESET;
 
@@ -1339,24 +1690,121 @@ static int aarch64_deassert_reset(struct target *target)
        /* be certain SRST is off */
        jtag_add_reset(0, 0);
 
+       if (!target_was_examined(target))
+               return ERROR_OK;
+
        retval = aarch64_poll(target);
        if (retval != ERROR_OK)
                return retval;
 
+       retval = aarch64_init_debug_access(target);
+       if (retval != ERROR_OK)
+               return retval;
+
        if (target->reset_halt) {
                if (target->state != TARGET_HALTED) {
                        LOG_WARNING("%s: ran after reset and before halt ...",
                                target_name(target));
                        retval = target_halt(target);
-                       if (retval != ERROR_OK)
-                               return retval;
                }
        }
 
+       return retval;
+}
+
+static int aarch64_write_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode if necessary */
+       if (*dscr & DSCR_MA) {
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       while (count) {
+               uint32_t data, opcode;
+
+               /* write the data to store into DTRRX */
+               if (size == 1)
+                       data = *buffer;
+               else if (size == 2)
+                       data = target_buffer_get_u16(target, buffer);
+               else
+                       data = target_buffer_get_u32(target, buffer);
+               retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRRX, data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MRS(SYSTEM_DBG_DTRRX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MRC(14, 0, 1, 0, 5, 0));
+               if (retval != ERROR_OK)
+                       return retval;
+
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_STRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Advance */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+static int aarch64_write_cpu_memory_fast(struct target *target,
+       uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* Step 1.d   - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+
+       /* Step 2.a   - Do the write */
+       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
+                                       buffer, 4, count, armv8->debug_base + CPUV8_DBG_DTRRX);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Step 3.a   - Switch DTR mode back to Normal mode */
+       *dscr &= ~DSCR_MA;
+       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
        return ERROR_OK;
 }
 
-static int aarch64_write_apb_ap_memory(struct target *target,
+static int aarch64_write_cpu_memory(struct target *target,
        uint64_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
@@ -1365,155 +1813,213 @@ static int aarch64_write_apb_ap_memory(struct target *target,
        struct armv8_common *armv8 = target_to_armv8(target);
        struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
 
-       LOG_DEBUG("Writing APB-AP memory address 0x%" PRIx64 " size %"  PRIu32 " count%"  PRIu32,
-                         address, size, count);
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-
-       /* Mark register R0 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
-
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
-
-       /*  clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               return retval;
-
+       armv8_reg_current(arm, 0)->dirty = true;
 
        /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
-       /* The algorithm only copies 32 bit words, so the buffer
-        * should be expanded to include the words at either end.
-        * The first and last words will be read first to avoid
-        * corruption if needed.
-        */
-       tmp_buff = malloc(total_u32 * 4);
-
-       if ((start_byte != 0) && (total_u32 > 1)) {
-               /* First bytes not aligned - read the 32 bit word to avoid corrupting
-                * the other bytes in the word.
-                */
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3), 4, 1, tmp_buff);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
-       }
-
-       /* If end of write is not aligned, or the write is less than 4 bytes */
-       if ((end_byte != 0) ||
-               ((total_u32 == 1) && (total_bytes != 4))) {
-
-               /* Read the last word to avoid corruption during 32 bit write */
-               int mem_offset = (total_u32-1) * 4;
-               retval = aarch64_read_apb_ap_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_w;
-       }
-
-       /* Copy the write buffer over the top of the temporary buffer */
-       memcpy(&tmp_buff[start_byte], buffer, total_bytes);
-
-       /* We now have a 32 bit aligned buffer that can be written */
-
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
 
        /* Set Normal access mode  */
        dscr = (dscr & ~DSCR_MA);
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
                retval = dpm->instr_write_data_dcc_64(dpm,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRX */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               dpm->instr_write_data_dcc(dpm,
-                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address & ~0x3ULL);
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
+       }
+
+       if (retval != ERROR_OK)
+               return retval;
+
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_write_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_write_cpu_memory_slow(target, size, count, buffer, &dscr);
+
+       if (retval != ERROR_OK) {
+               /* Unset DTR mode */
+               mem_ap_read_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       }
+
+       /* Check for sticky abort flags in the DSCR */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       dpm->dscr = dscr;
+       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
+               /* Abort occurred - clear it and exit */
+               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
+               armv8_dpm_handle_exception(dpm, true);
+               return ERROR_FAIL;
+       }
+
+       /* Done */
+       return ERROR_OK;
+}
+
+/*
+ * Read 'count' elements of 'size' bytes each from target memory, one element
+ * per iteration, with the DCC in normal (non memory-access) mode.
+ *
+ * Each iteration executes a load opcode selected by 'size'
+ * (ARMV8_OPC_LDRB_IP for 1, ARMV8_OPC_LDRH_IP for 2, ARMV8_OPC_LDRW_IP for
+ * anything else), moves register 1 into DTRTX with an MSR/MCR and then
+ * fetches DTRTX through the debug AP.
+ * NOTE(review): the loads presumably address memory through X0/R0, which the
+ * caller loads with the start address beforehand - confirm against the
+ * ARMV8_OPC_LDR*_IP opcode definitions.
+ *
+ * @param target  target whose memory is read
+ * @param size    element size in bytes (1, 2, otherwise handled as 32 bit)
+ * @param count   number of elements to read; 0 reads nothing
+ * @param buffer  destination buffer, advanced by 'size' per element
+ * @param dscr    caller's cached DSCR value; if DSCR_MA is set on entry it is
+ *                cleared here and the result is written back to the target
+ * @return ERROR_OK, or the first error returned by a DAP/DPM operation
+ */
+static int aarch64_read_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+
+       /* register 1 receives every loaded value, so flag it as modified */
+       armv8_reg_current(arm, 1)->dirty = true;
+
+       /* change DCC to normal mode (if necessary) */
+       if (*dscr & DSCR_MA) {
+               /* clear only the MA bit and keep every other cached DSCR bit */
+               *dscr &= ~DSCR_MA;
+               retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       while (count) {
+               uint32_t opcode, data;
+
+               /* pick the load instruction matching the element size */
+               if (size == 1)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRB_IP);
+               else if (size == 2)
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRH_IP);
+               else
+                       opcode = armv8_opcode(armv8, ARMV8_OPC_LDRW_IP);
+               retval = dpm->instr_execute(dpm, opcode);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* hand the loaded value to the debugger through DTRTX */
+               if (arm->core_state == ARM_STATE_AARCH64)
+                       retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DTRTX_EL0, 1));
+               else
+                       retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 1, 0, 5, 0));
+               if (retval != ERROR_OK)
+                       return retval;
+
+               retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                               armv8->debug_base + CPUV8_DBG_DTRTX, &data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* store only the low 'size' bytes of the 32-bit DTRTX value */
+               if (size == 1)
+                       *buffer = (uint8_t)data;
+               else if (size == 2)
+                       target_buffer_set_u16(target, buffer, (uint16_t)data);
+               else
+                       target_buffer_set_u32(target, buffer, data);
+
+               /* Advance */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+static int aarch64_read_cpu_memory_fast(struct target *target,
+       uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       struct armv8_common *armv8 = target_to_armv8(target);
+       struct arm_dpm *dpm = &armv8->dpm;
+       struct arm *arm = &armv8->arm;
+       int retval;
+       uint32_t value;
 
-       }
-       /* Step 1.d   - Change DCC to memory mode */
-       dscr = dscr | DSCR_MA;
-       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+       /* Mark X1 as dirty */
+       armv8_reg_current(arm, 1)->dirty = true;
 
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
+       } else {
+               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
+               retval = dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
+       }
 
-       /* Step 2.a   - Do the write */
-       retval = mem_ap_write_buf_noincr(armv8->debug_ap,
-                                       tmp_buff, 4, total_u32, armv8->debug_base + CPUV8_DBG_DTRRX);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+               return retval;
 
-       /* Step 3.a   - Switch DTR mode back to Normal mode */
-       dscr = (dscr & ~DSCR_MA);
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       /* Step 1.e - Change DCC to memory mode */
+       *dscr |= DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+               return retval;
 
-       /* Check for sticky abort flags in the DSCR */
+       /* Step 1.f - read DBGDTRTX and discard the value */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
 
-       dpm->dscr = dscr;
-       if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
-               /* Abort occurred - clear it and exit */
-               LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, 1<<2);
-               armv8_dpm_handle_exception(dpm);
-               goto error_free_buff_w;
+       count--;
+       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
+        * Abort flags are sticky, so can be read at end of transactions
+        *
+        * This data is read in aligned to 32 bit boundary.
+        */
+
+       if (count) {
+               /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
+                * increments X0 by 4. */
+               retval = mem_ap_read_buf_noincr(armv8->debug_ap, buffer, 4, count,
+                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
+               if (retval != ERROR_OK)
+                       return retval;
        }
 
-       /* Done */
-       free(tmp_buff);
-       return ERROR_OK;
+       /* Step 3.a - set DTR access mode back to Normal mode   */
+       *dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+                                       armv8->debug_base + CPUV8_DBG_DSCR, *dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-error_unset_dtr_w:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_w:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
+       /* Step 3.b - read DBGDTRTX for the final value */
+       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+       if (retval != ERROR_OK)
+               return retval;
+
+       target_buffer_set_u32(target, buffer + count * 4, value);
+       return retval;
 }
 
-static int aarch64_read_apb_ap_memory(struct target *target,
+static int aarch64_read_cpu_memory(struct target *target,
        target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
@@ -1522,166 +2028,86 @@ static int aarch64_read_apb_ap_memory(struct target *target,
        struct armv8_common *armv8 = target_to_armv8(target);
        struct arm_dpm *dpm = &armv8->dpm;
        struct arm *arm = &armv8->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
        uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
-       uint8_t *u8buf_ptr;
-       uint32_t value;
 
-       LOG_DEBUG("Reading APB-AP memory address 0x%" TARGET_PRIxADDR " size %" PRIu32 " count%"  PRIu32,
-                         address, size, count);
+       LOG_DEBUG("Reading CPU memory address 0x%016" PRIx64 " size %" PRIu32 " count %" PRIu32,
+                       address, size, count);
+
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
-       /* Mark register X0, X1 as dirty, as it will be used
+       /* Mark register X0 as dirty, as it will be used
         * for transferring the data.
         * It will be restored automatically when exiting
         * debug mode
         */
-       reg = armv8_reg_current(arm, 1);
-       reg->dirty = true;
-
-       reg = armv8_reg_current(arm, 0);
-       reg->dirty = true;
-
-       /*      clear any abort  */
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       armv8_reg_current(arm, 0)->dirty = true;
 
        /* Read DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
        /* This algorithm comes from DDI0487A.g, chapter J9.1 */
 
        /* Set Normal access mode  */
-       dscr = (dscr & ~DSCR_MA);
-       retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
+       dscr &= ~DSCR_MA;
+       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
        if (arm->core_state == ARM_STATE_AARCH64) {
                /* Write X0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTR_EL0 */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrs DBGDTR_EL0, x0 */
-               retval += dpm->instr_write_data_dcc_64(dpm,
-                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address & ~0x3ULL);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += dpm->instr_execute(dpm, ARMV8_MSR_GP(SYSTEM_DBG_DBGDTR_EL0, 0));
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
+               retval = dpm->instr_write_data_dcc_64(dpm,
+                               ARMV8_MRS(SYSTEM_DBG_DBGDTR_EL0, 0), address);
        } else {
                /* Write R0 with value 'address' using write procedure */
                /* Step 1.a+b - Write the address for read access into DBGDTRRXint */
                /* Step 1.c   - Copy value from DTR to R0 using instruction mrc DBGDTRTXint, r0 */
-               retval += dpm->instr_write_data_dcc(dpm,
-                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address & ~0x3ULL);
-               /* Step 1.d - Dummy operation to ensure EDSCR.Txfull == 1 */
-               retval += dpm->instr_execute(dpm, ARMV4_5_MCR(14, 0, 0, 0, 5, 0));
-               /* Step 1.e - Change DCC to memory mode */
-               dscr = dscr | DSCR_MA;
-               retval +=  mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-               /* Step 1.f - read DBGDTRTX and discard the value */
-               retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-
+               retval = dpm->instr_write_data_dcc(dpm,
+                               ARMV4_5_MRC(14, 0, 0, 0, 5, 0), address);
        }
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_r;
-
-       /* Optimize the read as much as we can, either way we read in a single pass  */
-       if ((start_byte) || (end_byte)) {
-               /* The algorithm only copies 32 bit words, so the buffer
-                * should be expanded to include the words at either end.
-                * The first and last words will be read into a temp buffer
-                * to avoid corruption
-                */
-               tmp_buff = malloc(total_u32 * 4);
-               if (!tmp_buff)
-                       goto error_unset_dtr_r;
-
-               /* use the tmp buffer to read the entire data */
-               u8buf_ptr = tmp_buff;
-       } else
-               /* address and read length are aligned so read directly into the passed buffer */
-               u8buf_ptr = buffer;
-
-       /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
-        * Abort flags are sticky, so can be read at end of transactions
-        *
-        * This data is read in aligned to 32 bit boundary.
-        */
 
-       /* Step 2.a - Loop n-1 times, each read of DBGDTRTX reads the data from [X0] and
-        * increments X0 by 4. */
-       retval = mem_ap_read_buf_noincr(armv8->debug_ap, u8buf_ptr, 4, total_u32-1,
-                                                                       armv8->debug_base + CPUV8_DBG_DTRTX);
        if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
+               return retval;
 
-       /* Step 3.a - set DTR access mode back to Normal mode   */
-       dscr = (dscr & ~DSCR_MA);
-       retval =  mem_ap_write_atomic_u32(armv8->debug_ap,
+       if (size == 4 && (address % 4) == 0)
+               retval = aarch64_read_cpu_memory_fast(target, count, buffer, &dscr);
+       else
+               retval = aarch64_read_cpu_memory_slow(target, size, count, buffer, &dscr);
+
+       if (dscr & DSCR_MA) {
+               dscr &= ~DSCR_MA;
+               mem_ap_write_atomic_u32(armv8->debug_ap,
                                        armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_free_buff_r;
+       }
 
-       /* Step 3.b - read DBGDTRTX for the final value */
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DTRTX, &value);
-       memcpy(u8buf_ptr + (total_u32-1) * 4, &value, 4);
+       if (retval != ERROR_OK)
+               return retval;
 
        /* Check for sticky abort flags in the DSCR */
        retval = mem_ap_read_atomic_u32(armv8->debug_ap,
                                armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               return retval;
 
        dpm->dscr = dscr;
 
        if (dscr & (DSCR_ERR | DSCR_SYS_ERROR_PEND)) {
                /* Abort occurred - clear it and exit */
                LOG_ERROR("abort occurred - dscr = 0x%08" PRIx32, dscr);
-               mem_ap_write_atomic_u32(armv8->debug_ap,
-                                       armv8->debug_base + CPUV8_DBG_DRCR, DRCR_CSE);
-               armv8_dpm_handle_exception(dpm);
-               goto error_free_buff_r;
-       }
-
-       /* check if we need to copy aligned data by applying any shift necessary */
-       if (tmp_buff) {
-               memcpy(buffer, tmp_buff + start_byte, total_bytes);
-               free(tmp_buff);
+               armv8_dpm_handle_exception(dpm, true);
+               return ERROR_FAIL;
        }
 
        /* Done */
        return ERROR_OK;
-
-error_unset_dtr_r:
-       /* Unset DTR mode */
-       mem_ap_read_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_MA);
-       mem_ap_write_atomic_u32(armv8->debug_ap,
-                               armv8->debug_base + CPUV8_DBG_DSCR, dscr);
-error_free_buff_r:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
 }
 
 static int aarch64_read_phys_memory(struct target *target,
@@ -1689,15 +2115,13 @@ static int aarch64_read_phys_memory(struct target *target,
        uint32_t count, uint8_t *buffer)
 {
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       LOG_DEBUG("Reading memory at real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32,
-               address, size, count);
 
        if (count && buffer) {
                /* read memory through APB-AP */
                retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-               retval = aarch64_read_apb_ap_memory(target, address, size, count, buffer);
+               retval = aarch64_read_cpu_memory(target, address, size, count, buffer);
        }
        return retval;
 }
@@ -1708,25 +2132,18 @@ static int aarch64_read_memory(struct target *target, target_addr_t address,
        int mmu_enabled = 0;
        int retval;
 
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
-
        /* determine if MMU was enabled on target stop */
        retval = aarch64_mmu(target, &mmu_enabled);
        if (retval != ERROR_OK)
                return retval;
 
        if (mmu_enabled) {
-               retval = aarch64_check_address(target, address);
-               if (retval != ERROR_OK)
-                       return retval;
                /* enable MMU as we could have disabled it for phys access */
                retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-       return aarch64_read_apb_ap_memory(target, address, size, count, buffer);
+       return aarch64_read_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_write_phys_memory(struct target *target,
@@ -1735,15 +2152,12 @@ static int aarch64_write_phys_memory(struct target *target,
 {
        int retval = ERROR_COMMAND_SYNTAX_ERROR;
 
-       LOG_DEBUG("Writing memory to real address 0x%" TARGET_PRIxADDR "; size %" PRId32 "; count %" PRId32, address,
-               size, count);
-
        if (count && buffer) {
                /* write memory through APB-AP */
                retval = aarch64_mmu_modify(target, 0);
                if (retval != ERROR_OK)
                        return retval;
-               return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
+               return aarch64_write_cpu_memory(target, address, size, count, buffer);
        }
 
        return retval;
@@ -1755,25 +2169,18 @@ static int aarch64_write_memory(struct target *target, target_addr_t address,
        int mmu_enabled = 0;
        int retval;
 
-       /* aarch64 handles unaligned memory access */
-       LOG_DEBUG("Writing memory at address 0x%" TARGET_PRIxADDR "; size %" PRId32
-                 "; count %" PRId32, address, size, count);
-
        /* determine if MMU was enabled on target stop */
        retval = aarch64_mmu(target, &mmu_enabled);
        if (retval != ERROR_OK)
                return retval;
 
        if (mmu_enabled) {
-               retval = aarch64_check_address(target, address);
-               if (retval != ERROR_OK)
-                       return retval;
                /* enable MMU as we could have disabled it for phys access */
                retval = aarch64_mmu_modify(target, 1);
                if (retval != ERROR_OK)
                        return retval;
        }
-       return aarch64_write_apb_ap_memory(target, address, size, count, buffer);
+       return aarch64_write_cpu_memory(target, address, size, count, buffer);
 }
 
 static int aarch64_handle_target_request(void *priv)
@@ -1813,20 +2220,14 @@ static int aarch64_examine_first(struct target *target)
        struct aarch64_common *aarch64 = target_to_aarch64(target);
        struct armv8_common *armv8 = &aarch64->armv8_common;
        struct adiv5_dap *swjdp = armv8->arm.dap;
+       struct aarch64_private_config *pc;
        int i;
        int retval = ERROR_OK;
        uint64_t debug, ttypr;
        uint32_t cpuid;
-       uint32_t tmp0, tmp1;
+       uint32_t tmp0, tmp1, tmp2, tmp3;
        debug = ttypr = cpuid = 0;
 
-       /* We do one extra read to ensure DAP is configured,
-        * we call ahbap_debugport_init(swjdp) instead
-        */
-       retval = dap_dp_init(swjdp);
-       if (retval != ERROR_OK)
-               return retval;
-
        /* Search for the APB-AB - it is needed for access to debug registers */
        retval = dap_find_ap(swjdp, AP_TYPE_APB_AP, &armv8->debug_ap);
        if (retval != ERROR_OK) {
@@ -1840,7 +2241,7 @@ static int aarch64_examine_first(struct target *target)
                return retval;
        }
 
-       armv8->debug_ap->memaccess_tck = 80;
+       armv8->debug_ap->memaccess_tck = 10;
 
        if (!target->dbgbase_set) {
                uint32_t dbgbase;
@@ -1860,13 +2261,6 @@ static int aarch64_examine_first(struct target *target)
        } else
                armv8->debug_base = target->dbgbase;
 
-       retval = mem_ap_write_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_LOCKACCESS, 0xC5ACCE55);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("LOCK debug access fail");
-               return retval;
-       }
-
        retval = mem_ap_write_atomic_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_OSLAR, 0);
        if (retval != ERROR_OK) {
@@ -1874,47 +2268,54 @@ static int aarch64_examine_first(struct target *target)
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+       retval = mem_ap_read_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_MAINID0, &cpuid);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "CPUID");
                return retval;
        }
 
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
+       retval = mem_ap_read_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_MEMFEATURE0, &tmp0);
-       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
+       retval += mem_ap_read_u32(armv8->debug_ap,
                        armv8->debug_base + CPUV8_DBG_MEMFEATURE0 + 4, &tmp1);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "Memory Model Type");
                return retval;
        }
-       ttypr |= tmp1;
-       ttypr = (ttypr << 32) | tmp0;
-
-       retval = mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0, &tmp0);
-       retval += mem_ap_read_atomic_u32(armv8->debug_ap,
-                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0 + 4, &tmp1);
+       retval = mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0, &tmp2);
+       retval += mem_ap_read_u32(armv8->debug_ap,
+                       armv8->debug_base + CPUV8_DBG_DBGFEATURE0 + 4, &tmp3);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "ID_AA64DFR0_EL1");
                return retval;
        }
-       debug |= tmp1;
-       debug = (debug << 32) | tmp0;
+
+       retval = dap_run(armv8->debug_ap->dap);
+       if (retval != ERROR_OK) {
+               LOG_ERROR("%s: examination failed\n", target_name(target));
+               return retval;
+       }
+
+       ttypr |= tmp1;
+       ttypr = (ttypr << 32) | tmp0;
+       debug |= tmp3;
+       debug = (debug << 32) | tmp2;
 
        LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
        LOG_DEBUG("ttypr = 0x%08" PRIx64, ttypr);
        LOG_DEBUG("debug = 0x%08" PRIx64, debug);
 
-       if (target->ctibase == 0) {
-               /* assume a v8 rom table layout */
-               armv8->cti_base = target->ctibase = armv8->debug_base + 0x10000;
-               LOG_INFO("Target ctibase is not set, assuming 0x%0" PRIx32, target->ctibase);
-       } else
-               armv8->cti_base = target->ctibase;
+       if (target->private_config == NULL)
+               return ERROR_FAIL;
+
+       pc = (struct aarch64_private_config *)target->private_config;
+       if (pc->cti == NULL)
+               return ERROR_FAIL;
+
+       armv8->cti = pc->cti;
 
-       armv8->arm.core_type = ARM_MODE_MON;
        retval = aarch64_dpm_setup(aarch64, debug);
        if (retval != ERROR_OK)
                return retval;
@@ -1937,6 +2338,9 @@ static int aarch64_examine_first(struct target *target)
 
        LOG_DEBUG("Configured %i hw breakpoints", aarch64->brp_num);
 
+       target->state = TARGET_UNKNOWN;
+       target->debug_reason = DBG_REASON_NOTHALTED;
+       aarch64->isrmasking_mode = AARCH64_ISRMASK_ON;
        target_set_examined(target);
        return ERROR_OK;
 }
@@ -1964,58 +2368,67 @@ static int aarch64_init_target(struct command_context *cmd_ctx,
        struct target *target)
 {
        /* examine_first() does a bunch of this */
+       arm_semihosting_init(target);
        return ERROR_OK;
 }
 
+/*
+ * Fill in the aarch64_common that aarch64_target_create() allocated:
+ * set the magic value, attach the DAP, install the armv8 hooks and register
+ * aarch64_handle_target_request() as a periodic timer callback.
+ *
+ * Always returns ERROR_OK; the results of armv8_init_arch_info() and
+ * target_register_timer_callback() are not checked here.
+ */
 static int aarch64_init_arch_info(struct target *target,
-       struct aarch64_common *aarch64, struct jtag_tap *tap)
+       struct aarch64_common *aarch64, struct adiv5_dap *dap)
 {
        struct armv8_common *armv8 = &aarch64->armv8_common;
-       struct adiv5_dap *dap = armv8->arm.dap;
-
-       armv8->arm.dap = dap;
 
        /* Setup struct aarch64_common */
        aarch64->common_magic = AARCH64_COMMON_MAGIC;
-       /*  tap has no dap initialized */
-       if (!tap->dap) {
-               tap->dap = dap_init();
-
-               /* Leave (only) generic DAP stuff for debugport_init() */
-               tap->dap->tap = tap;
-       }
-
-       armv8->arm.dap = tap->dap;
-
-       aarch64->fast_reg_read = 0;
+       /* the DAP is supplied by the caller */
+       armv8->arm.dap = dap;
 
        /* register arch-specific functions */
        armv8->examine_debug_reason = NULL;
-
        armv8->post_debug_entry = aarch64_post_debug_entry;
-
        armv8->pre_restore_context = NULL;
-
        armv8->armv8_mmu.read_physical_memory = aarch64_read_phys_memory;
 
-       /* REVISIT v7a setup should be in a v7a-specific routine */
        armv8_init_arch_info(target, armv8);
-       target_register_timer_callback(aarch64_handle_target_request, 1, 1, target);
+       /* NOTE(review): the period argument "1" is presumably in ms - confirm */
+       target_register_timer_callback(aarch64_handle_target_request, 1,
+               TARGET_TIMER_TYPE_PERIODIC, target);
 
        return ERROR_OK;
 }
 
+/*
+ * Allocate and initialise the aarch64-specific state of a new target.
+ *
+ * Returns ERROR_FAIL when the target has no private configuration, when
+ * adiv5_verify_config() rejects the DAP configuration, or when the
+ * allocation fails; otherwise returns the result of
+ * aarch64_init_arch_info().
+ */
 static int aarch64_target_create(struct target *target, Jim_Interp *interp)
 {
-       struct aarch64_common *aarch64 = calloc(1, sizeof(struct aarch64_common));
+       struct aarch64_private_config *pc = target->private_config;
+       struct aarch64_common *aarch64;
+
+       /*
+        * private_config is allocated lazily by aarch64_jim_configure() and
+        * may still be NULL here; do not form &pc->adiv5_config from a NULL
+        * pointer.
+        */
+       if (pc == NULL)
+               return ERROR_FAIL;
+
+       if (adiv5_verify_config(&pc->adiv5_config) != ERROR_OK)
+               return ERROR_FAIL;
+
+       aarch64 = calloc(1, sizeof(struct aarch64_common));
+       if (aarch64 == NULL) {
+               LOG_ERROR("Out of memory");
+               return ERROR_FAIL;
+       }
+
+       return aarch64_init_arch_info(target, aarch64, pc->adiv5_config.dap);
+}
+
+/*
+ * Release the memory attached to an aarch64 target: the armv8 register
+ * cache, the breakpoint list, the DPM breakpoint/watchpoint arrays, the
+ * private configuration and finally the aarch64_common itself.
+ */
+static void aarch64_deinit_target(struct target *target)
+{
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
+       struct armv8_common *armv8 = &aarch64->armv8_common;
+       struct arm_dpm *dpm = &armv8->dpm;
 
-       return aarch64_init_arch_info(target, aarch64, target->tap);
+       armv8_free_reg_cache(target);
+       free(aarch64->brp_list);
+       free(dpm->dbp);
+       free(dpm->dwp);
+       free(target->private_config);
+       /* freed last: armv8 and dpm point into this structure */
+       free(aarch64);
 }
 
 static int aarch64_mmu(struct target *target, int *enabled)
 {
        if (target->state != TARGET_HALTED) {
-               LOG_ERROR("%s: target not halted", __func__);
+               LOG_ERROR("%s: target %s not halted", __func__, target_name(target));
                return ERROR_TARGET_INVALID;
        }
 
@@ -2029,6 +2442,94 @@ static int aarch64_virt2phys(struct target *target, target_addr_t virt,
        return armv8_mmu_translate_va_pa(target, virt, phys, 1);
 }
 
+/*
+ * private target configuration items
+ *
+ * These are the aarch64-specific options looked up by
+ * aarch64_jim_configure(); each enumerator identifies one option.
+ */
+enum aarch64_cfg_param {
+       /* selected by the "-cti" option */
+       CFG_CTI,
+};
+
+/* Option name to enum aarch64_cfg_param mapping; the last entry has a NULL name. */
+static const Jim_Nvp nvp_config_opts[] = {
+       { .name = "-cti", .value = CFG_CTI },
+       { .name = NULL, .value = -1 }
+};
+
+/*
+ * Handle the aarch64-specific part of "configure"/"cget" for a target.
+ *
+ * The private configuration is allocated on first use.  The common DAP
+ * options are tried first through adiv5_jim_configure(); only when that
+ * reports JIM_CONTINUE is the topmost argument matched against
+ * nvp_config_opts.
+ *
+ * Returns JIM_OK when an option was handled (or no argument was left),
+ * JIM_CONTINUE when the topmost argument is not one of our options, and
+ * JIM_ERR (or the failing helper's status) on error, including failure to
+ * allocate the private configuration.
+ */
+static int aarch64_jim_configure(struct target *target, Jim_GetOptInfo *goi)
+{
+       struct aarch64_private_config *pc;
+       Jim_Nvp *n;
+       int e;
+
+       pc = (struct aarch64_private_config *)target->private_config;
+       if (pc == NULL) {
+               pc = calloc(1, sizeof(struct aarch64_private_config));
+               if (pc == NULL) {
+                       /* pc is dereferenced below; never continue without it */
+                       LOG_ERROR("Out of memory");
+                       return JIM_ERR;
+               }
+               target->private_config = pc;
+       }
+
+       /*
+        * Call adiv5_jim_configure() to parse the common DAP options
+        * It will return JIM_CONTINUE if it didn't find any known
+        * options, JIM_OK if it correctly parsed the topmost option
+        * and JIM_ERR if an error occurred during parameter evaluation.
+        * For JIM_CONTINUE, we check our own params.
+        */
+       e = adiv5_jim_configure(target, goi);
+       if (e != JIM_CONTINUE)
+               return e;
+
+       /* parse config or cget options ... */
+       if (goi->argc > 0) {
+               Jim_SetEmptyResult(goi->interp);
+
+               /* check first if topmost item is for us */
+               e = Jim_Nvp_name2value_obj(goi->interp, nvp_config_opts,
+                               goi->argv[0], &n);
+               if (e != JIM_OK)
+                       return JIM_CONTINUE;
+
+               /*
+                * NOTE(review): with a NULL destination this is assumed to
+                * just consume the option name that was matched above.
+                */
+               e = Jim_GetOpt_Obj(goi, NULL);
+               if (e != JIM_OK)
+                       return e;
+
+               switch (n->value) {
+               case CFG_CTI: {
+                       if (goi->isconfigure) {
+                               /* configure: the next argument names the CTI instance */
+                               Jim_Obj *o_cti;
+                               struct arm_cti *cti;
+                               e = Jim_GetOpt_Obj(goi, &o_cti);
+                               if (e != JIM_OK)
+                                       return e;
+                               cti = cti_instance_by_jim_obj(goi->interp, o_cti);
+                               if (cti == NULL) {
+                                       Jim_SetResultString(goi->interp, "CTI name invalid!", -1);
+                                       return JIM_ERR;
+                               }
+                               pc->cti = cti;
+                       } else {
+                               /* cget: no further arguments are accepted */
+                               if (goi->argc != 0) {
+                                       Jim_WrongNumArgs(goi->interp,
+                                                       goi->argc, goi->argv,
+                                                       "NO PARAMS");
+                                       return JIM_ERR;
+                               }
+
+                               /* pc is guaranteed non-NULL by the allocation check above */
+                               if (pc->cti == NULL) {
+                                       Jim_SetResultString(goi->interp, "CTI not configured", -1);
+                                       return JIM_ERR;
+                               }
+                               Jim_SetResultString(goi->interp, arm_cti_name(pc->cti), -1);
+                       }
+                       break;
+               }
+
+               default:
+                       return JIM_CONTINUE;
+               }
+       }
+
+       return JIM_OK;
+}
+
 COMMAND_HANDLER(aarch64_handle_cache_info_command)
 {
        struct target *target = get_current_target(CMD_CTX);
@@ -2086,27 +2587,171 @@ COMMAND_HANDLER(aarch64_handle_smp_on_command)
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(aarch64_handle_smp_gdb_command)
+/*
+ * "maskisr" command: with an argument ("on" or "off") store the matching
+ * mode in aarch64->isrmasking_mode; in every case print the mode that is
+ * now in effect.  Only the first argument is looked at.
+ *
+ * Returns ERROR_COMMAND_SYNTAX_ERROR for an unknown argument, ERROR_OK
+ * otherwise.
+ */
+COMMAND_HANDLER(aarch64_mask_interrupts_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       int retval = ERROR_OK;
-       struct target_list *head;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               if (CMD_ARGC == 1) {
-                       int coreid = 0;
-                       COMMAND_PARSE_NUMBER(int, CMD_ARGV[0], coreid);
-                       if (ERROR_OK != retval)
-                               return retval;
-                       target->gdb_service->core[1] = coreid;
+       struct aarch64_common *aarch64 = target_to_aarch64(target);
 
+       /* accepted arguments and the mode each selects; the last entry has a NULL name */
+       static const Jim_Nvp nvp_maskisr_modes[] = {
+               { .name = "off", .value = AARCH64_ISRMASK_OFF },
+               { .name = "on", .value = AARCH64_ISRMASK_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_maskisr_modes, CMD_ARGV[0]);
+               /* a NULL name means the lookup ended on the table terminator */
+               if (n->name == NULL) {
+                       LOG_ERROR("Unknown parameter: %s - should be off or on", CMD_ARGV[0]);
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
-               command_print(CMD_CTX, "gdb coreid  %" PRId32 " -> %" PRId32, target->gdb_service->core[0]
-                       , target->gdb_service->core[1]);
+
+               aarch64->isrmasking_mode = n->value;
        }
+
+       /* report the current mode, whether or not it was just changed */
+       n = Jim_Nvp_value2name_simple(nvp_maskisr_modes, aarch64->isrmasking_mode);
+       command_print(CMD_CTX, "aarch64 interrupt mask %s", n->name);
+
        return ERROR_OK;
 }
 
+/*
+ * Tcl handler shared by the "mcr" and "mrc" commands.
+ *
+ * argv[0] selects the operation: "mcr" writes a coprocessor register and
+ * expects "cpnum op1 CRn CRm op2 value"; any other command name is handled
+ * as a read ("mrc"), expects "cpnum op1 CRn CRm op2" and leaves the value
+ * read in the interpreter result.
+ *
+ * The current target must be examined, be an ARM, be halted and must not be
+ * in AArch64 state.  Each operand is range checked against its field mask.
+ *
+ * Returns JIM_OK on success; JIM_ERR (or the status of Jim_GetLong()) on
+ * any failure.  Only Jim status codes are returned.
+ */
+static int jim_mcrmrc(Jim_Interp *interp, int argc, Jim_Obj * const *argv)
+{
+       struct command_context *context;
+       struct target *target;
+       struct arm *arm;
+       int retval;
+       bool is_mcr = false;
+       int arg_cnt = 0;
+
+       /* argument count includes the command name itself */
+       if (Jim_CompareStringImmediate(interp, argv[0], "mcr")) {
+               is_mcr = true;
+               arg_cnt = 7;
+       } else {
+               arg_cnt = 6;
+       }
+
+       context = current_command_context(interp);
+       assert(context != NULL);
+
+       target = get_current_target(context);
+       if (target == NULL) {
+               LOG_ERROR("%s: no current target", __func__);
+               return JIM_ERR;
+       }
+       if (!target_was_examined(target)) {
+               LOG_ERROR("%s: not yet examined", target_name(target));
+               return JIM_ERR;
+       }
+
+       arm = target_to_arm(target);
+       if (!is_arm(arm)) {
+               LOG_ERROR("%s: not an ARM", target_name(target));
+               return JIM_ERR;
+       }
+
+       /*
+        * This is a Jim command handler: report the failure with a Jim
+        * status, not with an OpenOCD error code.
+        */
+       if (target->state != TARGET_HALTED) {
+               LOG_ERROR("%s: not halted", target_name(target));
+               return JIM_ERR;
+       }
+
+       if (arm->core_state == ARM_STATE_AARCH64) {
+               LOG_ERROR("%s: not 32-bit arm target", target_name(target));
+               return JIM_ERR;
+       }
+
+       if (argc != arg_cnt) {
+               LOG_ERROR("%s: wrong number of arguments", __func__);
+               return JIM_ERR;
+       }
+
+       int cpnum;
+       uint32_t op1;
+       uint32_t op2;
+       uint32_t CRn;
+       uint32_t CRm;
+       uint32_t value;
+       long l;
+
+       /* NOTE:  parameter sequence matches ARM instruction set usage:
+        *      MCR     pNUM, op1, rX, CRn, CRm, op2    ; write CP from rX
+        *      MRC     pNUM, op1, rX, CRn, CRm, op2    ; read CP into rX
+        * The "rX" is necessarily omitted; it uses Tcl mechanisms.
+        */
+       retval = Jim_GetLong(interp, argv[1], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "coprocessor", (int) l);
+               return JIM_ERR;
+       }
+       cpnum = l;
+
+       retval = Jim_GetLong(interp, argv[2], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0x7) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "op1", (int) l);
+               return JIM_ERR;
+       }
+       op1 = l;
+
+       retval = Jim_GetLong(interp, argv[3], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "CRn", (int) l);
+               return JIM_ERR;
+       }
+       CRn = l;
+
+       retval = Jim_GetLong(interp, argv[4], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0xf) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "CRm", (int) l);
+               return JIM_ERR;
+       }
+       CRm = l;
+
+       retval = Jim_GetLong(interp, argv[5], &l);
+       if (retval != JIM_OK)
+               return retval;
+       if (l & ~0x7) {
+               LOG_ERROR("%s: %s %d out of range", __func__,
+                       "op2", (int) l);
+               return JIM_ERR;
+       }
+       op2 = l;
+
+       value = 0;
+
+       if (is_mcr) {
+               /* the value to write is the last operand; it is not range checked */
+               retval = Jim_GetLong(interp, argv[6], &l);
+               if (retval != JIM_OK)
+                       return retval;
+               value = l;
+
+               /* NOTE: parameters reordered! */
+               /* ARMV4_5_MCR(cpnum, op1, 0, CRn, CRm, op2) */
+               retval = arm->mcr(target, cpnum, op1, op2, CRn, CRm, value);
+               if (retval != ERROR_OK)
+                       return JIM_ERR;
+       } else {
+               /* NOTE: parameters reordered! */
+               /* ARMV4_5_MRC(cpnum, op1, 0, CRn, CRm, op2) */
+               retval = arm->mrc(target, cpnum, op1, op2, CRn, CRm, &value);
+               if (retval != ERROR_OK)
+                       return JIM_ERR;
+
+               Jim_SetResult(interp, Jim_NewIntObj(interp, value));
+       }
+
+       return JIM_OK;
+}
+
 static const struct command_registration aarch64_exec_command_handlers[] = {
        {
                .name = "cache_info",
@@ -2136,27 +2781,39 @@ static const struct command_registration aarch64_exec_command_handlers[] = {
                .usage = "",
        },
        {
-               .name = "smp_gdb",
-               .handler = aarch64_handle_smp_gdb_command,
+               .name = "maskisr",
+               .handler = aarch64_mask_interrupts_command,
+               .mode = COMMAND_ANY,
+               .help = "mask aarch64 interrupts during single-step",
+               .usage = "['on'|'off']",
+       },
+       {
+               .name = "mcr",
                .mode = COMMAND_EXEC,
-               .help = "display/fix current core played to gdb",
-               .usage = "",
+               .jim_handler = jim_mcrmrc,
+               .help = "write coprocessor register",
+               .usage = "cpnum op1 CRn CRm op2 value",
+       },
+       {
+               .name = "mrc",
+               .mode = COMMAND_EXEC,
+               .jim_handler = jim_mcrmrc,
+               .help = "read coprocessor register",
+               .usage = "cpnum op1 CRn CRm op2",
        },
 
 
        COMMAND_REGISTRATION_DONE
 };
+
 static const struct command_registration aarch64_command_handlers[] = {
-       {
-               .chain = arm_command_handlers,
-       },
        {
                .chain = armv8_command_handlers,
        },
        {
-               .name = "cortex_a",
+               .name = "aarch64",
                .mode = COMMAND_ANY,
-               .help = "Cortex-A command group",
+               .help = "Aarch64 command group",
                .usage = "",
                .chain = aarch64_exec_command_handlers,
        },
@@ -2177,16 +2834,12 @@ struct target_type aarch64_target = {
        .deassert_reset = aarch64_deassert_reset,
 
        /* REVISIT allow exporting VFP3 registers ... */
+       .get_gdb_arch = armv8_get_gdb_arch,
        .get_gdb_reg_list = armv8_get_gdb_reg_list,
 
        .read_memory = aarch64_read_memory,
        .write_memory = aarch64_write_memory,
 
-       .checksum_memory = arm_checksum_memory,
-       .blank_check_memory = arm_blank_check_memory,
-
-       .run_algorithm = armv4_5_run_algorithm,
-
        .add_breakpoint = aarch64_add_breakpoint,
        .add_context_breakpoint = aarch64_add_context_breakpoint,
        .add_hybrid_breakpoint = aarch64_add_hybrid_breakpoint,
@@ -2196,7 +2849,9 @@ struct target_type aarch64_target = {
 
        .commands = aarch64_command_handlers,
        .target_create = aarch64_target_create,
+       .target_jim_configure = aarch64_jim_configure,
        .init_target = aarch64_init_target,
+       .deinit_target = aarch64_deinit_target,
        .examine = aarch64_examine,
 
        .read_phys_memory = aarch64_read_phys_memory,

Linking to existing account procedure

If you already have an account and want to add another login method, you MUST first sign in with your existing account and then change the URL to https://review.openocd.org/login/?link to return to this page; this time linking will work. Thank you.

SSH host key fingerprints

1024 SHA256:YKx8b7u5ZWdcbp7/4AeXNaqElP49m6QrwfXaqQGJAOk gerrit-code-review@openocd.zylin.com (DSA)
384 SHA256:jHIbSQa4REvwCFG4cq5LBlBLxmxSqelQPem/EXIrxjk gerrit-code-review@openocd.org (ECDSA)
521 SHA256:UAOPYkU9Fjtcao0Ul/Rrlnj/OsQvt+pgdYSZ4jOYdgs gerrit-code-review@openocd.org (ECDSA)
256 SHA256:A13M5QlnozFOvTllybRZH6vm7iSt0XLxbA48yfc2yfY gerrit-code-review@openocd.org (ECDSA)
256 SHA256:spYMBqEYoAOtK7yZBrcwE8ZpYt6b68Cfh9yEVetvbXg gerrit-code-review@openocd.org (ED25519)
+--[ED25519 256]--+
|=..              |
|+o..   .         |
|*.o   . .        |
|+B . . .         |
|Bo. = o S        |
|Oo.+ + =         |
|oB=.* = . o      |
| =+=.+   + E     |
|. .=o   . o      |
+----[SHA256]-----+
2048 SHA256:0Onrb7/PHjpo6iVZ7xQX2riKN83FJ3KGU0TvI0TaFG4 gerrit-code-review@openocd.zylin.com (RSA)