startup_ARMv8x1_GCC
来源:互联网 发布:Windows7远程端口设置 编辑:程序博客网 时间:2024/06/03 02:22
startup.S
start64:
// ------------------------------------------------------------
// start64 - AArch64 boot path, entered at EL3
//
// Syntax: GNU as (AArch64), passed through the C preprocessor
//         (uses #ifdef and symbolic constants defined elsewhere).
//
// Flow:
//   EL3: program the vector bases, SCR_EL3, the GIC system-register
//        enables, the EL2 "pass-through" state and the SCTLRs, do
//        the Secure-side GIC configuration, then ERET to EL1.
//   EL1: set up the stack, FP access, MAIR/TCR/TTBR0; CPU0 builds the
//        translation tables, enables the MMU and caches and calls
//        main(); every other CPU parks in a WFI loop.
//
// Registers that stay live across subroutine calls (all callee-saved
// under the AAPCS64):
//   x19 = CPU number (MPIDR_EL1.Aff0)
//   x21 = address of the L1 translation table
//   x22 = address of the L2 table used for RAM
//   x23 = L1 table index of the RAM region
//   x24 = address of the L2 table used for peripherals
//   x25 = L1 table index of the peripheral region
// ------------------------------------------------------------

    // gshen: configure the exception vectors for each ELx
    //
    // program the VBARs
    //
    ldr     x1, =el1_vectors
    msr     VBAR_EL1, x1

    ldr     x1, =el2_vectors
    msr     VBAR_EL2, x1

    ldr     x1, =el3_vectors
    msr     VBAR_EL3, x1

    //
    // SCR_EL3, Secure Configuration Register
    //
    // Ensure the NS bit is initially clear, so that the Secure copy of
    // ICC_SRE_EL1 can be configured
    //
    msr     SCR_EL3, xzr
    isb

    mov     x0, #15
    msr     ICC_SRE_EL3, x0
    isb
    msr     ICC_SRE_EL1, x0             // Secure copy of ICC_SRE_EL1

    //
    // set lower exception levels as non-secure, with no access
    // back to EL2 or EL3, and are AArch64 capable
    //
    // Setting the NS bit also gives access to the Non-secure registers
    //
    mov     x3, #(SCR_EL3_RW | SCR_EL3_SMD | SCR_EL3_NS)
    msr     SCR_EL3, x3
    isb

    mov     x0, #15
    msr     ICC_SRE_EL2, x0
    isb
    msr     ICC_SRE_EL1, x0             // Non-secure copy of ICC_SRE_EL1

    //
    // no traps or VM modifications from the Hypervisor, EL1 is AArch64
    //
    mov     x2, #HCR_EL2_RW
    msr     HCR_EL2, x2

    //
    // VMID is still significant, even when virtualisation is not
    // being used, so ensure VTTBR_EL2 is properly initialised
    //
    msr     VTTBR_EL2, xzr

    //
    // VMPIDR_EL2 holds the value of the Virtualization Multiprocessor ID.
    // This is the value returned by Non-secure EL1 reads of MPIDR_EL1.
    // VPIDR_EL2 holds the value of the Virtualization Processor ID.
    // This is the value returned by Non-secure EL1 reads of MIDR_EL1.
    //
    // Both of these registers are architecturally UNKNOWN at reset, so
    // they must be set to the correct value (even if EL2/virtualization
    // is not being used); otherwise Non-secure EL1 reads of
    // MPIDR_EL1/MIDR_EL1 will return garbage values.
    //
    // keep MPIDR_EL1.Aff0 (i.e. the CPU no. on Cortex-A cores) in
    // x19 (defined by the AAPCS as callee-saved), so we can re-use
    // the number later
    //
    mrs     x0, MPIDR_EL1
    ubfx    x19, x0, #MPIDR_EL1_AFF0_LSB, #MPIDR_EL1_AFF_WIDTH
    msr     VMPIDR_EL2, x0

    mrs     x0, MIDR_EL1
    msr     VPIDR_EL2, x0

    //
    // neither EL3 nor EL2 trap floating point or accesses to CPACR
    //
    msr     CPTR_EL3, xzr
    msr     CPTR_EL2, xzr

    //
    // SCTLR_ELx may come out of reset with UNKNOWN values so we will
    // set the fields to 0 except, possibly, the endianness field(s).
    // Note that setting SCTLR_EL2 or the EL0 related fields of SCTLR_EL1
    // is not strictly needed, since we're never in EL2 or EL0
    //
#ifdef __ARM_BIG_ENDIAN
    mov     x0, #(SCTLR_ELx_EE | SCTLR_EL1_E0E)
#else
    mov     x0, #0
#endif
    msr     SCTLR_EL3, x0
    msr     SCTLR_EL2, x0
    msr     SCTLR_EL1, x0

#ifdef CORTEXA
    //
    // Configure ACTLR_EL[23]
    // ----------------------
    //
    // These bits are IMPLEMENTATION DEFINED, so are different for
    // different processors
    //
    // For Cortex-A57, the controls we set are:
    //
    //  bit 0 - Enable lower level access to CPUACTLR_EL1
    //  bit 1 - Enable lower level access to CPUECTLR_EL1
    //  bit 4 - Enable lower level access to L2CTLR_EL1
    //  bit 5 - Enable lower level access to L2ECTLR_EL1
    //  bit 6 - Enable lower level access to L2ACTLR_EL1
    //
    mov     x0, #((1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6))
    msr     ACTLR_EL3, x0
    msr     ACTLR_EL2, x0

    //
    // configure CPUECTLR_EL1
    //
    // These bits are IMP DEF, so need to be different for different
    // processors
    //
    // SMPEN - bit 6 - Enables the processor to receive cache
    //                 and TLB maintenance operations
    //
    // Note: For Cortex-A57/53 SMPEN should be set before enabling
    //       the caches and MMU, or performing any cache and TLB
    //       maintenance operations.
    //
    // This register has a defined reset value, so we use a
    // read-modify-write sequence to set SMPEN
    //
    mrs     x0, S3_1_c15_c2_1           // Read EL1 CPU Extended Control Register
    orr     x0, x0, #(1 << 6)           // Set the SMPEN bit
    msr     S3_1_c15_c2_1, x0           // Write EL1 CPU Extended Control Register
    isb
#endif

    //
    // That's the last of the control settings for now
    //
    // Note: no ISB after all these changes, as registers won't be
    // accessed until after an exception return, which is itself a
    // context synchronisation event
    //

    //
    // Setup some EL3 stack space, ready for calling some subroutines, below.
    //
    // Stack space allocation is CPU-specific, so use CPU
    // number already held in x19
    //
    // 2^12 bytes per CPU for the EL3 stacks
    //
    ldr     x0, =__el3_stack
    sub     x0, x0, x19, lsl #12
    mov     sp, x0

    //
    // we need to configure the GIC while still in secure mode, specifically
    // all PPIs and SPIs have to be programmed as Group1 interrupts
    //

    //
    // Before the GIC can be reliably programmed, we need to
    // enable Affinity Routing, as this affects where the configuration
    // registers are (with Affinity Routing enabled, some registers are
    // in the Redistributor, whereas those same registers are in the
    // Distributor with Affinity Routing disabled, i.e. when in GICv2
    // compatibility mode).
    //
    mov     x0, #((1 << 4) | (1 << 5))  // gicdctlr_ARE_S | gicdctlr_ARE_NS
    mov     x1, x19
    bl      SyncAREinGICD

    //
    // The Redistributor comes out of reset assuming the processor is
    // asleep - correct that assumption
    //
    mov     w0, w19
    bl      WakeupGICR

    //
    // Now we're ready to set security and other initialisations
    //
    // This is a per-CPU configuration for these interrupts
    //
    // for the first cluster, CPU number is the redistributor index
    //
    mov     w0, w19
    mov     w1, #1                      // gicigroupr_G1NS
    bl      SetPrivateIntSecurityBlock

    //
    // While we're in the Secure World, set the priority mask low enough
    // for it to be writable in the Non-Secure World
    //
    // (alternative kept from the original, disabled:
    //      mov x0, #16 << 3   - 5 bits of priority in the Secure world)
    //
    mov     x0, #0xFF                   // for Non-Secure interrupts
    msr     ICC_PMR_EL1, x0

    //
    // there's more GIC setup to do, but only for the primary CPU
    //
    cbnz    x19, drop_to_el1

    //
    // There's more to do to the GIC - call the utility routine to set
    // all SPIs to Group1
    //
    mov     w0, #1                      // gicigroupr_G1NS
    bl      SetSPISecurityAll

    //
    // Set up EL1 entry point and "dummy" exception return information,
    // then perform exception return to enter EL1
    //
    .global drop_to_el1
drop_to_el1:
    adr     x1, el1_entry_aarch64
    msr     ELR_EL3, x1
    mov     x1, #(AARCH64_SPSR_EL1h | AARCH64_SPSR_F | AARCH64_SPSR_I | AARCH64_SPSR_A)
    msr     SPSR_EL3, x1
    // gshen: jump to EL1 via eret
    eret


// ------------------------------------------------------------
// EL1 - Common start-up code
// ------------------------------------------------------------

    .global el1_entry_aarch64
    .type el1_entry_aarch64, "function"
el1_entry_aarch64:

    //
    // Now we're in EL1, setup the application stack
    // the scatter file allocates 2^14 bytes per app stack
    //
    ldr     x0, =__stack
    sub     x0, x0, x19, lsl #14
    mov     sp, x0

    //
    // Enable floating point
    //
    mov     x0, #CPACR_EL1_FPEN
    msr     CPACR_EL1, x0

    //
    // Invalidate caches and TLBs for all stage 1
    // translations used at EL1
    //
    // Cortex-A processors automatically invalidate their caches on reset
    // (unless suppressed with the DBGL1RSTDISABLE or L2RSTDISABLE pins).
    // It is therefore not necessary for software to invalidate the caches
    // on startup, however, this is done here in case of a warm reset.
    //
    bl      InvalidateUDCaches
    tlbi    VMALLE1

    //
    // Set TTBR0 Base address
    //
    // The CPUs share one set of translation tables that are
    // generated by CPU0 at run-time
    //
    // TTBR1_EL1 is not used in this example
    //
    ldr     x1, =__ttb0_l1
    msr     TTBR0_EL1, x1

    //
    // Set up memory attributes
    //
    // These equate to:
    //
    // 0 -> 0b01000100 = 0x00000044 = Normal, Inner/Outer Non-Cacheable
    // 1 -> 0b11111111 = 0x0000ff00 = Normal, Inner/Outer WriteBack Read/Write Allocate
    // 2 -> 0b00000100 = 0x00040000 = Device-nGnRE
    //
    mov     x1, #0xff44
    movk    x1, #4, LSL #16             // x1 = 0x0004ff44
    msr     MAIR_EL1, x1

    //
    // Set up TCR_EL1
    //
    // We're using only TTBR0 (EPD1 = 1), and the page table entries:
    //  - are using an 8-bit ASID from TTBR0
    //  - have a 4K granularity (TG0 = 0b00)
    //  - are outer-shareable (SH0 = 0b10)
    //  - are using Inner & Outer WBWA Normal memory ([IO]RGN0 = 0b01)
    //  - map
    //      + 32 bits of VA space (T0SZ = 0x20)
    //      + into a 32-bit PA space (IPS = 0b000)
    //
    // Resulting value 0x00802520:
    //
    //   field    bits      value
    //   IPS      [34:32]   0b000
    //   EPD1     [23]      1
    //   TG0      [15:14]   0b00
    //   SH0      [13:12]   0b10
    //   ORGN0    [11:10]   0b01
    //   IRGN0    [9:8]     0b01
    //   T0SZ     [5:0]     0x20
    //
    // Note: the ISB is needed to ensure the changes to system
    //       context are before the write of SCTLR_EL1.M to enable
    //       the MMU. It is likely on a "real" implementation that
    //       this setup would work without an ISB, due to the
    //       amount of code that gets executed before enabling the
    //       MMU, but that would not be architecturally correct.
    //
    ldr     x1, =0x0000000000802520
    msr     TCR_EL1, x1
    isb

    //
    // x19 already contains the CPU number, so branch to secondary
    // code if we're not on CPU0
    //
    cbnz    x19, el1_secondary

    //
    // Fall through to primary code
    //


// ------------------------------------------------------------
// EL1 - primary CPU init code
//
// This code is run on CPU0, while the other CPUs are in the
// holding pen
// ------------------------------------------------------------

    .global el1_primary
    .type el1_primary, "function"
el1_primary:

    //
    // We're now on the primary processor in the NS world: turn on
    // the banked GIC distributor enable, ready for individual CPU
    // enables later
    //
    mov     w0, #(1 << 1)               // gicdctlr_EnableGrp1A
    bl      EnableGICD

    //
    // Generate TTBR0 L1
    //
    // at 4KB granularity, 32-bit VA space, table lookup starts at
    // L1, with 1GB regions
    //
    // we are going to create entries pointing to L2 tables for a
    // couple of these 1GB regions, the first of which is the
    // RAM on the VE board model - get the table addresses and
    // start by emptying out the L1 page tables (4 entries at L1
    // for a 4K granularity)
    //
    // x21 = address of L1 tables
    //
    ldr     x21, =__ttb0_l1
    mov     x0, x21
    mov     x1, #(4 << 3)               // 4 entries * 8 bytes
    bl      ZeroBlock

    //
    // time to start mapping the RAM regions - clear out the
    // L2 tables and point to them from the L1 tables
    //
    // x22 = address of L2 tables, needs to be remembered in case
    //       we want to re-use the tables for mapping peripherals
    //
    ldr     x22, =__ttb0_l2_ram
    mov     x1, #(512 << 3)             // 512 entries * 8 bytes
    mov     x0, x22
    bl      ZeroBlock

    //
    // Get the start address of RAM (the EXEC region) into x4
    // and calculate the offset into the L1 table (1GB per region,
    // max 4GB)
    //
    // x23 = L1 table offset, saved for later comparison against
    //       peripheral offset
    //
    ldr     x4, =__code_start
    ubfx    x23, x4, #30, #2

    orr     x1, x22, #TT_S1_ATTR_PAGE
    str     x1, [x21, x23, lsl #3]

    //
    // we've already used the RAM start address in x4 - we now need
    // to get this in terms of an offset into the L2 page tables,
    // where each entry covers 2MB
    //
    ubfx    x2, x4, #21, #9

    //
    // TOP_OF_RAM in the scatter file marks the end of the
    // Execute region in RAM: convert the end of this region to an
    // offset too, being careful to round up, then calculate the
    // number of entries to write
    //
    ldr     x5, =__top_of_ram
    sub     x3, x5, #1
    ubfx    x3, x3, #21, #9
    add     x3, x3, #1
    sub     x3, x3, x2                  // x3 = number of 2MB entries to write

    //
    // set x1 to the required page table attributes, then orr
    // in the start address (modulo 2MB)
    //
    // L2 tables in our configuration cover 2MB per entry - map
    // memory as Shared, Normal WBWA (MAIR[1]) with a flat
    // VA->PA translation
    //
    bic     x4, x4, #((1 << 21) - 1)
    mov     x1, #(TT_S1_ATTR_BLOCK | \
                  (1 << TT_S1_ATTR_MATTR_LSB) | \
                  TT_S1_ATTR_NS | \
                  TT_S1_ATTR_AP_RW_PL1 | \
                  TT_S1_ATTR_SH_INNER | \
                  TT_S1_ATTR_AF | \
                  TT_S1_ATTR_nG)
    orr     x1, x1, x4

    //
    // factor the offset into the page table address and then write
    // the entries
    //
    add     x0, x22, x2, lsl #3

loop1:
    subs    x3, x3, #1                  // flags consumed by the bne below
    str     x1, [x0], #8
    add     x1, x1, #0x200, LSL #12     // += (1 << 21), i.e. 2MB per entry
    bne     loop1

    //
    // now mapping the Peripheral regions - clear out the
    // L2 tables and point to them from the L1 tables
    //
    // The assumption here is that all peripherals live within
    // a common 1GB region (i.e. that there's a single set of
    // L2 pages for all the peripherals). We only use a UART
    // and the GIC in this example, so the assumption is sound
    //
    // x24 = address of L2 peripheral tables
    //
    ldr     x24, =__ttb0_l2_periph

    //
    // get the GICD address into x4 and calculate
    // the offset into the L1 table
    //
    // x25 = L1 table offset
    //
    ldr     x4, =gicd
    ubfx    x25, x4, #30, #2

    //
    // here's the tricky bit: it's possible that the peripherals are
    // in the same 1GB region as the RAM, in which case we don't need
    // to prime a separate set of L2 page tables, nor add them to the
    // L1 tables
    //
    // if we're going to re-use the TTB0_L2_RAM tables, get their
    // address into x24, which is used later on to write the PTEs
    //
    cmp     x25, x23
    csel    x24, x22, x24, EQ
    b.eq    nol2setup

    //
    // Peripherals are in a separate 1GB region, and so have their own
    // set of L2 tables - clean out the tables and add them to the L1
    // table
    //
    mov     x0, x24
    mov     x1, #(512 << 3)
    bl      ZeroBlock

    orr     x1, x24, #TT_S1_ATTR_PAGE
    str     x1, [x21, x25, lsl #3]

    //
    // there's only going to be a single 2MB region for GICD (in
    // x4) - get this in terms of an offset into the L2 page tables
    //
    // with larger systems, it is possible that the GIC redistributor
    // registers require extra 2MB pages, in which case extra code
    // would be required here
    //
nol2setup:
    //
    // x4 is caller-saved under the AAPCS64 and one path to this label
    // goes through "bl ZeroBlock", so reload the GICD address instead
    // of relying on the callee having left x4 untouched
    //
    ldr     x4, =gicd
    ubfx    x2, x4, #21, #9

    //
    // set x1 to the required page table attributes, then orr
    // in the start address (modulo 2MB)
    //
    // L2 tables in our configuration cover 2MB per entry - map
    // memory as NS Device-nGnRE (MAIR[2]) with a flat VA->PA
    // translation
    //
    bic     x4, x4, #((1 << 21) - 1)    // start address mod 2MB
    mov     x1, #(TT_S1_ATTR_BLOCK | \
                  (2 << TT_S1_ATTR_MATTR_LSB) | \
                  TT_S1_ATTR_NS | \
                  TT_S1_ATTR_AP_RW_PL1 | \
                  TT_S1_ATTR_AF | \
                  TT_S1_ATTR_nG)
    orr     x1, x1, x4

    //
    // only a single L2 entry for this, so no loop as we have for RAM, above
    //
    str     x1, [x24, x2, lsl #3]

    //
    // we have CS3_PERIPHERALS that include the UART controller
    //
    // Again, the code is making assumptions - this time that the
    // CS3_PERIPHERALS region uses the same 1GB portion of the address
    // space as the GICD, and thus shares the same set of L2 page tables
    //
    // Get CS3_PERIPHERALS address into x4 and calculate the offset into the
    // L2 tables
    //
    ldr     x4, =__cs3_peripherals
    ubfx    x2, x4, #21, #9

    //
    // set x1 to the required page table attributes, then orr
    // in the start address (modulo 2MB)
    //
    // L2 tables in our configuration cover 2MB per entry - map
    // memory as NS Device-nGnRE (MAIR[2]) with a flat VA->PA
    // translation
    //
    bic     x4, x4, #((1 << 21) - 1)    // start address mod 2MB
    mov     x1, #(TT_S1_ATTR_BLOCK | \
                  (2 << TT_S1_ATTR_MATTR_LSB) | \
                  TT_S1_ATTR_NS | \
                  TT_S1_ATTR_AP_RW_PL1 | \
                  TT_S1_ATTR_AF | \
                  TT_S1_ATTR_nG)
    orr     x1, x1, x4

    //
    // only a single L2 entry again - write it
    //
    str     x1, [x24, x2, lsl #3]

    //
    // issue a barrier to ensure all table entry writes are complete
    //
    dsb     ish

    //
    // Enable the MMU. The caches are enabled separately, further down.
    //
    mrs     x1, SCTLR_EL1
    orr     x1, x1, #SCTLR_ELx_M
    bic     x1, x1, #SCTLR_ELx_A        // Disable alignment fault checking. To enable, change bic to orr
    msr     SCTLR_EL1, x1
    isb

    //
    // The ARM Architecture Reference Manual for ARMv8-A states:
    //
    //  Instruction accesses to Non-cacheable Normal memory can be held in
    //  instruction caches. Correspondingly, the sequence for ensuring that
    //  modifications to instructions are available for execution must
    //  include invalidation of the modified locations from the instruction
    //  cache, even if the instructions are held in Normal Non-cacheable
    //  memory. This includes cases where the instruction cache is disabled.
    //
    dsb     ish                         // ensure all previous stores have completed before invalidating
    ic      ialluis                     // I cache invalidate all inner shareable to PoU (which includes secondary cores)
    dsb     ish                         // ensure completion on inner shareable domain (which includes secondary cores)
    isb

    //
    // Enable the caches here, so that the C-library's mutex
    // initialization later will work
    //
    mrs     x1, SCTLR_EL1
    orr     x1, x1, #SCTLR_ELx_C
    orr     x1, x1, #SCTLR_ELx_I
    msr     SCTLR_EL1, x1
    isb

    //
    // Zero the bss
    //
    ldr     x0, =__bss_start__          // Start of block
    mov     x1, #0                      // Fill value
    ldr     x2, =__bss_end__            // End of block
    sub     x2, x2, x0                  // Length of block
    bl      memset

    //
    // Set up the standard file handles
    //
    bl      initialise_monitor_handles

    //
    // Set up _fini and fini_array to be called at exit
    //
    ldr     x0, =__libc_fini_array
    bl      atexit

    //
    // Call preinit_array, _init and init_array
    //
    bl      __libc_init_array

    //
    // Set argc = 1, argv[0] = "" and then call main
    //
    .pushsection .data
    .align 3
argv:
    .dword arg0
    .dword 0
arg0:
    .byte 0
    .popsection

    mov     x0, #1
    ldr     x1, =argv
    bl      main

    b       exit                        // Will not return


// ------------------------------------------------------------
// EL1 - secondary CPU init code
//
// This code is run on CPUs 1, 2, 3 etc....
// ------------------------------------------------------------

    .global el1_secondary
    .type el1_secondary, "function"
el1_secondary:

loop_wfi:
    dsb     SY                          // Clear all pending data accesses
    wfi                                 // Go to sleep
    b       loop_wfi                    // WFI can complete on any wake-up event: park again
                                        // rather than run off the end of the code
timer_interrupts.c
/* Bare-metal example for ARMv8 Foundation Platform model *//* Timer and interrupts *//* Copyright (C) ARM Limited, 2016. All rights reserved. */#include <stdio.h>#include "GICv3.h"#include "GICv3_gicc.h"#include "sp804_timer.h"// LED Base address#define LED_BASE (volatile unsigned int *)0x1C010008void nudge_leds(void) // Move LEDs along{ static int state = 1; static int value = 1; if (state) { int max = (1 << 7); value <<= 1; if (value == max) state = 0; } else { value >>= 1; if (value == 1) state = 1; } *LED_BASE = value; // Update LEDs hardware}// Initialize Timer 0 and Interrupt Controllervoid init_timer(void){ // Enable interrupts __asm("MSR DAIFClr, #0xF"); setICC_IGRPEN1_EL1(igrpEnable); // Configure the SP804 timer to generate an interrupt setTimerBaseAddress(0x1C110000); initTimer(0x8000, SP804_AUTORELOAD, SP804_GENERATE_IRQ); startTimer(); // The SP804 timer generates SPI INTID 34. Enable // this ID, and route it to core 0.0.0.0 (this one!) SetSPIRoute(34, 0, gicdirouter_ModeSpecific); // Route INTID 34 to 0.0.0.0 (this core) SetSPIPriority(34, 0); // Set INTID 34 to priority to 0 ConfigureSPI(34, gicdicfgr_Level); // Set INTID 34 as level-sensitive EnableSPI(34); // Enable INTID 34}// --------------------------------------------------------void irqHandler(void){ unsigned int ID; ID = getICC_IAR1(); // readIntAck(); // Check for reserved IDs if ((1020 <= ID) && (ID <= 1023)) { printf("irqHandler() - Reserved INTID %d\n\n", ID); return; } switch(ID) { case 34: // Dual-Timer 0 (SP804) printf("irqHandler() - External timer interrupt\n\n"); nudge_leds(); clearTimerIrq(); break; default: // Unexpected ID value printf("irqHandler() - Unexpected INTID %d\n\n", ID); break; } // Write the End of Interrupt register to tell the GIC // we've finished handling the interrupt setICC_EOIR1(ID); // writeAliasedEOI(ID);}// --------------------------------------------------------// Not actually used in this example, but provided for completenessvoid fiqHandler(void){ 
unsigned int ID; unsigned int aliased = 0; ID = getICC_IAR0(); // readIntAck(); printf("fiqHandler() - Read %d from IAR0\n", ID); // Check for reserved IDs if ((1020 <= ID) && (ID <= 1023)) { printf("fiqHandler() - Reserved INTID %d\n\n", ID); ID = getICC_IAR1(); // readAliasedIntAck(); printf("fiqHandler() - Read %d from AIAR\n", ID); aliased = 1; // If still spurious then simply return if ((1020 <= ID) && (ID <= 1023)) return; } switch(ID) { case 34: // Dual-Timer 0 (SP804) printf("fiqHandler() - External timer interrupt\n\n"); clearTimerIrq(); break; default: // Unexpected ID value printf("fiqHandler() - Unexpected INTID %d\n\n", ID); break; } // Write the End of Interrupt register to tell the GIC // we've finished handling the interrupt // NOTE: If the ID was read from the Aliased IAR, then // the aliased EOI register must be used if (aliased == 0) setICC_EOIR0(ID); // writeEOI(ID); else setICC_EOIR1(ID); // writeAliasedEOI(ID);}
0 0
- startup_ARMv8x1_GCC
- es 分词配置
- 垃圾陷阱 洛谷1156 dp
- es6笔记5^_^set、map、iterator
- OpenWRT(六)添加SSH
- fragment状态的保存与获取
- startup_ARMv8x1_GCC
- C#视频——基础知识
- NyOj-28大数阶乘
- python爬虫学习获取邮箱
- javaweb项目中新建一个jsp出错
- 【Java基础知识】Java反射--Class、Constructor、Filed、Method类的使用
- mybatis if test 不为空字符串或null
- BZOJ 1052: [HAOI2007]覆盖问题 二分, 贪心
- es6笔记6^_^generator