diff --git a/boot/start.S b/boot/start.S
index 946171f..0270c5d 100644
--- a/boot/start.S
+++ b/boot/start.S
@@ -1,51 +1,170 @@
-interrupt_vector_table:
-	b . @ Reset
-	b .
-	b . @ SWI instruction
-	b .
-	b .
-	b .
-	b .
-	b .
-
-.comm stack, 0x10000 @ Reserve 64k stack in the BSS
+/*
+ * Kernel entry in assembly. This handles relocating the kernel so that it ends
+ * up where we want it in both physical and virtual memory. We copy the kernel
+ * to a different physical location if necessary, then turn on the MMU, setting
+ * up a dual mapping if the kernel is not in physical memory at the same place
+ * it was linked against. Finally, we jump into the kernel's main() function in
+ * C using the address it is linked against. When the MMU gets fully
+ * initialized later, it will remove the initial 1:1 mapping.
+ */
 
 .globl start
-
 start:
-	ldr r0, tt_base
-	mcr p15, 0, r0, c2, c0, 0 /* TTBR0 */
+	str r1, machine_type /* Back up the machine type and atags registers so we can access them later from C */
+	str r2, atags_ptr
 
-/* Setup page table entries for the page table and kernel (domain 0) */
-	ldr r0, tt_tt_addr
-	ldr r1, tt_tt_val
-	str r1, [r0]
+	bl copy_kernel
+copy_kernel_lr: /* Used by copy_kernel to calculate the address at which the kernel is currently loaded */
 
-	ldr r0, kernel_tt_addr
-	ldr r1, kernel_tt_val
-	str r1, [r0]
+	bl setup_mmu
 
-/* Set access permissions for domain 0 to "Manager" */
-	mov r0, #0x3
-	mcr p15, 0, r0, c3, c0, 0 /* DACR */
+	ldr sp, =stack+0x10000 /* Set up the stack */
+	bl main
 
-/* Enable the MMU */
-	mrc p15, 0, r0, c1, c0, 0 /* SCTLR */
-	orr r0, r0, #0x1
-	mcr p15, 0, r0, c1, c0, 0 /* SCTLR */
+1:
+	b 1b /* Halt */
 
-	ldr sp, =stack+0x10000 @ Set up the stack
-	bl main @ Jump to the main function
+copy_kernel:
+/*
+ * Because we're not necessarily loaded at an address that's aligned the same
+ * as where we're linked, copy the kernel over to fix that up.
+ *
+ * clobbers:
+ *	r0-r10
+ * returns:
+ *	r0 = new kernel base address
+ */
+	sub r0, lr, $(copy_kernel_lr - start) /* r0 <- current address of start */
+	ldr r1, tt_section_align
+	ands r2, r0, r1 /* If we're already aligned to 1mb, early out */
+	bxeq lr
 
-1:
-	b 1b @ Halt
+	mov r2, r0 /* r2 <- r0 <- current address of start */
+	mov r3, #1
+	lsl r3, r3, #20 /* r3 <- 1mb */
+	add r0, r0, r3
+	bic r0, r0, r1 /* r0 <- new address of start */
+	sub r1, r0, r2 /* r1 <- offset between current and new start */
 
-tt_base:
-	.word 0x80000000
-tt_tt_addr:
-	.word 0x80002000
-tt_tt_val:
-	.word 0x80000c02 /* ...c02 means read/write at any priviledge level, and that it's a section w/o PXN bit set */
-kernel_tt_addr:
-	.word 0x80002004
-kernel_tt_val:
-	.word 0x80100c02
+	/* TODO only copy kernel image sections that aren't zeroed (leave out .bss) */
+	ldr r5, =start
+	ldr r6, =kernel_end
+	sub r6, r6, r5
+	add r6, r6, r2 /* r6 <- old kernel_end */
+	add r6, r6, #16
+	bic r6, r6, #0xf /* r6 <- old kernel_end (aligned to 16 bytes) */
+	add r5, r6, r1 /* r5 <- new kernel_end */
+
+copy_kernel_loop:
+	/* Copy the kernel to its new location, 16 bytes at a time. We do this
+	 * from the end to the beginning so we don't overwrite the old kernel if
+	 * the destination and source overlap. */
+	sub r6, r6, #16
+	sub r5, r5, #16
+	ldm r6, {r7, r8, r9, r10}
+	stm r5, {r7, r8, r9, r10}
+	subs r4, r5, r0
+	bne copy_kernel_loop
+
+	add lr, lr, r1 /* Fix up the link register for the new kernel location */
+	bx lr
+
+setup_mmu:
+/*
+ * Calculate the address at which we will store our translation table.
+ * Currently, we store it just past the end of the kernel. Getting the physical
+ * address of the end of the kernel is tricky, since kernel_end is the address
+ * the end of the kernel is linked at, so we have to do a little math.
+ *
+ * arguments:
+ *	r0 = current kernel base address (physical), aligned to 1mb boundary
+ * clobbers:
+ *	r0-r10
+ */
+	/* Find the future virtual address of the translation table */
+	ldr r1, =kernel_end
+	ldr r2, tt_base_align
+	ands r3, r1, r2
+	mov r3, r1
+	addne r3, r1, r2
+	bic r2, r3, r2 /* r2 <- future virtual address of translation table */
+	str r2, tt_base_virtual
+
+	/* Find the physical address of the translation table */
+	ldr r1, =start
+	sub r1, r2, r1
+	add r1, r0, r1 /* r1 <- physical address of translation table */
+	str r1, tt_base_physical
+
+	/* How many sections do we need to map to make sure we have the kernel
+	 * and translation table covered? */
+	ldr r3, tt_base_align
+	add r3, r3, r1
+	sub r3, r3, r0
+	lsr r3, r3, #20
+	add r3, r3, #1 /* r3 <- number of sections to map */
+
+	ldr r4, =start /* r4 <- kernel virtual start address */
+	lsr r5, r4, #18 /* 18 = 20 (1mb) - 2 (4 bytes per entry) */
+	add r5, r5, r1 /* r5 <- address of translation table entry for first kernel section (final mapping) */
+
+	mov r6, r0 /* r6 <- kernel physical start address */
+	lsr r7, r6, #18 /* 18 = 20 (1mb) - 2 (4 bytes per entry) */
+	add r7, r7, r1 /* r7 <- address of translation table entry for first kernel section (initial, 1:1 mapping) */
+
+	mov r8, #1
+	lsl r8, r8, #20 /* r8 <- 1mb */
+	mov r9, #0xc
+	lsl r9, r9, #8
+	orr r9, r9, #2 /* r9 <- 0xc02, which means read/write at any privilege level, and that it's a section w/o the PXN bit set */
+
+initial_tt_loop:
+	/* Set up translation table entries for the translation table and kernel (domain 0) */
+	ldr r10, tt_section_align
+	bic r10, r6, r10
+	orr r10, r10, r9 /* r9=0xc02, which means read/write at any privilege level */
+	str r10, [r7]
+	str r10, [r5]
+
+	add r6, r6, r8
+	add r7, r7, #4
+	add r5, r5, #4
+
+	subs r3, r3, #1
+	bne initial_tt_loop
+
+	mcr p15, 0, r1, c2, c0, 0 /* TTBR0 <- physical address of translation table */
+
+	/* Set access permissions for domain 0 to "Manager" */
+	mov r1, #0x3
+	mcr p15, 0, r1, c3, c0, 0 /* DACR */
+
+	/* Enable the MMU */
+	mrc p15, 0, r1, c1, c0, 0 /* SCTLR */
+	orr r1, r1, #0x1
+	mcr p15, 0, r1, c1, c0, 0 /* SCTLR */
+
+	/* Update lr for the new memory mapping */
+	ldr r1, =start
+	sub r0, r1, r0
+	add lr, lr, r0
+
+	bx lr /* Finally, we jump into the new memory mapping, which matches where we were linked */
+
+tt_base_align:
+	.word 0b111111111111111 /* 16k - 1 */
+tt_section_align:
+	.word 0b11111111111111111111 /* 1mb - 1 */
+.globl tt_base_virtual
+tt_base_virtual:
+	.word 0
+.globl tt_base_physical
+tt_base_physical:
+	.word 0
+.globl atags_ptr
+atags_ptr:
+	.word 0
+.globl machine_type
+machine_type:
+	.word 0
+
+.comm stack, 0x10000 /* Reserve 64k for the stack in .bss */
diff --git a/include/mmu.h b/include/mmu.h
index 3269761..402b396 100644
--- a/include/mmu.h
+++ b/include/mmu.h
@@ -1,6 +1,8 @@
 #ifndef MMU_H
 #define MMU_H
 
+extern unsigned int *kernel_start_phys, *kernel_start_virt, *kernel_end_phys, *kernel_end_virt;
+
 void mmu_reinit();
 
 #endif /* MMU_H */
diff --git a/kernel/mmu.c b/kernel/mmu.c
index 4a59f1b..bde3c4b 100644
--- a/kernel/mmu.c
+++ b/kernel/mmu.c
@@ -10,20 +10,41 @@
 #define cp_read(var, ...) _cp_read(var, __VA_ARGS__)
 #define cp_write(var, ...) _cp_write(var, __VA_ARGS__)
 
+#define TT_BASE_SIZE (1<<14) /* 16k */
+
+unsigned int *kernel_start_phys, *kernel_start_virt, *kernel_end_phys, *kernel_end_virt;
+
 void mmu_reinit() {
-	unsigned int *curr_tt_entry;
+	extern unsigned int tt_base_virtual, tt_base_physical, start;
 	unsigned int curr_addr;
+	unsigned int *curr_tt_entry;
+	int virt_phys_offset;
+
+	virt_phys_offset = tt_base_virtual - tt_base_physical;
+	kernel_start_virt = &start;
+	kernel_start_phys = kernel_start_virt - virt_phys_offset/4;
+	kernel_end_virt = (unsigned int *)(tt_base_virtual + TT_BASE_SIZE);
+	kernel_end_phys = (unsigned int *)(tt_base_physical + TT_BASE_SIZE);
 
 	//get the current translation table base address
-	cp_read(curr_tt_entry, TTBR0);
+	curr_tt_entry = (unsigned int *)tt_base_virtual;
 
 	//do first loop iteration outside the loop, because we have to check against wrapping back around to know we're done
-	*curr_tt_entry = 0xc02;
+	*curr_tt_entry = 0xc02; /* 0xc02 means read/write at any privilege level, and that it's a section w/o the PXN bit set */
 	curr_tt_entry++;
 
-	//create identity mapping for entire address space using sections
+	//create identity mapping for entire address space using sections.
+	//BUT, if we've relocated the kernel from where it is in physical
+	//memory, make sure we keep those mappings correct; we actually
+	//swap the two mappings so that all of memory stays addressable.
 	for (curr_addr = 0x00100000; curr_addr != 0; curr_addr += 0x00100000) {
-		*curr_tt_entry = curr_addr | 0xc02;
+		if ((unsigned int *)curr_addr >= kernel_start_phys && (unsigned int *)curr_addr < kernel_end_phys) {
+			*curr_tt_entry = (curr_addr + virt_phys_offset) | 0xc02;
+		} else if ((unsigned int *)curr_addr >= kernel_start_virt && (unsigned int *)curr_addr < kernel_end_virt) {
+			*curr_tt_entry = (curr_addr - virt_phys_offset) | 0xc02;
+		} else {
+			*curr_tt_entry = curr_addr | 0xc02;
+		}
 		curr_tt_entry++;
 	}
 }
diff --git a/kernel/start_kernel.c b/kernel/start_kernel.c
index 47f59ac..c978642 100644
--- a/kernel/start_kernel.c
+++ b/kernel/start_kernel.c
@@ -7,8 +7,6 @@
 #include 
 #include 
 
-extern const unsigned int kernel_end;
-
 struct fb myfb;
 
 void video(void) {
@@ -64,7 +62,8 @@
 	//setup memory
 	mm_init();
 	mm_add_free_region((void*)0x60000000, (void*)0x7FFFFFFF);
-	lower = (char*) &kernel_end;
+	mm_add_free_region((void*)0x80000000, (void*)0x800FFFFF);
+	lower = (char*) kernel_end_virt;
 	if ((unsigned int)lower % MM_PAGE_SIZE != 0)
 		lower += (MM_PAGE_SIZE - ((unsigned int)lower % MM_PAGE_SIZE));
 	mm_add_free_region((void*)lower, (void*)0x9FFFFFFF); //subtract the memory used by the kernel
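
Note (not part of the patch): the mmu.c hunk above decides, for each 1MB slot, whether to identity-map it or to swap it with the kernel's relocated range. The following host-side C sketch mirrors that decision so it can be tried out off-target. The function and macro names (section_entry, SECTION_FLAGS) and the example addresses are made up for illustration, and plain unsigned integers stand in for the pointer comparisons and the virt_phys_offset/4 pointer arithmetic used in the real mmu_reinit().

#include <stdio.h>

#define SECTION_FLAGS 0xc02u	/* section descriptor: read/write at any privilege level, PXN clear */

/* Pick the 1MB section descriptor for the slot starting at curr_addr,
 * swapping the kernel's physical range with its linked (virtual) range
 * and identity-mapping everything else. */
static unsigned int section_entry(unsigned int curr_addr,
				  unsigned int kstart_phys, unsigned int kend_phys,
				  unsigned int kstart_virt, unsigned int kend_virt)
{
	unsigned int offset = kstart_virt - kstart_phys;	/* plays the role of virt_phys_offset */

	if (curr_addr >= kstart_phys && curr_addr < kend_phys)
		return (curr_addr + offset) | SECTION_FLAGS;	/* kernel's physical home -> its virtual alias */
	if (curr_addr >= kstart_virt && curr_addr < kend_virt)
		return (curr_addr - offset) | SECTION_FLAGS;	/* linked range -> backed by the kernel's physical pages */
	return curr_addr | SECTION_FLAGS;			/* everything else stays identity mapped */
}

int main(void)
{
	/* Example values only: kernel loaded at 1MB physical, linked at 0x80100000, one section long. */
	unsigned int kstart_phys = 0x00100000u, kend_phys = 0x00200000u;
	unsigned int kstart_virt = 0x80100000u, kend_virt = 0x80200000u;

	printf("%#010x -> %#010x\n", 0x00100000u, section_entry(0x00100000u, kstart_phys, kend_phys, kstart_virt, kend_virt));
	printf("%#010x -> %#010x\n", 0x80100000u, section_entry(0x80100000u, kstart_phys, kend_phys, kstart_virt, kend_virt));
	printf("%#010x -> %#010x\n", 0x20000000u, section_entry(0x20000000u, kstart_phys, kend_phys, kstart_virt, kend_virt));
	return 0;
}

The real loop differs only in that it writes each descriptor into the live translation table and handles the very first 1MB entry before the loop starts.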