1/* 2 * linux/arch/i386/mm/init.c 3 * 4 * Copyright (C) 1995 Linus Torvalds 5 * 6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 7 */ 8 9#include <linux/config.h> 10#include <linux/signal.h> 11#include <linux/sched.h> 12#include <linux/kernel.h> 13#include <linux/errno.h> 14#include <linux/string.h> 15#include <linux/types.h> 16#include <linux/ptrace.h> 17#include <linux/mman.h> 18#include <linux/mm.h> 19#include <linux/swap.h> 20#include <linux/smp.h> 21#include <linux/init.h> 22#ifdef CONFIG_BLK_DEV_INITRD 23#include <linux/blk.h> 24#endif 25#include <linux/highmem.h> 26#include <linux/pagemap.h> 27#include <linux/bootmem.h> 28#include <linux/slab.h> 29 30#include <asm/processor.h> 31#include <asm/system.h> 32#include <asm/uaccess.h> 33#include <asm/pgtable.h> 34#include <asm/pgalloc.h> 35#include <asm/dma.h> 36#include <asm/fixmap.h> 37#include <asm/e820.h> 38#include <asm/apic.h> 39#include <asm/tlb.h> 40 41mmu_gather_t mmu_gathers[NR_CPUS]; 42unsigned long highstart_pfn, highend_pfn; 43static unsigned long totalram_pages; 44static unsigned long totalhigh_pages; 45 46int do_check_pgt_cache(int low, int high) 47{ 48 int freed = 0; 49 if(pgtable_cache_size > high) { 50 do { 51 if (pgd_quicklist) { 52 free_pgd_slow(get_pgd_fast()); 53 freed++; 54 } 55 if (pmd_quicklist) { 56 pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); 57 freed++; 58 } 59 if (pte_quicklist) { 60 pte_free_slow(pte_alloc_one_fast(NULL, 0)); 61 freed++; 62 } 63 } while(pgtable_cache_size > low); 64 } 65 return freed; 66} 67 68/* 69 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the 70 * physical space so we can cache the place of the first one and move 71 * around without checking the pgd every time. 72 */ 73 74#if CONFIG_HIGHMEM 75pte_t *kmap_pte; 76pgprot_t kmap_prot; 77 78#define kmap_get_fixmap_pte(vaddr) \ 79 pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) 80 81void __init kmap_init(void) 82{ 83 unsigned long kmap_vstart; 84 85 /* cache the first kmap pte */ 86 kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); 87 kmap_pte = kmap_get_fixmap_pte(kmap_vstart); 88 89 kmap_prot = PAGE_KERNEL; 90} 91#endif /* CONFIG_HIGHMEM */ 92 93void show_mem(void) 94{ 95 int i, total = 0, reserved = 0; 96 int shared = 0, cached = 0; 97 int highmem = 0; 98 99 printk("Mem-info:\n"); 100 show_free_areas(); 101 printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); 102 i = max_mapnr; 103 while (i-- > 0) { 104 total++; 105 if (PageHighMem(mem_map+i)) 106 highmem++; 107 if (PageReserved(mem_map+i)) 108 reserved++; 109 else if (PageSwapCache(mem_map+i)) 110 cached++; 111 else if (page_count(mem_map+i)) 112 shared += page_count(mem_map+i) - 1; 113 } 114 printk("%d pages of RAM\n", total); 115 printk("%d pages of HIGHMEM\n",highmem); 116 printk("%d reserved pages\n",reserved); 117 printk("%d pages shared\n",shared); 118 printk("%d pages swap cached\n",cached); 119 printk("%ld pages in page table cache\n",pgtable_cache_size); 120 show_buffers(); 121} 122 123/* References to section boundaries */ 124 125extern char _text, _etext, _edata, __bss_start, _end; 126extern char __init_begin, __init_end; 127 128static inline void set_pte_phys (unsigned long vaddr, 129 unsigned long phys, pgprot_t flags) 130{ 131 pgd_t *pgd; 132 pmd_t *pmd; 133 pte_t *pte; 134 135 pgd = swapper_pg_dir + __pgd_offset(vaddr); 136 if (pgd_none(*pgd)) { 137 printk("PAE BUG #00!\n"); 138 return; 139 } 140 pmd = pmd_offset(pgd, vaddr); 141 if (pmd_none(*pmd)) { 142 printk("PAE BUG #01!\n"); 143 return; 144 } 145 pte = pte_offset(pmd, vaddr); 146 /* <phys,flags> stored as-is, to permit clearing entries */ 147 set_pte(pte, mk_pte_phys(phys, flags)); 148 149 /* 150 * It's enough to flush this one mapping. 151 * (PGE mappings get flushed as well) 152 */ 153 __flush_tlb_one(vaddr); 154} 155 156void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) 157{ 158 unsigned long address = __fix_to_virt(idx); 159 160 if (idx >= __end_of_fixed_addresses) { 161 printk("Invalid __set_fixmap\n"); 162 return; 163 } 164 set_pte_phys(address, phys, flags); 165} 166 167static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) 168{ 169 pgd_t *pgd; 170 pmd_t *pmd; 171 pte_t *pte; 172 int i, j; 173 unsigned long vaddr; 174 175 vaddr = start; 176 i = __pgd_offset(vaddr); 177 j = __pmd_offset(vaddr); 178 pgd = pgd_base + i; 179 180 for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { 181#if CONFIG_X86_PAE 182 if (pgd_none(*pgd)) { 183 pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 184 set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); 185 if (pmd != pmd_offset(pgd, 0)) 186 printk("PAE BUG #02!\n"); 187 } 188 pmd = pmd_offset(pgd, vaddr); 189#else 190 pmd = (pmd_t *)pgd; 191#endif 192 for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { 193 if (pmd_none(*pmd)) { 194 pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 195 set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); 196 if (pte != pte_offset(pmd, 0)) 197 BUG(); 198 } 199 vaddr += PMD_SIZE; 200 } 201 j = 0; 202 } 203} 204 205static void __init pagetable_init (void) 206{ 207 unsigned long vaddr, end; 208 pgd_t *pgd, *pgd_base; 209 int i, j, k; 210 pmd_t *pmd; 211 pte_t *pte, *pte_base; 212 213 /* 214 * This can be zero as well - no problem, in that case we exit 215 * the loops anyway due to the PTRS_PER_* conditions. 216 */ 217 end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); 218 219 pgd_base = swapper_pg_dir; 220#if CONFIG_X86_PAE 221 for (i = 0; i < PTRS_PER_PGD; i++) 222 set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page))); 223#endif 224 i = __pgd_offset(PAGE_OFFSET); 225 pgd = pgd_base + i; 226 227 for (; i < PTRS_PER_PGD; pgd++, i++) { 228 vaddr = i*PGDIR_SIZE; 229 if (end && (vaddr >= end)) 230 break; 231#if CONFIG_X86_PAE 232 pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 233 set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); 234#else 235 pmd = (pmd_t *)pgd; 236#endif 237 if (pmd != pmd_offset(pgd, 0)) 238 BUG(); 239 for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { 240 vaddr = i*PGDIR_SIZE + j*PMD_SIZE; 241 if (end && (vaddr >= end)) 242 break; 243 if (cpu_has_pse) { 244 unsigned long __pe; 245 246 set_in_cr4(X86_CR4_PSE); 247 boot_cpu_data.wp_works_ok = 1; 248 __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr); 249 /* Make it "global" too if supported */ 250 if (cpu_has_pge) { 251 set_in_cr4(X86_CR4_PGE); 252 __pe += _PAGE_GLOBAL; 253 } 254 set_pmd(pmd, __pmd(__pe)); 255 continue; 256 } 257 258 pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 259 260 for (k = 0; k < PTRS_PER_PTE; pte++, k++) { 261 vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; 262 if (end && (vaddr >= end)) 263 break; 264 *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); 265 } 266 set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); 267 if (pte_base != pte_offset(pmd, 0)) 268 BUG(); 269 270 } 271 } 272 273 /* 274 * Fixed mappings, only the page table structure has to be 275 * created - mappings will be set by set_fixmap(): 276 */ 277 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 278 fixrange_init(vaddr, 0, pgd_base); 279 280#if CONFIG_HIGHMEM 281 /* 282 * Permanent kmaps: 283 */ 284 vaddr = PKMAP_BASE; 285 fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); 286 287 pgd = swapper_pg_dir + __pgd_offset(vaddr); 288 pmd = pmd_offset(pgd, vaddr); 289 pte = pte_offset(pmd, vaddr); 290 pkmap_page_table = pte; 291#endif 292 293#if CONFIG_X86_PAE 294 /* 295 * Add low memory identity-mappings - SMP needs it when 296 * starting up on an AP from real-mode. In the non-PAE 297 * case we already have these mappings through head.S. 298 * All user-space mappings are explicitly cleared after 299 * SMP startup. 300 */ 301 pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; 302#endif 303} 304 305void __init zap_low_mappings (void) 306{ 307 int i; 308 /* 309 * Zap initial low-memory mappings. 310 * 311 * Note that "pgd_clear()" doesn't do it for 312 * us, because pgd_clear() is a no-op on i386. 313 */ 314 for (i = 0; i < USER_PTRS_PER_PGD; i++) 315#if CONFIG_X86_PAE 316 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); 317#else 318 set_pgd(swapper_pg_dir+i, __pgd(0)); 319#endif 320 flush_tlb_all(); 321} 322 323static void __init zone_sizes_init(void) 324{ 325 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; 326 unsigned int max_dma, high, low; 327 328 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; 329 low = max_low_pfn; 330 high = highend_pfn; 331 332 if (low < max_dma) 333 zones_size[ZONE_DMA] = low; 334 else { 335 zones_size[ZONE_DMA] = max_dma; 336 zones_size[ZONE_NORMAL] = low - max_dma; 337#ifdef CONFIG_HIGHMEM 338 zones_size[ZONE_HIGHMEM] = high - low; 339#endif 340 } 341 free_area_init(zones_size); 342} 343 344/* 345 * paging_init() sets up the page tables - note that the first 8MB are 346 * already mapped by head.S. 347 * 348 * This routines also unmaps the page at virtual kernel address 0, so 349 * that we can trap those pesky NULL-reference errors in the kernel. 350 */ 351void __init paging_init(void) 352{ 353 pagetable_init(); 354 355 load_cr3(swapper_pg_dir); 356 357#if CONFIG_X86_PAE 358 /* 359 * We will bail out later - printk doesnt work right now so 360 * the user would just see a hanging kernel. 361 */ 362 if (cpu_has_pae) 363 set_in_cr4(X86_CR4_PAE); 364#endif 365 366 __flush_tlb_all(); 367 368#ifdef CONFIG_HIGHMEM 369 kmap_init(); 370#endif 371 zone_sizes_init(); 372} 373 374/* 375 * Test if the WP bit works in supervisor mode. It isn't supported on 386's 376 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps 377 * before and after the test are here to work-around some nasty CPU bugs. 378 */ 379 380/* 381 * This function cannot be __init, since exceptions don't work in that 382 * section. 383 */ 384static int do_test_wp_bit(unsigned long vaddr); 385 386void __init test_wp_bit(void) 387{ 388/* 389 * Ok, all PSE-capable CPUs are definitely handling the WP bit right. 390 */ 391 const unsigned long vaddr = PAGE_OFFSET; 392 pgd_t *pgd; 393 pmd_t *pmd; 394 pte_t *pte, old_pte; 395 396 printk("Checking if this processor honours the WP bit even in supervisor mode... "); 397 398 pgd = swapper_pg_dir + __pgd_offset(vaddr); 399 pmd = pmd_offset(pgd, vaddr); 400 pte = pte_offset(pmd, vaddr); 401 old_pte = *pte; 402 *pte = mk_pte_phys(0, PAGE_READONLY); 403 local_flush_tlb(); 404 405 boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr); 406 407 *pte = old_pte; 408 local_flush_tlb(); 409 410 if (!boot_cpu_data.wp_works_ok) { 411 printk("No.\n"); 412#ifdef CONFIG_X86_WP_WORKS_OK 413 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); 414#endif 415 } else { 416 printk("Ok.\n"); 417 } 418} 419 420static inline int page_is_ram (unsigned long pagenr) 421{ 422 int i; 423 424 for (i = 0; i < e820.nr_map; i++) { 425 unsigned long addr, end; 426 427 if (e820.map[i].type != E820_RAM) /* not usable memory */ 428 continue; 429 addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; 430 end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; 431 if ((pagenr >= addr) && (pagenr < end)) 432 return 1; 433 } 434 return 0; 435} 436 437static inline int page_kills_ppro(unsigned long pagenr) 438{ 439 if(pagenr >= 0x70000 && pagenr <= 0x7003F) 440 return 1; 441 return 0; 442} 443 444#ifdef CONFIG_HIGHMEM 445void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) 446{ 447 if (!page_is_ram(pfn)) { 448 SetPageReserved(page); 449 return; 450 } 451 452 if (bad_ppro && page_kills_ppro(pfn)) { 453 SetPageReserved(page); 454 return; 455 } 456 457 ClearPageReserved(page); 458 set_bit(PG_highmem, &page->flags); 459 atomic_set(&page->count, 1); 460 __free_page(page); 461 totalhigh_pages++; 462} 463#endif /* CONFIG_HIGHMEM */ 464 465static void __init set_max_mapnr_init(void) 466{ 467#ifdef CONFIG_HIGHMEM 468 highmem_start_page = mem_map + highstart_pfn; 469 max_mapnr = num_physpages = highend_pfn; 470 num_mappedpages = max_low_pfn; 471#else 472 max_mapnr = num_mappedpages = num_physpages = max_low_pfn; 473#endif 474} 475 476static int __init free_pages_init(void) 477{ 478 extern int ppro_with_ram_bug(void); 479 int bad_ppro, reservedpages, pfn; 480 481 bad_ppro = ppro_with_ram_bug(); 482 483 /* this will put all low memory onto the freelists */ 484 totalram_pages += free_all_bootmem(); 485 486 reservedpages = 0; 487 for (pfn = 0; pfn < max_low_pfn; pfn++) { 488 /* 489 * Only count reserved RAM pages 490 */ 491 if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) 492 reservedpages++; 493 } 494#ifdef CONFIG_HIGHMEM 495 for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) 496 one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); 497 totalram_pages += totalhigh_pages; 498#endif 499 return reservedpages; 500} 501 502void __init mem_init(void) 503{ 504 int codesize, reservedpages, datasize, initsize; 505 506 if (!mem_map) 507 BUG(); 508 509 set_max_mapnr_init(); 510 511 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); 512 513 /* clear the zero-page */ 514 memset(empty_zero_page, 0, PAGE_SIZE); 515 516 reservedpages = free_pages_init(); 517 518 codesize = (unsigned long) &_etext - (unsigned long) &_text; 519 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 520 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 521 522 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", 523 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 524 max_mapnr << (PAGE_SHIFT-10), 525 codesize >> 10, 526 reservedpages << (PAGE_SHIFT-10), 527 datasize >> 10, 528 initsize >> 10, 529 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) 530 ); 531 532#if CONFIG_X86_PAE 533 if (!cpu_has_pae) 534 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); 535#endif 536 if (boot_cpu_data.wp_works_ok < 0) 537 test_wp_bit(); 538 539 /* 540 * Subtle. SMP is doing it's boot stuff late (because it has to 541 * fork idle threads) - but it also needs low mappings for the 542 * protected-mode entry to work. We zap these entries only after 543 * the WP-bit has been tested. 544 */ 545#ifndef CONFIG_SMP 546 zap_low_mappings(); 547#endif 548 549} 550 551/* Put this after the callers, so that it cannot be inlined */ 552static int do_test_wp_bit(unsigned long vaddr) 553{ 554 char tmp_reg; 555 int flag; 556 557 __asm__ __volatile__( 558 " movb %0,%1 \n" 559 "1: movb %1,%0 \n" 560 " xorl %2,%2 \n" 561 "2: \n" 562 ".section __ex_table,\"a\"\n" 563 " .align 4 \n" 564 " .long 1b,2b \n" 565 ".previous \n" 566 :"=m" (*(char *) vaddr), 567 "=q" (tmp_reg), 568 "=r" (flag) 569 :"2" (1) 570 :"memory"); 571 572 return flag; 573} 574 575void free_initmem(void) 576{ 577 unsigned long addr; 578 579 addr = (unsigned long)(&__init_begin); 580 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { 581 ClearPageReserved(virt_to_page(addr)); 582 set_page_count(virt_to_page(addr), 1); 583 free_page(addr); 584 totalram_pages++; 585 } 586 printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); 587} 588 589#ifdef CONFIG_BLK_DEV_INITRD 590void free_initrd_mem(unsigned long start, unsigned long end) 591{ 592 if (start < end) 593 printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); 594 for (; start < end; start += PAGE_SIZE) { 595 ClearPageReserved(virt_to_page(start)); 596 set_page_count(virt_to_page(start), 1); 597 free_page(start); 598 totalram_pages++; 599 } 600} 601#endif 602 603void si_meminfo(struct sysinfo *val) 604{ 605 val->totalram = totalram_pages; 606 val->sharedram = 0; 607 val->freeram = nr_free_pages(); 608 val->bufferram = atomic_read(&buffermem_pages); 609 val->totalhigh = totalhigh_pages; 610 val->freehigh = nr_free_highpages(); 611 val->mem_unit = PAGE_SIZE; 612 return; 613} 614 615#if defined(CONFIG_X86_PAE) 616struct kmem_cache_s *pae_pgd_cachep; 617void __init pgtable_cache_init(void) 618{ 619 /* 620 * PAE pgds must be 16-byte aligned: 621 */ 622 pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, 623 SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); 624 if (!pae_pgd_cachep) 625 panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); 626} 627#endif /* CONFIG_X86_PAE */ 628