The Implementation of ioremap in Linux
Reposted from https://blog.csdn.net/njuitjf/article/details/40745227
The ioremap function shows up all over the Linux kernel source.
Its job is to map a given physical address to a virtual address.
Note that the physical address here is not the physical address of real RAM, but the CPU's (SoC's) I/O memory.
See the memory map chapter in the chip's Reference Manual.
0x402a0000 ~ 0x402a02e4 seems to cover only the pinctrl registers; does the on-chip address space really run from 0 to 0x80000000?
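As a quick illustration of how a driver normally uses it, here is a minimal sketch that maps that pinctrl register window and accesses it through the I/O accessors (the mapping size, the register offset and the module boilerplate are assumptions made up for the example, not taken from a real driver):

#include <linux/io.h>      /* ioremap, iounmap, readl, writel */
#include <linux/module.h>

#define PINCTRL_PHYS_BASE 0x402a0000UL   /* SoC I/O memory, not RAM     */
#define PINCTRL_REGION_SZ 0x1000         /* assumed size of the mapping */

static void __iomem *pinctrl_base;

static int __init ioremap_demo_init(void)
{
        u32 val;

        /* Map the device registers into kernel virtual address space. */
        pinctrl_base = ioremap(PINCTRL_PHYS_BASE, PINCTRL_REGION_SZ);
        if (!pinctrl_base)
                return -ENOMEM;

        /* Always go through readl()/writel() for I/O memory instead of
         * dereferencing the __iomem pointer directly. Offset 0x0 is an
         * arbitrary register picked for the demo. */
        val = readl(pinctrl_base + 0x0);
        writel(val, pinctrl_base + 0x0);

        return 0;
}

static void __exit ioremap_demo_exit(void)
{
        iounmap(pinctrl_base);
}

module_init(ioremap_demo_init);
module_exit(ioremap_demo_exit);
MODULE_LICENSE("GPL");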
The definition of ioremap:
- #define ioremap(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE)
- #define MT_DEVICE 0
- #define __arch_ioremap __arm_ioremap
- void __iomem *
- __arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype)
- {
- return __arm_ioremap_caller(phys_addr, size, mtype,
- __builtin_return_address(0));
- }
- void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size,
- unsigned int mtype, void *caller)
- {
- unsigned long last_addr;
- unsigned long offset = phys_addr & ~PAGE_MASK;
- unsigned long pfn = __phys_to_pfn(phys_addr);
- /*
- * Don't allow wraparound or zero size
- */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
- return __arm_ioremap_pfn_caller(pfn, offset, size, mtype,
- caller);
- }
- void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
- unsigned long offset, size_t size, unsigned int mtype, void *caller)
- {
- const struct mem_type *type;
- int err;
- unsigned long addr;
- struct vm_struct * area;
- /*
- * High mappings must be supersection aligned
- */
- // high mappings (pfn >= 0x100000) must be aligned to a supersection boundary
- if (pfn >= 0x100000 && (__pfn_to_phys(pfn) & ~SUPERSECTION_MASK))
- return NULL;
- /*
- * Don't allow RAM to be mapped - this causes problems with ARMv6+
- */
- // what gets mapped here must not be RAM, only the SoC's I/O memory
- /*
- int pfn_valid(unsigned long pfn)
- {
- return memblock_is_memory(pfn << PAGE_SHIFT);
- }
- */
- if (WARN_ON(pfn_valid(pfn)))
- return NULL;
- // the implementation of get_mem_type is shown later
- // from the definitions above, mtype here is MT_DEVICE
- type = get_mem_type(mtype);
- if (!type)
- return NULL;
- /*
- * Page align the mapping size, taking account of any offset.
- */
- size = PAGE_ALIGN(offset + size);
- // the implementation of get_vm_area_caller is shown later
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (!area)
- return NULL;
- addr = (unsigned long)area->addr;
- #ifndef CONFIG_SMP
- if (DOMAIN_IO == 0 &&
- (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) ||
- cpu_is_xsc3()) && pfn >= 0x100000 &&
- !((__pfn_to_phys(pfn) | size | addr) & ~SUPERSECTION_MASK)) {
- area->flags |= VM_ARM_SECTION_MAPPING;
- err = remap_area_supersections(addr, pfn, size, type);
- } else if (!((__pfn_to_phys(pfn) | size | addr) & ~PMD_MASK)) {
- area->flags |= VM_ARM_SECTION_MAPPING;
- err = remap_area_sections(addr, pfn, size, type);
- } else
- #endif
- // the implementation of ioremap_page_range is shown later
- err = ioremap_page_range(addr, addr + size, __pfn_to_phys(pfn),
- __pgprot(type->prot_pte));
- if (err) {
- vunmap((void *)addr);
- return NULL;
- }
- flush_cache_vmap(addr, addr + size);
- return (void __iomem *) (offset + addr);
- }
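The pfn/offset split at the top of __arm_ioremap_caller, and the "offset + addr" added back at the very end, are plain page arithmetic. A small user-space sketch with 4 KiB pages, using the 0x402a0000 pinctrl base mentioned earlier plus a made-up register offset as the example address:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
    unsigned long phys_addr = 0x402a0004UL;            /* example register address   */
    unsigned long offset    = phys_addr & ~PAGE_MASK;  /* 0x4: position in the page  */
    unsigned long pfn       = phys_addr >> PAGE_SHIFT; /* 0x402a0: page frame number */

    /* ioremap maps whole pages; the caller gets back mapping_base + offset,
     * which is why the final return is "(void __iomem *)(offset + addr)". */
    printf("pfn = 0x%lx, offset = 0x%lx\n", pfn, offset);
    return 0;
}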
pfn_valid is defined in arch/arm/mm/init.c; the memblock helpers it calls live in mm/memblock.c:
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
return memblock_is_memory(__pfn_to_phys(pfn));
}
EXPORT_SYMBOL(pfn_valid);
int __init_memblock memblock_is_memory(phys_addr_t addr)
{
return memblock_search(&memblock.memory, addr) != -1;
}
static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
{
unsigned int left = 0, right = type->cnt;
do {
unsigned int mid = (right + left) / 2;
if (addr < type->regions[mid].base)
right = mid;
else if (addr >= (type->regions[mid].base +
type->regions[mid].size))
left = mid + 1;
else
return mid;
} while (left < right);
return -1;
}
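So pfn_valid boils down to a binary search over the sorted, non-overlapping memblock regions: if the physical address falls inside any registered memory region it is RAM, and ioremap refuses to map it. A self-contained user-space sketch of the same logic (the RAM region values are invented for the demo):

#include <stdio.h>

struct region { unsigned long base, size; };

/* Sorted, non-overlapping RAM regions -- invented values for the demo. */
static const struct region ram[] = {
    { 0x80000000UL, 0x20000000UL },   /* 512 MiB of RAM at 0x80000000 */
};

static int is_ram(unsigned long addr)
{
    unsigned int left = 0, right = sizeof(ram) / sizeof(ram[0]);

    while (left < right) {
        unsigned int mid = (left + right) / 2;

        if (addr < ram[mid].base)
            right = mid;
        else if (addr >= ram[mid].base + ram[mid].size)
            left = mid + 1;
        else
            return 1;                 /* inside a RAM region */
    }
    return 0;
}

int main(void)
{
    printf("0x402a0000 is RAM? %d\n", is_ram(0x402a0000UL)); /* 0: I/O memory, ok to ioremap */
    printf("0x80100000 is RAM? %d\n", is_ram(0x80100000UL)); /* 1: pfn_valid -> WARN + NULL  */
    return 0;
}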
The implementation of get_mem_type:
- const struct mem_type *get_mem_type(unsigned int type)
- {
- return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
- }
The definition of mem_types:
- static struct mem_type mem_types[] = {
- [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
- .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
- L_PTE_SHARED,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
- .domain = DOMAIN_IO,
- },
- [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
- .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PROT_SECT_DEVICE,
- .domain = DOMAIN_IO,
- },
- [MT_DEVICE_CACHED] = { /* ioremap_cached */
- .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
- .domain = DOMAIN_IO,
- },
- [MT_DEVICE_WC] = { /* ioremap_wc */
- .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PROT_SECT_DEVICE,
- .domain = DOMAIN_IO,
- },
- [MT_UNCACHED] = {
- .prot_pte = PROT_PTE_DEVICE,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
- .domain = DOMAIN_IO,
- },
- [MT_CACHECLEAN] = {
- .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
- .domain = DOMAIN_KERNEL,
- },
- [MT_MINICLEAN] = {
- .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
- .domain = DOMAIN_KERNEL,
- },
- [MT_LOW_VECTORS] = {
- .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
- L_PTE_RDONLY,
- .prot_l1 = PMD_TYPE_TABLE,
- .domain = DOMAIN_USER,
- },
- [MT_HIGH_VECTORS] = {
- .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
- L_PTE_USER | L_PTE_RDONLY,
- .prot_l1 = PMD_TYPE_TABLE,
- .domain = DOMAIN_USER,
- },
- [MT_MEMORY] = {
- .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
- .domain = DOMAIN_KERNEL,
- },
- [MT_ROM] = {
- .prot_sect = PMD_TYPE_SECT,
- .domain = DOMAIN_KERNEL,
- },
- [MT_MEMORY_NONCACHED] = {
- .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
- L_PTE_MT_BUFFERABLE,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
- .domain = DOMAIN_KERNEL,
- },
- [MT_MEMORY_DTCM] = {
- .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
- L_PTE_XN,
- .prot_l1 = PMD_TYPE_TABLE,
- .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
- .domain = DOMAIN_KERNEL,
- },
- [MT_MEMORY_ITCM] = {
- .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
- .prot_l1 = PMD_TYPE_TABLE,
- .domain = DOMAIN_KERNEL,
- },
- };
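This table is also what separates the different ioremap flavours on ARM: plain ioremap uses MT_DEVICE (strongly ordered, shared device), while ioremap_wc picks MT_DEVICE_WC and the cached variant picks MT_DEVICE_CACHED, as the comments in the array indicate. A hedged sketch of how a driver might choose between them (the addresses, sizes and module boilerplate are invented for the example):

#include <linux/io.h>
#include <linux/module.h>

/* Invented addresses and sizes, purely for illustration. */
#define DEMO_REGS_PHYS 0x402a0000UL
#define DEMO_FB_PHYS   0x9f000000UL
#define DEMO_FB_SIZE   0x00100000UL

static void __iomem *demo_regs, *demo_fb;

static int __init mem_type_demo_init(void)
{
        /* Control registers: plain ioremap(), i.e. MT_DEVICE. */
        demo_regs = ioremap(DEMO_REGS_PHYS, 0x1000);

        /* A frame buffer tolerates write-combining (MT_DEVICE_WC),
         * which is much faster for large sequential writes. */
        demo_fb = ioremap_wc(DEMO_FB_PHYS, DEMO_FB_SIZE);

        if (!demo_regs || !demo_fb) {
                if (demo_regs)
                        iounmap(demo_regs);
                if (demo_fb)
                        iounmap(demo_fb);
                return -ENOMEM;
        }
        return 0;
}

static void __exit mem_type_demo_exit(void)
{
        iounmap(demo_fb);
        iounmap(demo_regs);
}

module_init(mem_type_demo_init);
module_exit(mem_type_demo_exit);
MODULE_LICENSE("GPL");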
The implementation of get_vm_area_caller:
- struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
- void *caller)
- {
- /*
- * Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 8MB value just means that there will be a 8MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- *
- * Note that platforms may override VMALLOC_START, but they must provide
- * VMALLOC_END. VMALLOC_END defines the (exclusive) limit of this space,
- * which may not overlap IO space.
- */
- /*
- #ifndef VMALLOC_START
- #define VMALLOC_OFFSET (8*1024*1024)
- // high_memory is assigned in the bootmem_init function in arch/arm/mm/init.c; that function is shown later
- #define VMALLOC_START (((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
- #endif
- */
- /* vmalloc ending address */
- #define VMALLOC_END 0xf2000000UL
- return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
- -1, GFP_KERNEL, caller);
- }
- static struct vm_struct *__get_vm_area_node(unsigned long size,
- unsigned long align, unsigned long flags, unsigned long start,
- unsigned long end, int node, gfp_t gfp_mask, void *caller)
- {
- struct vmap_area *va;
- struct vm_struct *area;
- BUG_ON(in_interrupt());
- /* bits in flags of vmalloc's vm_struct below */
- // #define VM_IOREMAP 0x00000001 /* ioremap() and friends */
- if (flags & VM_IOREMAP) {
- int bit = fls(size);
- if (bit > IOREMAP_MAX_ORDER)
- bit = IOREMAP_MAX_ORDER;
- else if (bit < PAGE_SHIFT)
- bit = PAGE_SHIFT;
- align = 1ul << bit;
- }
- size = PAGE_ALIGN(size);
- if (unlikely(!size))
- return NULL;
- /**
- * kzalloc_node - allocate zeroed memory from a particular memory node.
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kmalloc).
- * @node: memory node from which to allocate
- */
- // allocate a vm_struct structure
- area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
- if (unlikely(!area))
- return NULL;
- /*
- * We always allocate a guard page.
- */
- size += PAGE_SIZE;
- // start and end here are VMALLOC_START and VMALLOC_END
- // align is the 1 passed in by get_vm_area_caller (raised above when VM_IOREMAP is set)
- // the kernel's comment on alloc_vmap_area:
- /*
- * Allocate a region of KVA of the specified size and alignment, within the
- * vstart and vend.
- */
- // every region of kernel virtual memory already in use is described by a vmap_area
- // all vmap_area structures are kept in the red-black tree vmap_area_root
- // alloc_vmap_area searches vmap_area_root for an unused hole of at least size bytes between start and end,
- // creates a vmap_area, fills it in with the hole that was found, and inserts it into vmap_area_root
- va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
- if (IS_ERR(va)) {
- kfree(area);
- return NULL;
- }
- /*
- * When this function is called from __vmalloc_node_range,
- * we do not add vm_struct to vmlist here to avoid
- * accessing uninitialized members of vm_struct such as
- * pages and nr_pages fields. They will be set later.
- * To distinguish it from others, we use a VM_UNLIST flag.
- */
- if (flags & VM_UNLIST)
- setup_vmalloc_vm(area, va, flags, caller);
- else
- // as the comment above explains, the if branch is a special case, so we only follow the else branch
- // the implementation of insert_vmalloc_vm is shown later
- insert_vmalloc_vm(area, va, flags, caller);
- return area;
- }
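One detail worth calling out is the VM_IOREMAP branch at the top of __get_vm_area_node: the align of 1 passed in by get_vm_area_caller is replaced by the mapping size rounded up to a power of two, clamped between PAGE_SHIFT and IOREMAP_MAX_ORDER (24 on ARM, i.e. 16 MiB). A user-space sketch of that calculation; fls_demo stands in for the kernel's fls():

#include <stdio.h>

#define PAGE_SHIFT        12
#define IOREMAP_MAX_ORDER 24   /* ARM's value: cap the alignment at 16 MiB */

/* Kernel-style fls(): 1-based index of the most significant set bit, 0 for 0. */
static int fls_demo(unsigned long x)
{
    int bit = 0;

    while (x) {
        bit++;
        x >>= 1;
    }
    return bit;
}

static unsigned long ioremap_align(unsigned long size)
{
    int bit = fls_demo(size);

    if (bit > IOREMAP_MAX_ORDER)
        bit = IOREMAP_MAX_ORDER;
    else if (bit < PAGE_SHIFT)
        bit = PAGE_SHIFT;
    return 1UL << bit;
}

int main(void)
{
    /* A 0x2e4-byte register window is aligned to a 4 KiB page,
     * a 1 MiB window to 2 MiB, a 64 MiB window is capped at 16 MiB. */
    printf("0x2e4     -> align 0x%lx\n", ioremap_align(0x2e4));
    printf("0x100000  -> align 0x%lx\n", ioremap_align(0x100000));
    printf("0x4000000 -> align 0x%lx\n", ioremap_align(0x4000000));
    return 0;
}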
high_memory is assigned in the bootmem_init function in arch/arm/mm/init.c; the implementation of that function:
- void __init bootmem_init(void)
- {
- unsigned long min, max_low, max_high;
- max_low = max_high = 0;
- // find the lowest RAM pfn, the highest lowmem pfn and the highest highmem pfn
- // the implementation of find_limits is shown later
- find_limits(&min, &max_low, &max_high);
- arm_bootmem_init(min, max_low);
- /*
- * Sparsemem tries to allocate bootmem in memory_present(),
- * so must be done after the fixed reservations
- */
- arm_memory_present();
- /*
- * sparse_init() needs the bootmem allocator up and running.
- */
- sparse_init();
- /*
- * Now free the memory - free_area_init_node needs
- * the sparse mem_map arrays initialized by sparse_init()
- * for memmap_init_zone(), otherwise all PFNs are invalid.
- */
- arm_bootmem_free(min, max_low, max_high);
- // high_memory is the starting virtual address of high memory
- high_memory = __va(((phys_addr_t)max_low << PAGE_SHIFT) - 1) + 1;
- /*
- * This doesn't seem to be used by the Linux memory manager any
- * more, but is used by ll_rw_block. If we can get rid of it, we
- * also get rid of some of the stuff above as well.
- *
- * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
- * the system, not the maximum PFN.
- */
- max_low_pfn = max_low - PHYS_PFN_OFFSET;
- max_pfn = max_high - PHYS_PFN_OFFSET;
- }
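To see what the high_memory assignment produces, and how the VMALLOC_START formula quoted earlier in get_vm_area_caller follows from it, here is a user-space sketch with assumed example values (RAM starting at 0x80000000, PAGE_OFFSET of 0xc0000000 and 512 MiB of lowmem):

#include <stdio.h>

#define PAGE_SHIFT     12
#define PHYS_OFFSET    0x80000000UL       /* assumed start of RAM          */
#define PAGE_OFFSET    0xc0000000UL       /* assumed kernel linear mapping */
#define VMALLOC_OFFSET (8UL * 1024 * 1024)
#define VMALLOC_END    0xf2000000UL

/* __va(): physical -> kernel virtual address in the linear (lowmem) map. */
static unsigned long va(unsigned long phys)
{
    return phys - PHYS_OFFSET + PAGE_OFFSET;
}

int main(void)
{
    /* 512 MiB of lowmem starting at 0x80000000 ends at PFN 0xa0000. */
    unsigned long max_low = 0xa0000UL;

    /* Same expression as bootmem_init: address of the last lowmem byte, + 1. */
    unsigned long high_memory = va((max_low << PAGE_SHIFT) - 1) + 1;

    /* Same expression as the VMALLOC_START definition quoted earlier:
     * leave an 8 MiB guard hole after lowmem, rounded to 8 MiB. */
    unsigned long vmalloc_start =
        (high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET - 1);

    printf("high_memory   = 0x%lx\n", high_memory);    /* 0xe0000000 */
    printf("VMALLOC_START = 0x%lx\n", vmalloc_start);  /* 0xe0800000 */
    printf("VMALLOC_END   = 0x%lx\n", VMALLOC_END);    /* ioremap mappings land in between */
    return 0;
}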
The implementation of find_limits:
- static void __init find_limits(unsigned long *min, unsigned long *max_low,
- unsigned long *max_high)
- {
- struct meminfo *mi = &meminfo;
- int i;
- *min = -1UL;
- *max_low = *max_high = 0;
- for_each_bank (i, mi) {
- struct membank *bank = &mi->bank[i];
- unsigned long start, end;
- start = bank_pfn_start(bank);
- end = bank_pfn_end(bank);
- if (*min > start)
- *min = start;
- if (*max_high < end)
- *max_high = end;
- // for a highmem bank there is no need to update max_low
- // see the sanity_check_meminfo function below
- if (bank->highmem)
- continue;
- if (*max_low < end)
- *max_low = end;
- }
- }
The implementation of sanity_check_meminfo:
- void __init sanity_check_meminfo(void)
- {
- int i, j, highmem = 0;
- for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
- struct membank *bank = &meminfo.bank[j];
- *bank = meminfo.bank[i];
- #ifdef CONFIG_HIGHMEM
- // static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);
- if (__va(bank->start) >= vmalloc_min ||
- __va(bank->start) < (void *)PAGE_OFFSET)
- highmem = 1;
- bank->highmem = highmem;
- ...
- #else
- bank->highmem = highmem;
- ...
- }
- ...
- }
Now back to the implementation of get_vm_area_caller.
The implementation of insert_vmalloc_vm:
- static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
- unsigned long flags, void *caller)
- {
- // the vm (vm_struct) was allocated in __get_vm_area_node
- // the va (vmap_area) was allocated in __get_vm_area_node by calling alloc_vmap_area
- setup_vmalloc_vm(vm, va, flags, caller);
- insert_vmalloc_vmlist(vm);
- }
The implementation of setup_vmalloc_vm:
- static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
- unsigned long flags, void *caller)
- {
- vm->flags = flags;
- vm->addr = (void *)va->va_start;
- vm->size = va->va_end - va->va_start;
- vm->caller = caller;
- va->vm = vm;
- va->flags |= VM_VM_AREA;
- }
The implementation of insert_vmalloc_vmlist:
- static void insert_vmalloc_vmlist(struct vm_struct *vm)
- {
- struct vm_struct *tmp, **p;
- vm->flags &= ~VM_UNLIST;
- write_lock(&vmlist_lock);
- // insert the vm_struct into vmlist, keeping the list sorted by address
- for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
- if (tmp->addr >= vm->addr)
- break;
- }
- vm->next = *p;
- *p = vm;
- write_unlock(&vmlist_lock);
- }
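insert_vmalloc_vmlist keeps vmlist sorted by virtual address using the classic pointer-to-pointer walk, so inserting at the head of the list needs no special case. A stand-alone user-space sketch of the same idiom:

#include <stdio.h>
#include <stdlib.h>

struct node {
    unsigned long addr;
    struct node *next;
};

static struct node *list;   /* plays the role of vmlist */

/* Keep the list sorted by addr, exactly as insert_vmalloc_vmlist does:
 * walking a pointer-to-pointer means *p is always the link to rewrite. */
static void insert_sorted(struct node *n)
{
    struct node *tmp, **p;

    for (p = &list; (tmp = *p) != NULL; p = &tmp->next) {
        if (tmp->addr >= n->addr)
            break;
    }
    n->next = *p;
    *p = n;
}

int main(void)
{
    unsigned long addrs[] = { 0xe0900000UL, 0xe0800000UL, 0xe0a00000UL };
    int i;

    for (i = 0; i < 3; i++) {
        struct node *n = malloc(sizeof(*n));

        n->addr = addrs[i];
        insert_sorted(n);
    }
    for (struct node *n = list; n; n = n->next)
        printf("0x%lx\n", n->addr);   /* printed in ascending order */
    return 0;
}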
The implementation of ioremap_page_range:
- int ioremap_page_range(unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
- {
- pgd_t *pgd;
- unsigned long start;
- unsigned long next;
- int err;
- BUG_ON(addr >= end);
- start = addr;
- phys_addr -= addr;
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
- if (err)
- break;
- } while (pgd++, addr = next, addr != end);
- flush_cache_vmap(start, end);
- return err;
- }
- static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
- {
- pud_t *pud;
- unsigned long next;
- phys_addr -= addr;
- pud = pud_alloc(&init_mm, pgd, addr);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
- if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
- return -ENOMEM;
- } while (pud++, addr = next, addr != end);
- return 0;
- }
- static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
- {
- pmd_t *pmd;
- unsigned long next;
- phys_addr -= addr;
- pmd = pmd_alloc(&init_mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
- if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
- return -ENOMEM;
- } while (pmd++, addr = next, addr != end);
- return 0;
- }
- static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
- {
- pte_t *pte;
- u64 pfn;
- pfn = phys_addr >> PAGE_SHIFT;
- pte = pte_alloc_kernel(pmd, addr);
- if (!pte)
- return -ENOMEM;
- do {
- BUG_ON(!pte_none(*pte));
- set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- return 0;
- }
The functions above populate the Linux four-level page tables for the new mapping.
For background on the Linux four-level page table, see:
http://larmbr.me/2014/01/19/the-evolution-of-4-level-page-talbe-in-linux/
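All four levels of the walk above share the same shape: compute how far the current entry reaches with *_addr_end, fill in the range [addr, next) one level down, then advance. A simplified user-space sketch of that range-splitting idiom, with only a "pmd" level of 2 MiB and 4 KiB pages (the sizes and addresses are chosen purely for illustration):

#include <stdio.h>

#define PAGE_SIZE 0x1000UL
#define PMD_SIZE  0x200000UL
#define PMD_MASK  (~(PMD_SIZE - 1))

/* Same pattern as pgd_addr_end()/pmd_addr_end(): the end of the current
 * entry's coverage, clamped to the overall end of the mapping. */
static unsigned long pmd_addr_end_demo(unsigned long addr, unsigned long end)
{
    unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;

    return boundary < end ? boundary : end;
}

static void map_ptes(unsigned long addr, unsigned long end, unsigned long phys)
{
    do {
        /* In the kernel this step is set_pte_at(&init_mm, ...). */
        printf("  pte: va 0x%lx -> pa 0x%lx\n", addr, phys);
        phys += PAGE_SIZE;
    } while (addr += PAGE_SIZE, addr != end);
}

static void map_range(unsigned long addr, unsigned long end, unsigned long phys)
{
    unsigned long next;

    phys -= addr;                       /* same trick as ioremap_page_range */
    do {
        next = pmd_addr_end_demo(addr, end);
        printf("pmd covers [0x%lx, 0x%lx)\n", addr, next);
        map_ptes(addr, next, phys + addr);
    } while (addr = next, addr != end);
}

int main(void)
{
    /* Map three pages of I/O memory that happen to cross a 2 MiB boundary. */
    map_range(0xe09ff000UL, 0xe0a02000UL, 0x402a0000UL);
    return 0;
}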
To summarize:
ioremap first performs a few checks; one of them verifies that the physical address to be mapped is not RAM, because ioremap handles only the SoC's I/O memory, never RAM.
It then allocates a vm_struct structure.
Next it allocates a vmap_area structure, searching the red-black tree vmap_area_root for a suitable hole.
Then it initializes some members of the vm_struct and vmap_area structures.
Finally it builds the Linux four-level page table entries for the mapping.
The four levels are: PGD -> PUD -> PMD -> PTE.