2014년 6월 9일 월요일

[Linux Kernel] 57주차(2014.06.07)

ARM10C 57주차

일시 : 2014.06.07 (57주차)
모임명 : NAVER개발자커뮤니티지원_IAMROOT.ORG_10차ARM-C
장소 : 토즈 타워점
장소지원 : NAVER 개발자 커뮤니티 지원 프로그램
참여인원 : 5명

스터디 진도 :

  • mm_init()을 계속 분석합니다.
  • start_kernel()-> mm_init()->kmem_cache_init()->create_boot_cache() 분석중

main.c:: mm_init()

// ARM10C 20140329
static void __init mm_init(void)
{
    /*
     * page_cgroup requires contiguous pages,
     * bigger than MAX_ORDER unless SPARSEMEM.
     */
    page_cgroup_init_flatmem(); // null function
    mem_init();
    // bootmem으로 관리하던 메모리를 buddy로 이관.
    // 각 section 메모리 크기를 출력.

    // mm/Makefile 에서 CONFIG_SLUB 설정으로 slub.c 로 jump
    kmem_cache_init();
kmem_cache_init()으로 이동.

slub.c::kmem_cache_init()

// ARM10C 20140419
void __init kmem_cache_init(void)
{
    static __initdata struct kmem_cache boot_kmem_cache,
        boot_kmem_cache_node;

    // debug_guardpage_minorder(): 0
    if (debug_guardpage_minorder())
        slub_max_order = 0;

    // kmem_cache_node: NULL
    kmem_cache_node = &boot_kmem_cache_node;
    // kmem_cache_node: &boot_kmem_cache_node

    // kmem_cache: NULL
    kmem_cache = &boot_kmem_cache;
    // kmem_cache: &boot_kmem_cache

    // &boot_kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node): 44 byte,
    // SLAB_HWCACHE_ALIGN: 0x00002000UL
    create_boot_cache(kmem_cache_node, "kmem_cache_node",
        sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
// &boot_kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node): 44 byte,
// SLAB_HWCACHE_ALIGN: 0x00002000UL
create_boot_cache(kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);

slab_common.c::create_boot_cache()

// ARM10C 20140419
// &boot_kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node): 44 byte,
// SLAB_HWCACHE_ALIGN: 0x00002000UL
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
        unsigned long flags)
{
    int err;

    // s->name: boot_kmem_cache_node.name: NULL
    s->name = name;
    // s->name: boot_kmem_cache_node.name: "kmem_cache_node"

    // s->size: boot_kmem_cache_node.size: 0
    // s->object_size: boot_kmem_cache_node.object_size: 0
    s->size = s->object_size = size;
    // s->size: boot_kmem_cache_node.size: 44
    // s->object_size: boot_kmem_cache_node.object_size: 44

    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, ARCH_KMALLOC_MINALIGN: 64, size: 44
    // s->align: boot_kmem_cache_node.align: 0
    s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
    // s->align: boot_kmem_cache_node.align: 64

    // s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    err = __kmem_cache_create(s, flags);
// s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
err = __kmem_cache_create(s, flags);

slub.c::__kmem_cache_create()

// ARM10C 20140419
// s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
{
    int err;

    // s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // kmem_cache_open(&boot_kmem_cache_node, 0x00002000UL): 0
    err = kmem_cache_open(s, flags);
// s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
// kmem_cache_open(&boot_kmem_cache_node, 0x00002000UL): 0
err = kmem_cache_open(s, flags);

slub.c::kmem_cache_open()

// ARM10C 20140419
// s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
    // s->size: boot_kmem_cache_node.size: 44, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL,
    // s->name: boot_kmem_cache_node.name: "kmem_cache_node", s->ctor: boot_kmem_cache_node.ctor: NULL
    s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
    // s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL

    // s->reserved: boot_kmem_cache_node.reserved: 0
    s->reserved = 0;
    // s->reserved: boot_kmem_cache_node.reserved: 0

    // need_reserve_slab_rcu: 0 , s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN
    if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
        s->reserved = sizeof(struct rcu_head);

    // s: &boot_kmem_cache_node, -1, calculate_sizes(&boot_kmem_cache_node, -1): 1
    if (!calculate_sizes(s, -1))
        goto error;

    // disable_higher_order_debug: 0
    if (disable_higher_order_debug) {
        /*
         * Disable debugging flags that store metadata if the min slab
         * order increased.
         */
        if (get_order(s->size) > get_order(s->object_size)) {
            s->flags &= ~DEBUG_METADATA_FLAGS;
            s->offset = 0;
            if (!calculate_sizes(s, -1))
                goto error;
        }
    }
    /*
     * The larger the object size is, the more pages we want on the partial
     * list to avoid pounding the page allocator excessively.
     */
    // s->size: boot_kmem_cache_node.size: 64, ilog2(64): 6
    // s: &boot_kmem_cache_node, 3
    set_min_partial(s, ilog2(s->size) / 2);
    // boot_kmem_cache_node.min_partial: 5

    /*
     * cpu_partial determined the maximum number of objects kept in the
     * per cpu partial lists of a processor.
     *
     * Per cpu partial lists mainly contain slabs that just have one
     * object freed. If they are used for allocation then they can be
     * filled up again with minimal effort. The slab will never hit the
     * per node partial lists and therefore no locking will be required.
     *
     * This setting also determines
     *
     * A) The number of objects from per cpu partial slabs dumped to the
     *    per node list when we reach the limit.
     * B) The number of objects in cpu partial slabs to extract from the
     *    per node list when we run out of per cpu objects. We only fetch
     *    50% to keep some capacity around for frees.
     */

    // s: &boot_kmem_cache_node, kmem_cache_has_cpu_partial(&boot_kmem_cache_node): 1
    // s->size: boot_kmem_cache_node.size: 64, PAGE_SIZE: 0x1000
    if (!kmem_cache_has_cpu_partial(s))
        s->cpu_partial = 0;
    else if (s->size >= PAGE_SIZE)
        s->cpu_partial = 2;
    else if (s->size >= 1024)
        s->cpu_partial = 6;
    else if (s->size >= 256)
        s->cpu_partial = 13;
    else
        // s->cpu_partial: boot_kmem_cache_node.cpu_partial: 0
        s->cpu_partial = 30;
        // boot_kmem_cache_node.cpu_partial: 30

// 2014/04/19 종료
// 2014/04/26 시작
    // s: &boot_kmem_cache_node, init_kmem_cache_nodes(&boot_kmem_cache_node): 1
    if (!init_kmem_cache_nodes(s))
        goto error;

    // init_kmem_cache_nodes 가 한일:
    // migratetype이 MIGRATE_UNMOVABLE인 page 할당 받음
    // page 맴버를 셋팅함
    // page->slab_cache: &boot_kmem_cache_node 주소를 set
    // page->flags에 7 (PG_slab) bit를 set
    // page->freelist: UNMOVABLE인 page 의 object의 시작 virtual address + 64
    // page->inuse: 1, page->frozen: 0 page 맴버를 셋팅함
    // slab 의 objects 들의 freepointer를 맵핑함
    // 할당받은 slab object를 kmem_cache_node 로 사용하고 kmem_cache_node의 멤버 필드를 초기화함
    // kmem_cache_node->nr_partial: 1
    // kmem_cache_node->list_lock: spinlock 초기화 수행
    // kmem_cache_node->slabs: 1, kmem_cache_node->total_objects: 64 로 세팅함
    // kmem_cache_node->full: 리스트 초기화
    // kmem_cache_node의 partial 맴버에 현재 page의 lru 리스트를 추가함

    // s: &boot_kmem_cache_node
    if (alloc_kmem_cache_cpus(s))
        return 0;
  • 지난 시간에 if (!init_kmem_cache_nodes(s)) goto error;를 분석했었다.
  • init_kmem_cache_nodes는 slab으로 사용할 page를 할당받아 설정값(slab_cache, flags, freelist, inuse, frozen)을 바꿔준다.
  • 이후 할당받은 slab object를 kmem_cache_node로 사용하며,
  • kmem_cache_node의 맴버 속성을 초기화합니다.
  • 초기화되는 맴버속성은 (nr_partial, list_lock, slabs, full)가 있습니다.
  • init_kmem_cache_nodes(s)의 리턴값이 1이기 때문에 if (!init_kmem_cache_nodes(s)) 조건은 거짓이 되어 다음 문장이 실행됩니다.
// s: &boot_kmem_cache_node
if (alloc_kmem_cache_cpus(s))
return 0;

slub.c::alloc_kmem_cache_cpus()

// ARM10C 20140531
// s: &boot_kmem_cache_node
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
    // PERCPU_DYNAMIC_EARLY_SIZE: 0x3000, KMALLOC_SHIFT_HIGH: 13
    // sizeof(struct kmem_cache_cpu): 16 bytes
    BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
            KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));

    /*
     * Must align to double word boundary for the double cmpxchg
     * instructions to work; see __pcpu_double_call_return_bool().
     */
    // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab
    // sizeof(struct kmem_cache_cpu): 16 bytes, 2 * sizeof(void *): 8 bytes
    // __alloc_percpu(16, 8): 0xc0502d00
    s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
                     2 * sizeof(void *));
  • 충분한 PERCPU_DYNAMIC_EARLY_SIZE가 (0x3000) 확보되었는지 검사합니다.
    // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab
    // sizeof(struct kmem_cache_cpu): 16 bytes, 2 * sizeof(void *): 8 bytes
    // __alloc_percpu(16, 8)
    s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));

percpu.c::__alloc_percpu()

// ARM10C 20140531
// __alloc_percpu(16, 8)
/*
 * Thin wrapper: forwards @size and @align to pcpu_alloc() with its third
 * argument (reserved) set to false, and returns whatever pcpu_alloc() returns.
 */
void __percpu *__alloc_percpu(size_t size, size_t align)
{
    // traced call: size: 16, align: 8
    return pcpu_alloc(size, align, false);
}
// size: 16, align: 8
return pcpu_alloc(size, align, false);

percpu.c::pcpu_alloc()

// ARM10C 20140531
// size: 16, align: 8, false
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
{
    static int warn_limit = 10;
    // warn_limit: 10
    struct pcpu_chunk *chunk;
    const char *err;
    int slot, off, new_alloc;
    unsigned long flags;
    void __percpu *ptr;

    // size: 16, align: 8, PCPU_MIN_UNIT_SIZE: 0x8000, PAGE_SIZE: 0x1000
    if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
        WARN(true, "illegal size (%zu) or align (%zu) for "
             "percpu allocation\n", size, align);
        return NULL;
    }

    mutex_lock(&pcpu_alloc_mutex);
    // pcpu_alloc_mutex의 mutex lock을 수행

    spin_lock_irqsave(&pcpu_lock, flags);
    // pcpu_lock의 spin lock 을 수행하고 cpsr을 flags에 저장

    /* serve reserved allocations from the reserved chunk if available */
    // reserved: false, pcpu_reserved_chunk: pcpu_setup_first_chunk()함수에서 할당한 schunk
    if (reserved && pcpu_reserved_chunk) {
    // reserved가 false이므로 부정된다. 
        chunk = pcpu_reserved_chunk;

        if (size > chunk->contig_hint) {
            err = "alloc from reserved chunk failed";
            goto fail_unlock;
        }

        while ((new_alloc = pcpu_need_to_extend(chunk))) {
            spin_unlock_irqrestore(&pcpu_lock, flags);
            if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
                err = "failed to extend area map of reserved chunk";
                goto fail_unlock_mutex;
            }
            spin_lock_irqsave(&pcpu_lock, flags);
        }

        off = pcpu_alloc_area(chunk, size, align);
        if (off >= 0)
            goto area_found;

        err = "alloc from reserved chunk failed";
        goto fail_unlock;
    }

restart:
    /* search through normal chunks */
    // size: 16, pcpu_size_to_slot(16): 1, pcpu_nr_slots: 15
    for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {

        // slot: 1~10
        // list_for_each_entry 의 &chunk->list != (&pcpu_slot[slot]) 조건에 의해
        // 수행 되지 않음

        list_for_each_entry(chunk, &pcpu_slot[slot], list) {
        // for (chunk = list_first_entry(&pcpu_slot[slot], typeof(*chunk), list);
        //      &chunk->list != (&pcpu_slot[slot]); chunk = list_next_entry(chunk, list))

            // chuck: &pcpu_slot[11]

            // size: 16, chunk->contig_hint: (&pcpu_slot[11])->contig_hint: 0x3000
            if (size > chunk->contig_hint)
                continue;

            // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소
            new_alloc = pcpu_need_to_extend(chunk);
            // new_alloc: 0

            if (new_alloc) {
                spin_unlock_irqrestore(&pcpu_lock, flags);
                if (pcpu_extend_area_map(chunk,
                             new_alloc) < 0) {
                    err = "failed to extend area map";
                    goto fail_unlock_mutex;
                }
                spin_lock_irqsave(&pcpu_lock, flags);
                /*
                 * pcpu_lock has been dropped, need to
                 * restart cpu_slot list walking.
                 */
                goto restart;
            }

            // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, size: 16, align: 8
            // pcpu_alloc_area(&pcpu_slot[11], 16, 8): 0x1d00 + 0x2000
            off = pcpu_alloc_area(chunk, size, align);
  • pcpu_alloc(size, align, false)로 매개변수를 전달받았기 때문에
    return pcpu_alloc(size, align, false);
  • if (reserved && pcpu_reserved_chunk) 에서 reserved 값이 false이므로 이 if문을 실행되지 않고
  • restart로 이동하게 된다.
  • for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
    • slot은 11만 설정했기 때문에 slot: 1~10는
    • list_for_each_entry 의 &chunk->list != (&pcpu_slot[slot]) 조건에 의해
    • 실행되지 않는다.
  • new_alloc = pcpu_need_to_extend(chunk);
  • chunk는 pcpu_slot[11]에 연결된 dchunk(4K만큼 할당 받은 주소)이며, pcpu_need_to_extend(chunk)의 결과 new_alloc은 0이므로 if (new_alloc) 블록은 실행되지 않는다.
    // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, size: 16, align: 8
    // pcpu_alloc_area(&pcpu_slot[11], 16, 8): 0x1d00 + 0x2000
    off = pcpu_alloc_area(chunk, size, align);

percpu.c::pcpu_alloc_area()

static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
{
    // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소
    // pcpu_chunk_slot(&pcpu_slot[11]): 11
    int oslot = pcpu_chunk_slot(chunk);
    // oslot: 11
    int max_contig = 0;
    // max_contig: 0
    int i, off;

    // chunk->map_used: dchunk->map_used: 2,
    // chunk->map[0]: dchunk->map[0]: -(0x1d00 + 0x2000)
    for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
        // [loop 1] i: 0, chunk->map_used: dchunk->map_used: 2
        // [loop 2] i: 1, chunk->map_used: dchunk->map_used: 2
        bool is_last = i + 1 == chunk->map_used;
        // [loop 1] is_last: 0
        // [loop 2] is_last: 1
        int head, tail;

        /* extra for alignment requirement */
        // [loop 1] off: 0, align: 8
        // [loop 2] off: 0x1d00 + 0x2000, align: 8
        head = ALIGN(off, align) - off;
        // [loop 1] head: 0
        // [loop 2] head: 0

        // [loop 1] i: 0, head: 0
        // [loop 2] i: 1, head: 0
        BUG_ON(i == 0 && head != 0);

        // [loop 1] i: 0, chunk->map[0]: dhunk->map[0]: -(0x1d00 + 0x2000)
        // [loop 2] i: 1, chunk->map[1]: dhunk->map[1]: 0x3000
        if (chunk->map[i] < 0)
            continue;
            // [loop 1] continue 수행

        // [loop 2] i: 1, chunk->map[1]: dhunk->map[1]: 0x3000, head: 0, size: 16
        if (chunk->map[i] < head + size) {
            max_contig = max(chunk->map[i], max_contig);
            continue;
        }

        /*
         * If head is small or the previous block is free,
         * merge'em.  Note that 'small' is defined as smaller
         * than sizeof(int), which is very small but isn't too
         * uncommon for percpu allocations.
         */
        // [loop 2] i: 1, chunk->map[0]: -(0x1d00 + 0x2000),
        // [loop 2] head: 0, size: 16
        if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
            if (chunk->map[i - 1] > 0)
                chunk->map[i - 1] += head;
            else {
                chunk->map[i - 1] -= head;
                chunk->free_size -= head;
            }
            chunk->map[i] -= head;
            off += head;
            head = 0;
        }

        /* if tail is small, just keep it around */
        // [loop 2] i: 1, chunk->map[1]: dhunk->map[1]: 0x3000, head: 0, size: 16
        tail = chunk->map[i] - head - size;
        // [loop 2] tail: 0x2ff0

        // [loop 2] tail: 0x2ff0, sizeof(int): 4
        if (tail < sizeof(int))
            tail = 0;

        /* split if warranted */
        // [loop 2] head: 0, tail: 0x2ff0
        if (head || tail) {
            // [loop 2] chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소,
            // [loop 2] i: 1, head: 0, tail: 0x2ff0
            pcpu_split_block(chunk, i, head, tail);

            if (head) {
                i++;
                off += head;
                max_contig = max(chunk->map[i - 1], max_contig);
            }

            if (tail)
                max_contig = max(chunk->map[i + 1], max_contig);
        }
pcpu_alloc_area()는 pcpu_chunk에서 영역(area)을 할당하고 그 offset을 반환하는 함수이다.
 if (head || tail) {
   pcpu_split_block(chunk, i, head, tail);
            if (head) {
                i++;
                off += head;
                max_contig = max(chunk->map[i - 1], max_contig);
            }
            if (tail)
                max_contig = max(chunk->map[i + 1], max_contig);
        }
// [loop 2] chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소,
// [loop 2] i: 1, head: 0, tail: 0x2ff0
pcpu_split_block(chunk, i, head, tail);

percpu.c::pcpu_split_block()

// ARM10C 20140607
// [loop 2] chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소,
// [loop 2] i: 1, head: 0, tail: 0x2ff0
static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
                 int head, int tail)
{
    // head: 0, tail: 0x2ff0
    int nr_extra = !!head + !!tail;
    // nr_extra: 1

    // chunk->map_alloc: dchunk->map_alloc: 128,
    // chunk->map_used: dchunk->map_used: 2, nr_extra: 1
    BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);

    /* insert new subblocks */
    // i: 1, nr_extra: 1, chunk->map[2]: dchunk->map[2]: 0,
    // chunk->map[1]: dchunk->map[1]: 0x3000,
    // sizeof(chunk->map[0]): 4, chunk->map_used: dchunk->map_used: 2
    // memmove(&dchunk->map[2], &dchunk->map[1], 4)
    memmove(&chunk->map[i + nr_extra], &chunk->map[i],
        sizeof(chunk->map[0]) * (chunk->map_used - i));
    // dchunk->map[2]: 0x3000
memmove(DST, SRC, Size);
memmove(&dchunk->map[2], &dchunk->map[1], 4);
dchunk->map[1]의 값을 dchunk->map[2]로 복사(이동)하는 것이다.
결국 4 byte만큼 memmove했다.
    // chunk->map_used : dchunk->map_used: 2, nr_extra: 1
    chunk->map_used += nr_extra;
    // chunk->map_used: dchunk->map_used: 3

    // head: 0
    if (head) {
        chunk->map[i + 1] = chunk->map[i] - head;
        chunk->map[i++] = head;
    }
    // tail: 0x2ff0
    if (tail) {
        // i: 1, chunk->map[1]: dchunk->map[1]: 0x3000, tail: 0x2ff0
        chunk->map[i++] -= tail;
        // chunk->map[1]: dchunk->map[1]: 16

        // i: 2, chunk->map[2]: dchunk->map[2]: 0x3000, tail: 0x2ff0
        chunk->map[i] = tail;
        // dchunk->map[2]: 0x2ff0
    }
}

percpu.c::pcpu_alloc_area()

pcpu_split_block에서 처리한 내용
// [loop 2] chunk->map_used : dchunk->map_used: 3
// [loop 2] chunk->map[1]: dchunk->map[1]: 16
// [loop 2] chunk->map[2]: dchunk->map[2]: 0x2ff0
이후 if (head)와 if (tail)로 분기하며, 지금은 head: 0, tail: 0x2ff0 이므로 if (tail) 쪽만 실행된다.
        // [loop 2] head: 0, tail: 0x2ff0
        if (head || tail) {
            pcpu_split_block(chunk, i, head, tail);
            // [loop 2] chunk->map_used : dchunk->map_used: 3
            // [loop 2] chunk->map[1]: dchunk->map[1]: 16
            // [loop 2] chunk->map[2]: dchunk->map[2]: 0x2ff0

            // [loop 2] head: 0
            if (head) {
                i++;
                off += head;
                max_contig = max(chunk->map[i - 1], max_contig);
            }
            // [loop 2] tail: 0x2ff0
            if (tail)
                // [loop 2] i: 1, chunk->map[2]: dchunk->map[2]: 0x2ff0, max_contig: 0
                max_contig = max(chunk->map[i + 1], max_contig);
                // [loop 2] max_contig: 0x2ff0
        }

        // [loop 2] is_last: 1
        if (is_last)
            // [loop 2] chunk->contig_hint: dchunk->contig_hint, max_contig: 0x2ff0
            chunk->contig_hint = max_contig; /* fully scanned */
            // [loop 2] chunk->contig_hint: 0x2ff0
        else
            chunk->contig_hint = max(chunk->contig_hint,
                         max_contig);

        // [loop 2] chunk->free_size: dchunk->free_size: 0x3000
        // [loop 2] i: 1, chunk->map[1]: dchunk->map[1]: 16
        chunk->free_size -= chunk->map[i];
        // [loop 2] chunk->free_size: dchunk->free_size : 0x2ff0

        // [loop 2] i: 1, chunk->map[1]: dchunk->map[1]: 16
        chunk->map[i] = -chunk->map[i];
        // [loop 2] i: 1, chunk->map[1]: dchunk->map[1]: -16

        // chunk: &pcpu_slot[11], 4k만큼 할당 받은 주소, oslot: 11
        pcpu_chunk_relocate(chunk, oslot);
// chunk: &pcpu_slot[11], 4k만큼 할당 받은 주소, oslot: 11
pcpu_chunk_relocate(chunk, oslot);

percpu.c::pcpu_chunk_relocate()

// ARM10C 20140607
// chunk: dchunk (found on pcpu_slot[11]; address obtained from a 4K allocation), oslot: 11
/*
 * Re-file @chunk after its slot may have changed. nslot is recomputed with
 * pcpu_chunk_slot(); only when @chunk is not pcpu_reserved_chunk and nslot
 * differs from @oslot is the chunk moved onto pcpu_slot[nslot]:
 * list_move() when oslot < nslot, list_move_tail() otherwise.
 *
 * The annotations below come in pairs. The pair with oslot: -1 is presumably
 * left over from an earlier pass through this function; the pair with
 * oslot: 11 belongs to the call traced here.
 */
static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
{
    // [oslot: -1 trace] chunk: dchunk
    // [oslot: 11 trace] chunk: dchunk
    int nslot = pcpu_chunk_slot(chunk);
    // [oslot: -1 trace] nslot: 11
    // [oslot: 11 trace] nslot: 11

    // [oslot: -1 trace] chunk: dchunk, pcpu_reserved_chunk: schunk allocated in pcpu_setup_first_chunk()
    // [oslot: -1 trace] oslot: -1, nslot: 11
    // [oslot: 11 trace] chunk: dchunk, pcpu_reserved_chunk: schunk allocated in pcpu_setup_first_chunk()
    // [oslot: 11 trace] oslot: 11, nslot: 11 -> (oslot != nslot) is false, so the body is skipped
    if (chunk != pcpu_reserved_chunk && oslot != nslot) {
        // [oslot: -1 trace] oslot: -1, nslot: 11
        if (oslot < nslot)
        // [oslot: 11 trace] not reached (presumably what the original "false" note meant)
            // &chunk->list: &dchunk->list, nslot: 11, &pcpu_slot[11]
            list_move(&chunk->list, &pcpu_slot[nslot]);
            // [oslot: -1 trace] &dchunk->list is added to the &pcpu_slot[11] list
        else
            list_move_tail(&chunk->list, &pcpu_slot[nslot]);
    }
}
  • 여기서 oslot: 11, nslot: 11 이므로 if (chunk != pcpu_reserved_chunk && oslot != nslot) 조건은 거짓이 되어야 하지만
  • 처음에는 잘못 계산하여 list_move_tail()로 진행했었다.
    // &chunk->list: &dchunk->list, nslot: 11, &pcpu_slot[11]
    list_move_tail(&chunk->list, &pcpu_slot[nslot]);
  • 이렇게 진행하다가 percpu.c::pcpu_alloc()의
  • if (pcpu_populate_chunk(chunk, off, size)) 를 분석하는 과정에서 잘못 진행한 것을 발견하였다.
  • 올바르게 계산하면 if (chunk != pcpu_reserved_chunk && oslot != nslot) 조건이 거짓이므로
  • pcpu_chunk_relocate(chunk, oslot)는
  • list_move()/list_move_tail()을 수행하지 않고, dchunk는 pcpu_slot[11]에 그대로 남는다.

percpu.c::pcpu_alloc_area()

        // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, oslot: 11
        pcpu_chunk_relocate(chunk, oslot);
        // oslot: 11, nslot: 11 이므로 dchunk는 이동하지 않고 pcpu_slot[11]에 그대로 남음

        // [loop 2] off: 0x1d00 + 0x2000
        return off;
        // [loop 2] return off: 0x1d00 + 0x2000
return off가 수행되어 off 값이 반환됨.

percpu.c::pcpu_alloc()

off 가 0x1d00 + 0x2000한 값이므로 if(off >= 0)에 의해서 goto area_found로 이동.
            off = pcpu_alloc_area(chunk, size, align);
            // off: 0x1d00 + 0x2000
            if (off >= 0)
                goto area_found;
                // area_found 위치로 이동
        }
    }
...
area_found:
    spin_unlock_irqrestore(&pcpu_lock, flags);
    // pcpu_lock의 spin unlock 을 수행하고 flags에 저장되어있던 cpsr 복원

    /* populate, map and clear the area */
    // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소,
    // off: 0x1d00 + 0x2000, size: 16
    // pcpu_populate_chunk(&pcpu_slot[11], 0x3d00, 16): 0
    if (pcpu_populate_chunk(chunk, off, size)) {
        spin_lock_irqsave(&pcpu_lock, flags);
        pcpu_free_area(chunk, off);
        err = "failed to populate";
        goto fail_unlock;
    }
if (pcpu_populate_chunk(chunk, off, size))

percpu-vm.c::pcpu_populate_chunk()

define문에 의해서 percpu-vm으로 간다.
// ARM10C 20140607
// chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소
// off: 0x1d00 + 0x2000, size: 16
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
    // off: 0x1d00 + 0x2000, PFN_DOWN(0x1d00 + 0x2000): 0x3
    int page_start = PFN_DOWN(off);
    // page_start: 0x3
    // off: 0x1d00 + 0x2000, size: 16, PFN_UP(0x1d00 + 0x2000 + 16): 0x4
    int page_end = PFN_UP(off + size);
    // page_end: 0x4
    // page_start: 0x3
    int free_end = page_start, unmap_end = page_start;
    // free_end: 0x3, unmap_end: 0x3
    struct page **pages;
    unsigned long *populated;
    unsigned int cpu;
    int rs, re, rc;

    /* quick path, check whether all pages are already there */
    // page_start: 0x3
    rs = page_start;
    // rs: 0x3

    // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, rs: 0x3, page_end: 0x4
    pcpu_next_pop(chunk, &rs, &re, page_end);
// chunk: &pcpu_slot[11], dchunk: 4k만큼 할당받은 주소, rs: 0x3, page_end: 0x4
pcpu_next_pop(chunk, &rs, &re, page_end);

percpu.c:: pcpu_next_pop()

// ARM10C 20140607
// chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, rs: 0x3, re, page_end: 0x4
static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
                     int *rs, int *re, int end)
{
    // chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
    *rs = find_next_bit(chunk->populated, end, *rs);
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
*rs = find_next_bit(chunk->populated, end, *rs);

bitops.h::find_next_bit()

// ARM10C 20140607
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
#define find_next_bit(p,sz,off)        _find_next_bit_le(p,sz,off)
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
_find_next_bit_le(p,sz,off)

findbit.S::_find_next_bit_le()

// ARM10C 20140607
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
ENTRY(_find_next_bit_le)
        teq    r1, #0
        beq    3b
        ands    ip, r2, #7
        beq    1b            @ If new byte, goto old routine
 ARM(        ldrb    r3, [r0, r2, lsr #3]    )
 THUMB(        lsr    r3, r2, #3        )
 THUMB(        ldrb    r3, [r0, r3]        )
        movs    r3, r3, lsr ip        @ shift off unused bits
        bne    .L_found
        orr    r2, r2, #7        @ if zero, then no bits here
        add    r2, r2, #1        @ align bit pointer
        b    2b            @ loop for next bit
ENDPROC(_find_next_bit_le)
/*
 * Common tail: r3 holds the (already shifted) byte with at least one bit set
 * and r2 the bit offset it corresponds to. Computes r0 = r2 + index of the
 * lowest set bit in r3, then clamps r0 to r1.
 * The label carries the leading dot so that it matches the "bne .L_found"
 * branch in the routine above.
 */
.L_found:
#if __LINUX_ARM_ARCH__ >= 5
        // traced values: r2: 3, r3: 0xff >> 3: 0x1F
        // r0 = 0 - 0x1F: 0xFFFFFFE1
        rsb    r0, r3, #0
        // r3 = 0x1F & 0xFFFFFFE1: 0x1 (only the lowest set bit remains)
        and    r3, r3, r0
        // clz(0x00000001): 31
        clz    r3, r3
        // r3 = 31 - 31: 0 (index of the lowest set bit)
        rsb    r3, r3, #31
        // r0 = r2 + 0: 3
        add    r0, r2, r3
#else
...
#endif
        // r1: 4, r0: 3 -> r1 is not lower than r0, so r0 stays 3
        cmp    r1, r0            @ Clamp to maxbit
        movlo    r0, r1
        mov    pc, lr
*rs: 0x3

percpu.c:: pcpu_next_pop()

// ARM10C 20140607
// chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, rs: 0x3, re, page_end: 0x4
static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
                     int *rs, int *re, int end)
{
    // chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
    *rs = find_next_bit(chunk->populated, end, *rs);
    // *rs: 0x3

    // chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
    *re = find_next_zero_bit(chunk->populated, end, *rs + 1);
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
*re = find_next_zero_bit(chunk->populated, end, *rs + 1);

bitops.h::find_next_zero_bit()

// ARM10C 20140607
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x4
#define find_next_zero_bit(p,sz,off)    _find_next_zero_bit_le(p,sz,off)
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x4
_find_next_zero_bit_le(p,sz,off)

findbit.S::_find_next_zero_bit_le()

// ARM10C 20140607
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x4
ENTRY(_find_next_zero_bit_le)
        teq    r1, #0
        beq    3b
        ands    ip, r2, #7
        beq    1b            @ If new byte, goto old routine
 ARM(        ldrb    r3, [r0, r2, lsr #3]    )
 THUMB(        lsr    r3, r2, #3        )
 THUMB(        ldrb    r3, [r0, r3]        )
        eor    r3, r3, #0xff        @ now looking for a 1 bit
        movs    r3, r3, lsr ip        @ shift off unused bits
        bne    .L_found
        orr    r2, r2, #7        @ if zero, then no bits here
        add    r2, r2, #1        @ align bit pointer
        b    2b            @ loop for next bit
ENDPROC(_find_next_zero_bit_le)
// ARM10C 20131207
/*
 * Common tail: r3 holds the (already shifted) byte with at least one bit set
 * and r2 the bit offset it corresponds to. Computes r0 = r2 + index of the
 * lowest set bit in r3, then clamps r0 to r1.
 * The label carries the leading dot so that it matches the "bne .L_found"
 * branch in the routine above.
 *
 * NOTE: in the 20140607 trace above (byte 0xff, offset 4) r3 is 0 after the
 * eor and the shift, so "bne .L_found" is not taken and this code is not
 * reached for that call. The values below only illustrate the computation
 * for r3: 0xF8.
 */
.L_found:
#if __LINUX_ARM_ARCH__ >= 5
        // r3: 0xF8, r0 = 0 - 0xF8: 0xFFFFFF08
        rsb    r0, r3, #0
        // r3 = 0xF8 & 0xFFFFFF08: 0x8 (only the lowest set bit remains)
        and    r3, r3, r0
        // clz(0x00000008): 28
        clz    r3, r3
        // r3 = 31 - 28: 3 (index of the lowest set bit)
        rsb    r3, r3, #31
        // r0 = r2 + 3
        add    r0, r2, r3
#else
...
#endif
        cmp    r1, r0            @ Clamp to maxbit
        movlo    r0, r1
        mov    pc, lr
// chunk->populated: dchunk->populated[0]: 0xff, end: 0x4, *rs: 0x3
return 0x4

percpu-vm.c::pcpu_populate_chunk()

static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
...
    // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, rs: 0x3, page_end: 0x4
    pcpu_next_pop(chunk, &rs, &re, page_end);
    // rs: 3, re: 4

    // rs: 3, page_start: 0x3, re: 4, page_end: 0x4
    if (rs == page_start && re == page_end)
        goto clear;
        // clear 심볼로 점프
clear:
    // nr_cpu_ids: 4, cpu_possible_mask: cpu_possible_bits[1]
    // cpumask_next((-1), cpu_possible_bits[1]): 0
    for_each_possible_cpu(cpu)
    // for ((cpu) = -1; (cpu) = cpumask_next((cpu), (cpu_possible_mask)), (cpu) < nr_cpu_ids; )
        // chuck: &pcpu_slot[11]: dchunk: 4K만큼 할당 받은 주소, cpu: 0
        // pcpu_chunk_addr(&pcpu_slot[11], 0, 0): 128K 만큼 물리주소 0x5FFFFFFF 근처에 할당받은 주소
        // off: 0x3d00, size: 16
        memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
        // pcpu 0~3의 128K 물리주소에서 offset: 0x3d00 만큼 떨어진 16 bytes 를 0으로 set

    return 0;
    // return 0
리턴 값
return 0

percpu.c::pcpu_alloc()

static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
{
..
area_found:
    // pcpu_populate_chunk(&pcpu_slot[11], 0x3d00, 16): 0
    if (pcpu_populate_chunk(chunk, off, size)) {
    // pcpu_populate_chunk:0 으로 부정됨
        spin_lock_irqsave(&pcpu_lock, flags);
        pcpu_free_area(chunk, off);
        err = "failed to populate";
        goto fail_unlock;
    }

    mutex_unlock(&pcpu_alloc_mutex);
    // pcpu_alloc_mutex의 mutex unlock을 수행

    /* return address relative to base address */
    // chunk->base_addr: dchunk->base_addr: 128K 만큼 물리주소 0x5FFFFFFF 근처에 할당받은 주소
    // off: 0x3d00
    // __addr_to_pcpu_ptr(128K 만큼 물리주소 0x5FFFFFFF 근처에 할당받은 주소+0x3d00): 0xc0502d00
    ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
// chunk->base_addr: dchunk->base_addr: 128K 만큼 물리주소 0x5FFFFFFF 근처에 할당받은 주소
// off: 0x3d00
// __addr_to_pcpu_ptr(128K 만큼 물리주소 0x5FFFFFFF 근처에 할당받은 주소+0x3d00): 0xc0502d00
ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);

percpu.c::__addr_to_pcpu_ptr()

// ARM10C 20140607
// chunk->base_addr: dchunk->base_addr: 128K 만큼 물리주소 0x5FFFFFFF 근처에 할당받은 주소 + 0x3d00
//
// pcpu_base_addr: 128K 만큼 물리주소 0x5FFFFFFF 근처에 할당받은 주소
#define __addr_to_pcpu_ptr(addr)                    \
    (void __percpu *)((unsigned long)(addr) -            \
              (unsigned long)pcpu_base_addr    +        \
              (unsigned long)__per_cpu_start)
#endif
리턴값 0xc0502d00

percpu.c::pcpu_alloc()

static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
{
..
area_found:
...
    ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
    // ptr: 0xc0502d00

    // ptr: 0xc0502d00, size: 16
    kmemleak_alloc_percpu(ptr, size); // null function

    // ptr: 0xc0502d00
    return ptr;
    // return 0xc0502d00
리턴값 ptr : 0xc0502d00

percpu.c::__alloc_percpu()

// ARM10C 20140531
// __alloc_percpu(16, 8)
/*
 * Thin wrapper: forwards @size and @align to pcpu_alloc() with its third
 * argument (reserved) set to false, and returns whatever pcpu_alloc() returns.
 */
void __percpu *__alloc_percpu(size_t size, size_t align)
{
    // traced call: size: 16, align: 8
    return pcpu_alloc(size, align, false);
    // traced result: returns 0xc0502d00
}
리턴값
return pcpu_alloc(size, align, false);
// return 0xc0502d00

slub.c::alloc_kmem_cache_cpus()

s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
의 값을 구했다. 이후 동작을 계속 분석해 보자.
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
    // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab
    // sizeof(struct kmem_cache_cpu): 16 bytes, 2 * sizeof(void *): 8 bytes
    // __alloc_percpu(16, 8): 0xc0502d00
    s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
                     2 * sizeof(void *));
    // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab: 0xc0502d00

    // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab: 0xc0502d00
    if (!s->cpu_slab)
        return 0;

    // s: &boot_kmem_cache_node
    init_kmem_cache_cpus(s);
// s: &boot_kmem_cache_node
init_kmem_cache_cpus(s);

slub.c::init_kmem_cache_cpus(s)

// ARM10C 20140607
// s: &boot_kmem_cache_node
static void init_kmem_cache_cpus(struct kmem_cache *s)
{
    int cpu;

    // nr_cpu_ids: 4, cpu_possible_mask: cpu_possible_bits[1]
    // cpumask_next((-1), cpu_possible_bits[1]): 0
    for_each_possible_cpu(cpu)
    // for ((cpu) = -1; (cpu) = cpumask_next((cpu), (cpu_possible_mask)), (cpu) < nr_cpu_ids; )
        // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab: 0xc0502d00, cpu: 0
        // per_cpu_ptr((&boot_kmem_cache_node)->cpu_slab, 0):
        // (&boot_kmem_cache_node)->cpu_slab + (pcpu_unit_offsets[0] + __per_cpu_start에서의 pcpu_base_addr의 옵셋)
        // init_tid(0): 0
        per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
        // ((&boot_kmem_cache_node)->cpu_slab + (pcpu_unit_offsets[0] + __per_cpu_start에서의pcpu_base_addr의 옵셋))->tid: 0

        // 할당받은 pcpu 들의 16 byte 공간에 각 cpu에 사용하는 kmem_cache_cpu의 tid 멤버를 설정
}

slub.c::alloc_kmem_cache_cpus()

static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
...
    // s: &boot_kmem_cache_node
    init_kmem_cache_cpus(s);
    // 할당받은 pcpu 들의 16 byte 공간에 각 cpu에 사용하는 kmem_cache_cpu의 tid 멤버를 설정

    return 1;
}

slub.c::kmem_cache_open()

static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
...
    // s: &boot_kmem_cache_node, alloc_kmem_cache_cpus(&boot_kmem_cache_node): 1
    if (alloc_kmem_cache_cpus(s))
        // alloc_kmem_cache_cpus 한일:
        // 할당받은 pcpu 들의 16 byte 공간 (&boot_kmem_cache_node)->cpu_slab 에
        // 각 cpu에 사용하는 kmem_cache_cpu의 tid 멤버를 설정
        return 0;
        // return 0
...        
}
리턴값 0
  • kmem_cache_open 가 한일:
    • // boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    • // boot_kmem_cache_node.reserved: 0
    • // boot_kmem_cache_node.min_partial: 5
    • // boot_kmem_cache_node.cpu_partial: 30
    • //
    • // migratetype이 MIGRATE_UNMOVABLE인 page 할당 받음
    • // page 맴버를 셋팅함
    • // page->slab_cache: &boot_kmem_cache_node 주소를 set
    • // page->flags에 7 (PG_slab) bit를 set
    • // page->freelist: UNMOVABLE인 page 의 object의 시작 virtual address + 64
    • // page->inuse: 1, page->frozen: 0 page 맴버를 셋팅함
    • // slab 의 objects 들의 freepointer를 맵핑함
    • // 할당받은 slab object를 kmem_cache_node 로 사용하고 kmem_cache_node의 멤버 필드를 초기화함
    • // kmem_cache_node->nr_partial: 1
    • // kmem_cache_node->list_lock: spinlock 초기화 수행
    • // kmem_cache_node->slabs: 1, kmem_cache_node->total_objects: 64 로 세팅함
    • // kmem_cache_node->full: 리스트 초기화
    • // kmem_cache_node의 partial 맴버에 현재 page의 lru 리스트를 추가함
    • //
    • // kmem_cache_node 가 boot_kmem_cache_node.node[0]에 할당됨
    • //
    • // 할당받은 pcpu 들의 16 byte 공간 (&boot_kmem_cache_node)->cpu_slab 에
    • // 각 cpu에 사용하는 kmem_cache_cpu의 tid 멤버를 설정

slub.c::__kmem_cache_create()

int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
{
    int err;

    // s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // kmem_cache_open(&boot_kmem_cache_node, 0x00002000UL): 0
    err = kmem_cache_open(s, flags);
    // err: 0

    // err: 0
    if (err)
        return err;

    /* Mutex is not taken during early boot */
    // slab_state: DOWN: 0, UP: 4
    if (slab_state <= UP)
        return 0;
        // return 0
}
리턴값 0
  • __kmem_cache_create 가 한일:
    • // boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    • // boot_kmem_cache_node.reserved: 0
    • // boot_kmem_cache_node.min_partial: 5
    • // boot_kmem_cache_node.cpu_partial: 30
    • //
    • // migratetype이 MIGRATE_UNMOVABLE인 page 할당 받음
    • // page 맴버를 셋팅함
    • // page->slab_cache: &boot_kmem_cache_node 주소를 set
    • // page->flags에 7 (PG_slab) bit를 set
    • // page->freelist: UNMOVABLE인 page 의 object의 시작 virtual address + 64
    • // page->inuse: 1, page->frozen: 0 page 맴버를 셋팅함
    • // slab 의 objects 들의 freepointer를 맵핑함
    • // 할당받은 slab object를 kmem_cache_node 로 사용하고 kmem_cache_node의 멤버 필드를 초기화함
    • // kmem_cache_node->nr_partial: 1
    • // kmem_cache_node->list_lock: spinlock 초기화 수행
    • // kmem_cache_node->slabs: 1, kmem_cache_node->total_objects: 64 로 세팅함
    • // kmem_cache_node->full: 리스트 초기화
    • // kmem_cache_node의 partial 맴버에 현재 page의 lru 리스트를 추가함
    • //
    • // 할당받은 pcpu 들의 16 byte 공간 (&boot_kmem_cache_node)->cpu_slab 에
    • // 각 cpu에 사용하는 kmem_cache_cpu의 tid 멤버를 설정

slab_common.c::create_boot_cache()

void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
        unsigned long flags)
{
...
    // s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // __kmem_cache_create(&boot_kmem_cache_node, 0x00002000UL): 0
    err = __kmem_cache_create(s, flags);
    // err: 0

    // err: 0
    if (err)
        panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
                    name, size, err);

    // s->refcount: boot_kmem_cache_node.refcount
    s->refcount = -1;    /* Exempt from merging for now */
    // s->refcount: boot_kmem_cache_node.refcount: -1
}
  • create_boot_cache의 kmem_cache_node가 한일:
    • // boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    • // boot_kmem_cache_node.reserved: 0
    • // boot_kmem_cache_node.min_partial: 5
    • // boot_kmem_cache_node.cpu_partial: 30
    • // boot_kmem_cache_node.refcount: -1
    • //
    • // migratetype이 MIGRATE_UNMOVABLE인 page 할당 받음
    • // page 맴버를 셋팅함
    • // page->slab_cache: &boot_kmem_cache_node 주소를 set
    • // page->flags에 7 (PG_slab) bit를 set
    • // page->freelist: UNMOVABLE인 page 의 object의 시작 virtual address + 64
    • // page->inuse: 1, page->frozen: 0 page 맴버를 셋팅함
    • // slab 의 objects 들의 freepointer를 맵핑함
    • // 할당받은 slab object를 kmem_cache_node 로 사용하고 kmem_cache_node의 멤버 필드를 초기화함
    • // kmem_cache_node->nr_partial: 1
    • // kmem_cache_node->list_lock: spinlock 초기화 수행
    • // kmem_cache_node->slabs: 1, kmem_cache_node->total_objects: 64 로 세팅함
    • // kmem_cache_node->full: 리스트 초기화
    • // kmem_cache_node의 partial 맴버에 현재 page의 lru 리스트를 추가함
    • //
    • // kmem_cache_node: &boot_kmem_cache_node 임
    • //
    • // 할당받은 pcpu 들의 16 byte 공간 (&boot_kmem_cache_node)->cpu_slab 에
    • // 각 cpu에 사용하는 kmem_cache_cpu의 tid 멤버를 설정

slub.c::kmem_cache_init()

void __init kmem_cache_init(void)
{
...
    // &boot_kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node): 44 byte,
    // SLAB_HWCACHE_ALIGN: 0x00002000UL
    // create_boot_cache(&boot_kmem_cache_node, "kmem_cache_node", 44, 0x00002000UL)
    create_boot_cache(kmem_cache_node, "kmem_cache_node",
        sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);

    register_hotmemory_notifier(&slab_memory_callback_nb); // null function

    /* Able to allocate the per node structures */
    // slab_state: DOWN
    slab_state = PARTIAL;
    // slab_state: PARTIAL

    // slab_state 의미:
    // slab을 초기화한 단계를 나타냄, PARTIAL은 kmem_cache_node 만 사용이 가능함

// 2014/06/07 종료
slab_state는 slab이 어느 정도 활성화되었는지를 나타낸다.
지금까지는 slab_state: DOWN 상태에서 분석을 했고,
이제부터는 slab_state = PARTIAL; 로 바뀐 상태에서 분석을 한다.
여기서 slab_state는 slab을 초기화한 단계를 의미한다.
slab_state = PARTIAL은 kmem_cache_node만 사용이 가능함을 의미한다.
계속해서 slab_state = PARTIAL 상태에서 create_boot_cache를 다시 분석한다.
    // kmem_cache: &boot_kmem_cache,
    // offsetof(struct kmem_cache, node): 128, nr_node_ids: 1
    // sizeof(struct kmem_cache_node *): 4 SLAB_HWCACHE_ALIGN: 0x00002000UL
    // create_boot_cache(&boot_kmem_cache, "kmem_cache", 132, 0x00002000UL)
    create_boot_cache(kmem_cache, "kmem_cache",
            offsetof(struct kmem_cache, node) +
                nr_node_ids * sizeof(struct kmem_cache_node *),
               SLAB_HWCACHE_ALIGN);
// create_boot_cache(&boot_kmem_cache, "kmem_cache", 132, 0x00002000UL)
create_boot_cache(kmem_cache, "kmem_cache",
offsetof(struct kmem_cache, node) +
nr_node_ids * sizeof(struct kmem_cache_node *),
SLAB_HWCACHE_ALIGN);

slab_common.c::create_boot_cache()

// create_boot_cache(&boot_kmem_cache, “kmem_cache”, 132, 0x00002000UL)
//
// offsetof(struct kmem_cache, node) : (&boot_kmem_cache)->node : 128Byte
//
// sizeof(struct kmem_cache): 132Byte
// sizeof(struct kmem_cache_node): 44Byte
// sizeof(struct kobject): 52Byte
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
        unsigned long flags)
{
    int err;

    // s->name: boot_kmem_cache_node.name: NULL
    s->name = name;
    // s->name: boot_kmem_cache_node.name: "kmem_cache_node"

    // s->object_size: boot_kmem_cache_node.object_size: 0
    s->size = s->object_size = size;
    // s->object_size: boot_kmem_cache_node.object_size: 44

    // s->align: boot_kmem_cache_node.align: 0
    s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
    // s->align: boot_kmem_cache_node.align: 64

    // s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // __kmem_cache_create(&boot_kmem_cache_node, 0x00002000UL): 0
    err = __kmem_cache_create(s, flags);

cscope 태그가 깨지는 것을 막는 방법.

KBUILD_SRC=`pwd` 로 해주면, cscope가 절대경로로 지정되어 깨지는 것을 막을 수 있다.

git log 1st

Fast-forward
 include/linux/list.h   |  17 +++++++-
 include/linux/percpu.h |   1 +
 lib/string.c           |   1 +
 mm/percpu-vm.c         |   5 +++
 mm/percpu.c            | 116 +++++++++++++++++++++++++++++++++++++++----------

git log 2nd

5fa86bf..06ca064 master -> origin/master
Updating 5fa86bf..06ca064
Fast-forward
arch/arm/include/asm/bitops.h | 6 
arch/arm/lib/findbit.S | 4 

include/asm-generic/percpu.h | 14 +
include/linux/bitops.h | 2 +
include/linux/compiler-gcc.h | 1 +
include/linux/cpumask.h | 4 +-
include/linux/kernel.h | 1 +
include/linux/kmemleak.h | 1 +
include/linux/kobject.h | 4 +-
include/linux/kref.h | 1 +
include/linux/list.h | 13 ——
include/linux/memory.h | 4 +-
include/linux/nodemask.h | 1 +
include/linux/notifier.h | 2 +
include/linux/numa.h | 1 +
include/linux/percpu.h | 15 ++-
include/linux/pfn.h | 2 +
include/linux/slab.h | 1 +
include/linux/slub_def.h | 10 
-
include/linux/types.h | 1 +
include/uapi/asm-generic/posix_types.h | 2 +
kernel/cpu.c | 2 +
mm/percpu-vm.c | 27 +++-
mm/percpu.c | 42 +++
mm/slab_common.c | 32 +-
mm/slub.c | 112 ++++-
26 files changed, 273 insertions(+), 32 deletions(-)

댓글 없음:

댓글 쓰기