2015년 9월 12일 토요일

[Linux Kernel] 116주차(2015.09.12) setup_per_cpu_pageset()

ARM10C : 116 주차
일시 : 2015.09.12 (116 주차 스터디 진행)
모임명 : NAVER_개발자커뮤니티지원_10차ARM-C
장소 : 토즈 타워점
장소지원 : NAVER 개발자 커뮤니티 지원 프로그램
참여인원 : 3명

2주간 각자 휴식을 가지고 다시 커널을 분석합니다.

116 주차 진도

  • 8월 8일 진도로 다시 복습을 했습니다.
  • page_cgroup_init(); // null function
  • debug_objects_mem_init(); // null function
  • kmemleak_init(); // null function
  • 그리고 나서 setup_per_cpu_pageset(); 부터 시작합니다. 
  • start_kernel 1 ~/kernel/iamroot/linux-stable/init/main.c
    • setup_per_cpu_pageset();

main.c::start_kernel()

// ARM10C 20130824
// asmlinkage의 의미
// http://www.spinics.net/lists/arm-kernel/msg87677.html
asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];
    // ATAG,DTB 정보로 사용
...

// 2015/08/08 시작

    page_cgroup_init(); // null function
    debug_objects_mem_init(); // null function
    kmemleak_init(); // null function

// 2015/08/22 종료

    setup_per_cpu_pageset();

page_alloc.c::setup_per_cpu_pageset()

  • call: start_kernel() 1 ~/kernel/iamroot/linux-stable/init/main.c
    • setup_per_cpu_pageset();
/*
 * Boot-time entry point called from start_kernel(): runs
 * setup_zone_pageset() on every zone visited by for_each_populated_zone().
 */
void __init setup_per_cpu_pageset(void)
{
    struct zone *zone;

    /* One setup_zone_pageset() call per zone yielded by the iterator macro. */
    for_each_populated_zone(zone)
        setup_zone_pageset(zone);
}

page_alloc.c::setup_zone_pageset()

  • call: start_kernel() 1 ~/kernel/iamroot/linux-stable/init/main.c
    • setup_per_cpu_pageset();
      • setup_zone_pageset();
/*
 * Give @zone its own per-cpu pageset: store the result of
 * alloc_percpu(struct per_cpu_pageset) in zone->pageset, then call
 * zone_pageset_init() for each CPU visited by for_each_possible_cpu().
 */
static void __meminit setup_zone_pageset(struct zone *zone)
{
    int cpu;
    zone->pageset = alloc_percpu(struct per_cpu_pageset);
    // zone->pageset: per-cpu storage returned by alloc_percpu()
    for_each_possible_cpu(cpu)
        zone_pageset_init(zone, cpu);
}
kmem_cache#26-o0 (512B)의 slab 하나를 per-cpu 용도로 나누어 사용하는데, 이때 사용하는 할당자가 alloc_percpu() 함수이다. 여기서는 struct per_cpu_pageset 구조체가 72Byte이므로 512B 중 72B를 할당받는다.
  • for_each_possible_cpu(cpu)
// Iterate @cpu over the CPUs in cpu_possible_mask by delegating to for_each_cpu().
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
// Annotated definition of for_each_cpu() as recorded in the study notes. The
// second parameter is spelled cpu_online_mask here; in the call above the
// argument actually passed is cpu_possible_mask.
// #define for_each_cpu(cpu, cpu_online_mask)
//  for ((cpu) = -1; (cpu) = cpumask_next((cpu), (cpu_online_mask)), (cpu) < nr_cpu_ids; )
여기서 nr_cpu_ids가 4이므로 루프는 4번 반복한다.
  • zone_pageset_init()
/*
 * Initialise the pageset that @zone keeps for CPU @cpu.
 *
 * The per-cpu slot is looked up with per_cpu_ptr(zone->pageset, cpu), reset
 * by pageset_init(), and then configured by pageset_set_high_and_batch().
 */
static void __meminit zone_pageset_init(struct zone *zone, int cpu)
{
    struct per_cpu_pageset *cpu_pageset;

    /* Locate this CPU's slot inside the zone's per-cpu pageset. */
    cpu_pageset = per_cpu_ptr(zone->pageset, cpu);

    pageset_init(cpu_pageset);
    pageset_set_high_and_batch(zone, cpu_pageset);
}
zone: &(&...
// Apply per_cpu_offset(cpu) to the per-cpu pointer @ptr via SHIFT_PERCPU_PTR().
#define per_cpu_ptr(ptr, cpu)   SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
SHIFT_PERCPU_PTR()
// Statement-expression macro: first passes @__p to __verify_pcpu_ptr(), then
// yields RELOC_HIDE() of @__p (cast to a __kernel __force pointer of the same
// pointee type) and @__offset.
#define SHIFT_PERCPU_PTR(__p, __offset) ({              \
    __verify_pcpu_ptr((__p));                   \
    RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \
})
#endif
  • pageset_init(pcp)
// ARM10C 20140308
// p: (&boot_pageset + __per_cpu_offset[0])
//
// Reset one per_cpu_pageset: zero the whole structure, set pcp.count to 0 and
// initialise the MIGRATE_PCPTYPES list heads in pcp.lists[].
//
// NOTE: the value annotations below were recorded on 2014-03-08 for the
// boot_pageset call. On the zone_pageset_init() path analysed in these notes,
// p is per_cpu_ptr(zone->pageset, cpu) instead.
static void pageset_init(struct per_cpu_pageset *p)
{
    struct per_cpu_pages *pcp;
    int migratetype;

    // p: (&boot_pageset + __per_cpu_offset[0]), sizeof(*p): 66
    // NOTE(review): the notes for setup_zone_pageset() quote 72 bytes for
    // struct per_cpu_pageset; confirm which size applies to this config.
    memset(p, 0, sizeof(*p));

    // p->pcp: [pcpu0] boot_pageset->pcp
    pcp = &p->pcp;
    // pcp: [pcpu0] boot_pageset->pcp

    pcp->count = 0;
    // [pcpu0] boot_pageset->pcp->count: 0

    // MIGRATE_PCPTYPES: 3
    for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
        // [pcpu0] boot_pageset->pcp->lists[0..2]
        INIT_LIST_HEAD(&pcp->lists[migratetype]);
}
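여기서는 p가 가리키는 per_cpu_pageset 전체를 memset()으로 0으로 채우고, pcp->count를 0으로 설정한 뒤, MIGRATE_PCPTYPES(3)개의 pcp->lists[] 리스트 헤드를 초기화했다. (메모리 할당 자체는 앞의 alloc_percpu()에서 이루어진다.)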
  • pageset_set_high_and_batch(zone, pcp)
/*
 * Configure the limits of one per-cpu pageset of @zone.
 *
 * When percpu_pagelist_fraction is zero, the batch is derived from
 * zone_batchsize(zone) and applied with pageset_set_batch(). Otherwise
 * pageset_set_high() is called with
 * zone->managed_pages / percpu_pagelist_fraction.
 */
static void __meminit pageset_set_high_and_batch(struct zone *zone,
        struct per_cpu_pageset *pcp)
{
    /* No fraction configured: size the pageset from the zone's batch size. */
    if (!percpu_pagelist_fraction) {
        pageset_set_batch(pcp, zone_batchsize(zone));
        return;
    }

    /* Fraction configured: pass managed_pages / fraction to pageset_set_high(). */
    pageset_set_high(pcp, zone->managed_pages / percpu_pagelist_fraction);
}
  • pageset_set_batch()을 호출
// Apply a batch size to the pageset @p by calling pageset_update() on p->pcp
// with 6 * batch and max(1UL, 1 * batch).
//
// Values recorded in the notes for this call path:
// p->pcp: [pcpu0] (&(&contig_page_data)->node_zones[0])->pageset->pcp, results: 186, 31
// (186 == 6 * 31, which implies batch: 31 here)
static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
{
    // Earlier annotation (boot_pageset call): p->pcp: [pcpu0] boot_pageset->pcp, batch: 0, 1
    // NOTE(review): presumably the two arguments are the "high" and "batch"
    // settings, in that order; confirm against pageset_update().
    pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
}
따라서 pageset_set_batch()에서 한 일을 정리하면, pageset_update()를 통해 최대값(high)은 6 * batch = 186, batch는 max(1, batch) = 31이 되도록 초기화한다. 그리고 각 core마다 loop를 실행해서 lists[0..2]를 포함한 zone의 pageset을 초기화해 준다.

main.c::start_kernel()

    setup_per_cpu_pageset();
    numa_policy_init();
    // late_time_init(): NULL
    if (late_time_init)
        late_time_init();
    sched_clock_init();
    // sched_clock_running: 0 -> 1 로 바꿈
    calibrate_delay();

calibrate.c::calibrate_delay()

  • called: start_kernel()
    • calibrate_delay()
/*
 * Determine loops-per-jiffy (lpj) for the current CPU and publish it.
 *
 * The value comes from the first source that yields one, in this order:
 *   1. an already stored per_cpu(cpu_loops_per_jiffy, this_cpu),
 *   2. preset_lpj,
 *   3. lpj_fine (only while nothing has been printed yet),
 *   4. calibrate_delay_is_known(),
 *   5. calibrate_delay_direct(),
 *   6. calibrate_delay_converge().
 * The result is stored in per_cpu(cpu_loops_per_jiffy, this_cpu) and in
 * loops_per_jiffy. Messages are emitted only while the static flag `printed`
 * is false; the flag is set to true at the end of the call.
 */
void calibrate_delay(void)
{
    unsigned long lpj;
    static bool printed;
    // printed: 0

    // smp_processor_id: 0
    int this_cpu = smp_processor_id();
    // this_cpu: 0

    // preset_lpj: 0, lpj_fine: (value not recorded in the notes)
    if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
        // This CPU already has a stored value: reuse it.
        lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "already calibrated this CPU");
    } else if (preset_lpj) {
        // A preset value is available: use it as-is.
        lpj = preset_lpj;
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "preset value.. ");
    } else if ((!printed) && lpj_fine) {
        // lpj_fine is only consulted before the first report.
        lpj = lpj_fine;
        pr_info("Calibrating delay loop (skipped), "
            "value calculated using timer frequency.. ");
    } else if ((lpj = calibrate_delay_is_known())) {
        // Non-zero result from calibrate_delay_is_known(): nothing more to do.
        ;
    } else if ((lpj = calibrate_delay_direct()) != 0) {
        if (!printed)
            pr_info("Calibrating delay using timer "
                "specific routine.. ");
    } else {
        // Fallback: measure with calibrate_delay_converge()
        // (the path followed in these notes).
        // printed: 0
        if (!printed)
            pr_info("Calibrating delay loop... ");
        lpj = calibrate_delay_converge();
    }
    per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
    // Continue the message started above with the BogoMIPS figure:
    // integer part lpj/(500000/HZ), two-digit fraction (lpj/(5000/HZ)) % 100.
    if (!printed)
        pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
            lpj/(500000/HZ),
            (lpj/(5000/HZ)) % 100, lpj);

    loops_per_jiffy = lpj;
    printed = true;
}
  • calibrate_delay_converge();
/*
 * Estimate loops-per-jiffy by timing __delay() against the jiffies counter.
 *
 * Stage 1 waits for jiffies to change, then repeatedly calls
 * __delay(lpj * band) with a growing `band` until jiffies changes again.
 * That yields a base estimate (lpj_base) and an initial step (loopadd_base).
 * Stage 2 refines the estimate by adding a step that is halved every
 * iteration, keeping an addition only if __delay(lpj) still completes
 * without jiffies changing. If every addition was kept, the base and step
 * are enlarged and stage 2 is repeated.
 *
 * Returns the resulting lpj value.
 */
static unsigned long calibrate_delay_converge(void)
{
    /* First stage - slowly accelerate to find initial bounds */
    unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
    int trials = 0, band = 0, trial_in_band = 0;
    // trials: 0, band: 0, trial_in_band: 0

    lpj = (1<<12);
    // lpj: 0x1000

    /* wait for "start of" clock tick */
    ticks = jiffies;
    while (ticks == jiffies)
        ; /* nothing */
    /* Go .. */
    ticks = jiffies;
    do {
        /* After (1 << band) trials in the current band, move to the next band. */
        if (++trial_in_band == (1<<band)) {
            ++band;
            trial_in_band = 0;
        }
        __delay(lpj * band);
        /* trials accumulates the band multipliers that have been delayed so far. */
        trials += band;
    } while (ticks == jiffies);
    /*
     * We overshot, so retreat to a clear underestimate. Then estimate
     * the largest likely undershoot. This defines our chop bounds.
     */
    trials -= band;
    loopadd_base = lpj * band;
    lpj_base = lpj * trials;

recalibrate:
    lpj = lpj_base;
    loopadd = loopadd_base;

    /*
     * Do a binary approximation to get lpj set to
     * equal one clock (up to LPS_PREC bits)
     */
    chop_limit = lpj >> LPS_PREC;
    while (loopadd > chop_limit) {
        lpj += loopadd;
        /* Re-synchronise to a jiffies change before each timed __delay(). */
        ticks = jiffies;
        while (ticks == jiffies)
            ; /* nothing */
        ticks = jiffies;
        __delay(lpj);
        if (jiffies != ticks)   /* longer than 1 tick */
            lpj -= loopadd;
        loopadd >>= 1;
    }
    /*
     * If we incremented every single time possible, presume we've
     * massively underestimated initially, and retry with a higher
     * start, and larger range. (Only seen on x86_64, due to SMIs)
     */
    if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
        lpj_base = lpj;
        loopadd_base <<= 2;
        goto recalibrate;
    }

    return lpj;
}

main.c::start_kernel()

    calibrate_delay();
    pidmap_init();

pid.c::pidmap_init()

  • start_kernel()
    • pidmap_init()
/*
 * Boot-time setup of the initial PID namespace's pid map.
 *
 * Adjusts pid_max and pid_max_min based on num_possible_cpus(), logs both,
 * assigns a zeroed PAGE_SIZE buffer from kzalloc() to
 * init_pid_ns.pidmap[0].page, marks PID 0 as used (bit 0 set, nr_free
 * decremented), and sets init_pid_ns.pid_cachep from KMEM_CACHE(pid, ...).
 */
void __init pidmap_init(void)
{
    /* Verify no one has done anything silly */
    // PID_MAX_LIMIT: 0x8000
    // PIDNS_HASH_ADDING: 0x80000000 (the notes read "080000000", presumably a typo)
    BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);

    /* bump default and minimum pid_max based on number of cpus */
    // pid_max_max: 0x8000, pid_max: 0x8000, PIDS_PER_CPU_DEFAULT: 1024
    // num_possible_cpus: 4, max_t(int, 0x8000, 4 * 1024): 0x8000
    pid_max = min(pid_max_max, max_t(int, pid_max,
                PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
    // pid_max: 0x8000

    // pid_max_min: 301, RESERVED_PIDS + 1: 300 + 1: 301
    // max_t(int, pid_max_min: 301, PIDS_PER_CPU_MIN: 8 * 4)
    pid_max_min = max_t(int, pid_max_min,
                PIDS_PER_CPU_MIN * num_possible_cpus());
    // pid_max_min: 301

    pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
    // Values printed: 0x8000 and 0x12d; since the format is %u the text
    // reads "pid_max: default: 32768 minimum: 301\n"

    // PAGE_SIZE: 0x1000, GFP_KERNEL 0xD0
    init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
    /* Reserve PID 0. We never call free_pidmap(0) */
    set_bit(0, init_pid_ns.pidmap[0].page);
    atomic_dec(&init_pid_ns.pidmap[0].nr_free);

    // KMEM_CACHE(pid, ...) result is kept in pid_cachep; flags request
    // SLAB_HWCACHE_ALIGN and SLAB_PANIC.
    init_pid_ns.pid_cachep = KMEM_CACHE(pid,
            SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}

main.c::start_kernel()

...
    setup_per_cpu_pageset();
    numa_policy_init();
    if (late_time_init)
        late_time_init();
    sched_clock_init();
    calibrate_delay();
    pidmap_init();

log

  • 1st log
76a328c..274a47c  master     -> origin/master
Updating 76a328c..274a47c
Fast-forward
include/linux/cpumask.h |   3 +++
include/linux/mmzone.h  |   7 +++++++
include/linux/percpu.h  |  17 +++++++++++++++
init/main.c             |   3 +++
kernel/bounds.c         |   1 +
mm/bootmem.c            |   1 +
mm/mmzone.c             |   2 ++
mm/page_alloc.c         | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
mm/percpu.c             |   6 ++++--
9 files changed, 175 insertions(+), 31 deletions(-)
  • 2nd log
274a47c..18884c1  master     -> origin/master
Updating 274a47c..18884c1
Fast-forward
arch/arm/include/asm/atomic.h |  3 +++
arch/arm/include/asm/page.h   |  1 +
arch/arm/lib/delay.c          |  7 +++++++
include/linux/cpumask.h       |  1 +
include/linux/gfp.h           |  1 +
include/linux/kernel.h        |  1 +
include/linux/mempolicy.h     |  1 +
include/linux/percpu-defs.h   |  1 +
include/linux/pid.h           |  7 +++++++
include/linux/pid_namespace.h |  6 ++++++
include/linux/slab.h          |  7 +++++++
include/linux/slub_def.h      |  1 +
include/linux/smp.h           |  1 +
include/linux/threads.h       | 11 +++++++++++
include/linux/types.h         |  4 +++-
init/calibrate.c              | 47 +++++++++++++++++++++++++++++++++++++++++++++++
init/main.c                   | 14 +++++++++++++-
kernel/pid.c                  | 40 ++++++++++++++++++++++++++++++++++++++++
kernel/sched/clock.c          |  6 +++++-
mm/slab_common.c              |  7 +++++++
20 files changed, 164 insertions(+), 3 deletions(-)

댓글 없음:

댓글 쓰기