2015년 9월 19일 토요일

[Linux Kernel] 117주차(2015.09.19) anon_vma_init(), cred_init(), fork_init(), proc_cache_init()

ARM10C : 117 주차
일시 : 2015.09.19 (117 주차 스터디 진행)
모임명 : NAVER_개발자커뮤니티지원_10차ARM-C
장소 : 토즈 타워점
장소지원 : NAVER 개발자 커뮤니티 지원 프로그램
참여인원 : 3명

cgroup 초기화 분석을 마치고, 2주간 각자 휴식을 쉬했습니다. 다시 리눅스 커널을 분석합니다.

117 주차 진도

  • 9월 19일 진도로 다시 복습을 했습니다.
  • setup_per_cpu_pageset();
    • // per cpu가 사용하는 pageset의 각각의 zone 맴버값 초기화 수행
  • numa_policy_init(); // null function
    • // late_time_init: NULL
  • sched_clock_init();
    • // sched_clock_running 값을 1 로 초기화 수행
  • calibrate_delay();
    • // BogoMIPS값을 결정하기위한 계산을 수행하고 결과를 출력함
  • pidmap_init();
    • // pidmap 을 사용하기 위한 초기화 수행

main.c::start_kernel()

// ARM10C 20130824
// asmlinkage의 의미
// http://www.spinics.net/lists/arm-kernel/msg87677.html
asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];
    // ATAG,DTB 정보로 사용
...

// 2015/08/08 시작

    page_cgroup_init(); // null function
    debug_objects_mem_init(); // null function
    kmemleak_init(); // null function

// 2015/08/22 종료
// 2015/09/12 시작

    setup_per_cpu_pageset();
    // per cpu가 사용하는 pageset의 각각의 zone 맴버값 초기화 수행

    numa_policy_init(); // null function

    // late_time_init: NULL
    if (late_otime_init)
        late_time_init();

    sched_clock_init();
    // sched_clock_running 값을 1 로 초기화 수행

    calibrate_delay();
    // BogoMIPS값을 결정하기위한 계산을 수행하고 결과를 출력함

    pidmap_init();
    // pidmap 을 사용하기 위한 초기화 수행

// 2015/09/12 종료
// 2015/09/19 시작

    anon_vma_init();

rmap.c::anon_vma_init()

  • call: start_kernel()
    • anon_vma_init()
// ARM10C 20150919
void __init anon_vma_init(void)
{
    // sizeof(struct anon_vma): 40 bytes, SLAB_DESTROY_BY_RCU: 0x00080000UL, SLAB_PANIC: 0x00040000UL
    // kmem_cache_create("anon_vma", 40, 0, 0x000C0000, anon_vma_ctor): kmem_cache#18
    anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
            0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
    // anon_vma_cachep: kmem_cache#18

    // SLAB_PANIC: 0x00040000UL
    // KMEM_CACHE(anon_vma_chain, 0x00040000):
    // kmem_cache_create("anon_vma_chain", sizeof(struct anon_vma_chain), __alignof__(struct anon_vma_chain), (0x00040000), NULL): kmem_cache#17
    anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
    // anon_vma_chain_cachep: kmem_cache#17
}
  • anon_vma_cachep 할당
    • sizeof(struct anon_vma): 40Byte
      • #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */
    • #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */
    • kmem_cache_create()
      • "anon_vma", sizeof(struct anon_vma): 40Byte, 0, 0x000C0000, anon_vma_ctor
    • kmem_cache_crate()에 의해서 #18번이 새로 할당된다.
    • 참조로 #22 번에서 많으 것을 할당하였고, 이번(#18)에 anon_vma_cache를 할당한다.
      • #21: idr_layer_cache
    • #20: radix_tree_node
    • #19: pid
    • #18: anon_vma_cache
kmem_cache_create(const char *name, size_t size, size_t align,
          unsigned long flags, void (*ctor)(void *))
{
o   // name: "anon_vma", size: 40, align: 0, flags: 0x000C0000, ctor: anon_vma_ctor
    // kmem_cache_create_memcg(NULL, "anon_vma", 40, 0, 0x000C0000, anon_vma_ctor): kmem_cache#18
    return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
    // return kmem_cache#18
}
EXPORT_SYMBOL(kmem_cache_create);
  • anon_vma_chain_cachep 할당
    • struct list_head anon_vma_chain; /* Serialized by mmap_sem & page_table_lock
    • #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ 
    • KMEM_CACHE(anon_vma_chain, 0x00040000):
// kmem_cache_create("anon_vma_chain", sizeof(struct anon_vma_chain), __alignof__(struct anon_vma_chain), (0x00040000), NULL)
#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
        sizeof(struct __struct), __alignof__(struct __struct),\
        (__flags), NULL)
  • name: "anon_vma_chain", size: 32, align: 4, flags: 0x00040000, ctor: NULL
    • kmem_cache_create_memcg(NULL, "anon_vma_chain", 32, 4, 0x00040000, NULL): kmem_cache#17 b - return kmem_cache#17
kmem_cache_create(const char *name, size_t size, size_t align,
          unsigned long flags, void (*ctor)(void *))
{
    // "anon_vma_chain", sizeof(struct anon_vma_chain): 32Byte, 4, 0x00040000, NULL
    return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
    // return kmem_cache#17
}
EXPORT_SYMBOL(kmem_cache_create);
  • anon_vma_init() 정리
    • anon_vma를 사용하기 위한 kmem_cache#18을 할당했다.
    • anon_vma_chain을 사용하기 위한 kmem_cache#18을 할당했다.

main.c::start_kernel()

// ARM10C 20130824
asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];
    // ATAG,DTB 정보로 사용
...

// 2015/09/19 시작

    anon_vma_init();
#ifdef CONFIG_X86 // CONFIG_X86=n
    if (efi_enabled(EFI_RUNTIME_SERVICES))
        efi_enter_virtual_mode();
#endif
    thread_info_cache_init();

fork.c::thread_info_cache_init()

  • call: start_kernel()
    • thread_info_cache_init()
# if THREAD_SIZE >= PAGE_SIZE
void __init __weak thread_info_cache_init(void)
{
}
  • THREAD_SIZE 8192 (0x2000)
  • PAGE_SIZE 4096 (0x1000)
  • 여기서 __weak인데
    • THREAD_SIZE: 8192 >= PAGE_SIZE : 4096 이므로 weak로 가서 NULL Function이다.
    • 아래 선언된 부분은 THREAD_SIZE가 PAGE_SIZE보다 작은 경우에
    • 메모리를 kmem_cache로 할당 받는 것을 말한다.
    • 우리 분석 시스템에서는 적용하지 않는다.
    • 따라서 null function이다. 

main.c::start_kernel()

// ARM10C 20130824
asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];
    // ATAG,DTB 정보로 사용
...

// 2015/09/19 시작

    anon_vma_init();
#ifdef CONFIG_X86 // CONFIG_X86=n
    if (efi_enabled(EFI_RUNTIME_SERVICES))
        efi_enter_virtual_mode();
#endif
    thread_info_cache_init(); // null function
    cred_init();

cred.c::cred_init()

  • call: start_kernel()
    • cred_init()
    • credentials - 인증에 사용되는 프로세스의 식별자를 말함.
  • sizeof(struct cred): 92Byte
struct cred {
    atomic_t    usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
    atomic_t    subscribers;    /* number of processes subscribed */
    void        *put_addr;
    unsigned    magic;
#define CRED_MAGIC  0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
    kuid_t      uid;        /* real UID of the task */
    kgid_t      gid;        /* real GID of the task */
    kuid_t      suid;       /* saved UID of the task */
    kgid_t      sgid;       /* saved GID of the task */
    kuid_t      euid;       /* effective UID of the task */
    kgid_t      egid;       /* effective GID of the task */
    kuid_t      fsuid;      /* UID for VFS ops */
    kgid_t      fsgid;      /* GID for VFS ops */
    unsigned    securebits; /* SUID-less security management */
    kernel_cap_t    cap_inheritable; /* caps our children can inherit */
    kernel_cap_t    cap_permitted;  /* caps we're permitted */
    kernel_cap_t    cap_effective;  /* caps we can actually use */
    kernel_cap_t    cap_bset;   /* capability bounding set */
#ifdef CONFIG_KEYS
    unsigned char   jit_keyring;    /* default keyring to attach requested
                     * keys to */
    struct key __rcu *session_keyring; /* keyring inherited over fork */
    struct key  *process_keyring; /* keyring private to this process */
    struct key  *thread_keyring; /* keyring private to this thread */
    struct key  *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
    void        *security;  /* subjective LSM security */
#endif
    struct user_struct *user;   /* real user ID subscription */
    struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
    struct group_info *group_info;  /* supplementary groups for euid/fsgid */
    struct rcu_head rcu;        /* RCU deletion hook */
};
// ARM10C 20150919
void __init cred_init(void)
{
    /* allocate a slab in which we can store credentials */
    // sizeof(struct cred): 92 bytes, SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_PANIC: 0x00040000UL
    // kmem_cache_create("cred_jar", 92, 0, 0x00042000, NULL): kmem_cache#16
    cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
                     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
    // cred_jar: kmem_cache#16
}
kmem_cache_create(const char *name, size_t size, size_t align,
          unsigned long flags, void (*ctor)(void *))
{
o   // name: "anon_vma", size: 40, align: 0, flags: 0x000C0000, ctor: anon_vma_ctor
    // kmem_cache_create_memcg(NULL, "anon_vma", 40, 0, 0x000C0000, anon_vma_ctor): kmem_cache#18
    return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
    // return kmem_cache#18
}
EXPORT_SYMBOL(kmem_cache_create);
  • cred_jar: kmem_cache_create() // name: "cred_jar", size: 92, align: 0, flags: 0x00042000, ctor: NULL // kmem_cache_create_memcg(NULL, "cred_jar", 92, 0, 0x00042000, NULL): kmem_cache#16
    • kmem_cache_create()
      • "anon_vma", sizeof(struct anon_vma): 40Byte, 0, 0x000C0000, anon_vma_ctor
    • kmem_cache_crate()에 의해서 #18번이 새로 할당된다.
    • 참조로 #22 번에서 많으 것을 할당하였고, 이번(#16)에 cred_jar를 할당한다.
      • #21: idr_layer_cache
    • #20: radix_tree_node
    • #19: pid
    • #18: anon_vma
    • #17: anon_vma_chain
    • #16: cred_jar 
  • cred_init() 정리
    • cred_jar를 사용하기 위한 kmem_cache#16을 할당했다.

main.c::start_kernel()

// ARM10C 20130824
asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];
    // ATAG,DTB 정보로 사용
...

// 2015/09/19 시작

    anon_vma_init();
    // anon vma 를 사용하기 위한 kmem_cache 할당자 초기화 수행

#ifdef CONFIG_X86 // CONFIG_X86=n
    if (efi_enabled(EFI_RUNTIME_SERVICES))
        efi_enter_virtual_mode();
#endif
    thread_info_cache_init(); // null function
    cred_init();
    // credentials 를 사용하기 위한 kmem_cache 할당자 초기화 수행

    // totalram_pages: 총 free된 page 수
    fork_init(totalram_pages);

fork.c::fork_init()

  • call: start_kernel()
    • fork_init()
    • totalram_pages: 총 free된 page수 
  • totalram_pages
    • // totalram_pages: 총 free된 page 수+ 0x6 + 1
    • free_all_bootmem() : 총 free된 page 수 + 0x6
    • free_high_mem(): 총 free된 page 수 + 0x6 +1
unsigned long totalram_pages __read_mostly;
// ARM10C 20150919
// totalram_pages: 총 free된 page 수
void __init fork_init(unsigned long mempages)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR // CONFIG_ARCH_TASK_STRUCT_ALLOCATOR=n
#ifndef ARCH_MIN_TASKALIGN
// L1_CACHE_BYTES: 64
// ARCH_MIN_TASKALIGN: 64
#define ARCH_MIN_TASKALIGN  L1_CACHE_BYTES
#endif
    /* create a slab on which task_structs can be allocated */
    // FIXME:
    // sizeof(struct task_struct) 계산 필요, 임시로 XXX bytes로 명명함

    // ARCH_MIN_TASKALIGN: 64, SLAB_PANIC: 0x00040000UL, SLAB_NOTRACK: 0x00000000UL, sizeof(struct task_struct): XXX
    // kmem_cache_create("task_struct", XXX, 64, 0x00040000, NULL): kmem_cache#15
    task_struct_cachep =
        kmem_cache_create("task_struct", sizeof(struct task_struct),
            ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
    // task_struct_cachep: kmem_cache#15
#endif
  • kmem_cache_create()
    • task_struct_cachep
    • kmem_cache_create()
    • "task_struct", sizeof(struct task_struct),
    • ARCH_MIN_TASKALIGN: 64, SLAB_PANIC | SLAB_NOTRACK : 0x0040000, NULL);
// ARM10C 20150919
// "task_struct", sizeof(struct task_struct): XXX, 64, 0x00040000, NULL
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
          unsigned long flags, void (*ctor)(void *))
{

    // name: "task_struct", size: XXX, align: 64, flags: 0x00040000, ctor: NULL
    // kmem_cache_create_memcg(NULL, "task_struct", XXX, 64, 0x00040000, NULL): kmem_cache#15
    return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
    // return kmem_cache#15
}
  • 이번(#15)에 task_struct_cachep를 할당한다.
    • #21: idr_layer_cache
    • #20: radix_tree_node
    • #19: pid
    • #18: anon_vma
    • #17: anon_vma_chain
    • #16: cred_jar 
    • #15: task_struct_cachep
// ARM10C 20150919
// totalram_pages: 총 free된 page 수
void __init fork_init(unsigned long mempages)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR // CONFIG_ARCH_TASK_STRUCT_ALLOCATOR=n
#ifndef ARCH_MIN_TASKALIGN
// L1_CACHE_BYTES: 64
// ARCH_MIN_TASKALIGN: 64
#define ARCH_MIN_TASKALIGN  L1_CACHE_BYTES
#endif
    /* create a slab on which task_structs can be allocated */
    // FIXME:
    // sizeof(struct task_struct) 계산 필요, 임시로 XXX bytes로 명명함

    // ARCH_MIN_TASKALIGN: 64, SLAB_PANIC: 0x00040000UL, SLAB_NOTRACK: 0x00000000UL, sizeof(struct task_struct): XXX
    // kmem_cache_create("task_struct", XXX, 64, 0x00040000, NULL): kmem_cache#15
    task_struct_cachep =
        kmem_cache_create("task_struct", sizeof(struct task_struct),
            ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
    // task_struct_cachep: kmem_cache#15
#endif

    /* do the arch specific task caches init */
    arch_task_cache_init(); // null function

    /*
     * The default maximum number of threads is set to a safe
     * value: the thread structures can take up at most half
     * of memory.
     */
    // mempages: 총 free된 page 수, THREAD_SIZE: 0x2000, PAGE_SIZE: 0x1000
    max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
    // max_threads: 총 free된 page 수 / 16

    /*
     * we need to allow at least 20 threads to boot a system
     */
    // max_threads: 총 free된 page 수 / 16
    if (max_threads < 20)
        max_threads = 20;

    // max_threads: 총 free된 page 수 / 16, RLIMIT_NPROC: 6
    init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
    // init_task.signal->rlim[6].rlim_cur: 총 free된 page 수 / 32

    // max_threads: 총 free된 page 수 / 16, RLIMIT_NPROC: 6
    init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
    // init_task.signal->rlim[6].rlim_max: 총 free된 page 수 / 32

    // RLIMIT_SIGPENDING: 11
    init_task.signal->rlim[RLIMIT_SIGPENDING] =
        init_task.signal->rlim[RLIMIT_NPROC];
    // init_task.signal->rlim[11].rlim_cur: 총 free된 page 수 / 32
    // init_task.signal->rlim[11].rlim_max: 총 free된 page 수 / 32
}
  • THREAD_SIZE: 0x2000, PAGE_SIZE: 0x1000
    • max_threads : 총 free된 page 수 / 16
    • 최소는 20개 이상
  • RLIMIT_NPROC
# define RLIMIT_NPROC       6   /* max number of processes */
  • init_task `
// ARM10C 20130831
// .cpus_allowed : 3
// ARM10C 20140315
// ARM10C 20140510
// PF_KTHREAD: 0x00200000
// MAX_PRIO: 140
// SCHED_NORMAL: 0
// ARM10C 20150808
// ATOMIC_INIT(2): { (2) }
// PF_KTHREAD: 0x00200000
// MAX_PRIO: 140
// SCHED_NORMAL: 0
// CPU_MASK_ALL: (cpumask_t) { { [0] = 0xf } }
// NR_CPUS: 4
// LIST_HEAD_INIT(init_task.se.group_node):
// { &(init_task.se.group_node), &(init_task.se.group_node) }
// LIST_HEAD_INIT(init_task.rt.run_list):
// { &(init_task.rt.run_list), &(init_task.rt.run_list) }
// RR_TIMESLICE: 10
// LIST_HEAD_INIT(init_task.tasks):
// { &(init_task.tasks), &(init_task.tasks) }
// INIT_PUSHABLE_TASKS(init_task):
// .pushable_tasks =
// {
//     .prio  = (140),
//     .prio_list = { &((init_task.pushable_tasks).prio_list), &((init_task.pushable_tasks).prio_list) },
//     .node_list = { &((init_task.pushable_tasks).node_list), &((init_task.pushable_tasks).node_list) },
// },
// INIT_CGROUP_SCHED(init_task):
// .sched_task_group = &root_task_group,
// LIST_HEAD_INIT(init_task.ptraced):
// { &(init_task.ptraced), &(init_task.ptraced) }
// LIST_HEAD_INIT(init_task.ptrace_entry):
// { &(init_task.ptrace_entry), &(init_task.ptrace_entry) }
// LIST_HEAD_INIT(init_task.children):
// { &(init_task.children), &(init_task.children) }
// LIST_HEAD_INIT(init_task.sibling):
// { &(init_task.sibling), &(init_task.sibling) }
// RCU_POINTER_INITIALIZER(real_cred, &init_cred):
// .real_cred = (typeof(*&init_cred) __force __rcu *)(&init_cred)
// RCU_POINTER_INITIALIZER(cred, &init_cred):
// .cred = (typeof(*&init_cred) __force __rcu *)(&init_cred)
// INIT_TASK_COMM: "swapper"
// INIT_THREAD: {}
// LIST_HEAD_INIT(init_task.pending.list):
// { &(init_task.pending.list), &(init_task.pending.list) }
// __SPIN_LOCK_UNLOCKED(init_task.alloc_lock):
// (spinlock_t )
// { { .rlock =
//     {
//       .raw_lock = { { 0 } },
//       .magic = 0xdead4ead,
//       .owner_cpu = -1,
//       .owner = 0xffffffff,
//     }
// } }
// INIT_CPU_TIMERS(init_task.cpu_timers):
// {
//     { &(init_task.cpu_timers[0]), &(init_task.cpu_timers[0]) },
//     { &(init_task.cpu_timers[1]), &(init_task.cpu_timers[1]) },
//     { &(init_task.cpu_timers[2]), &(init_task.cpu_timers[2]) },
// }
// __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock):
// (raw_spinlock_t)
// {
//    .raw_lock = { { 0 } },
//    .magic = 0xdead4ead,
//    .owner_cpu = -1,
//    .owner = 0xffffffff,
// }
// PIDTYPE_PID: 0
// PIDTYPE_PGID: 1
// PIDTYPE_SID: 2
// INIT_PID_LINK(PIDTYPE_PID):
// {
//     .node = {
//         .next = NULL,
//         .pprev = NULL,
//     },
//     .pid = &init_struct_pid,
// }
// INIT_PID_LINK(PIDTYPE_PGID):
// {
//     .node = {
//         .next = NULL,
//         .pprev = NULL,
//     },
//     .pid = &init_struct_pid,
// }
// INIT_PID_LINK(PIDTYPE_SID):
// {
//     .node = {
//         .next = NULL,
//         .pprev = NULL,
//     },
//     .pid = &init_struct_pid,
// }
// LIST_HEAD_INIT(init_task.thread_group):
// { &(init_task.thread_group), &(init_task.thread_group) }
// INIT_TRACE_RECURSION: .trace_recursion = 0,
// INIT_TASK_RCU_PREEMPT(init_task):
// .rcu_read_lock_nesting = 0,
// .rcu_read_unlock_special = 0,
// .rcu_node_entry = { &(init_task.rcu_node_entry), &(init_task.rcu_node_entry) },
// .rcu_blocked_node = NULL,
//
// #define INIT_TASK(init_task):
// {
//    .state            = 0,
//    .stack            = &init_thread_info,
//    .usage            = { (2) },
//    .flags            = 0x00200000,
//    .prio             = 120,
//    .static_prio      = 120,
//    .normal_prio      = 120,
//    .policy           = 0,
//    .cpus_allowed     = (cpumask_t) { { [0] = 0xf } },
//    .nr_cpus_allowed  = 4,
//    .mm               = NULL,
//    .active_mm        = &init_mm,
//    .se               = {
//        .group_node = { &(init_task.se.group_node), &(init_task.se.group_node) }
//    },
//    .rt               = {
//        .run_list     = { &(init_task.rt.run_list), &(init_task.rt.run_list) },
//        .time_slice   = 10,
//    },
//    .tasks            = { &(init_task.tasks), &(init_task.tasks) },
//    .pushable_tasks   =
//    {
//        .prio  = (140),
//        .prio_list = { &((init_task.pushable_tasks).prio_list), &((init_task.pushable_tasks).prio_list) },
//        .node_list = { &((init_task.pushable_tasks).node_list), &((init_task.pushable_tasks).node_list) },
//    },
//    .sched_task_group = &root_task_group,
//    .ptraced          = { &(init_task.ptraced), &(init_task.ptraced) },
//    .ptrace_entry     = { &(init_task.ptrace_entry), &(init_task.ptrace_entry) },
//    .real_parent      = &init_task,
//    .parent           = &init_task,
//    .children         = { &(init_task.children), &(init_task.children) },
//    .sibling          = { &(init_task.sibling), &(init_task.sibling) },
//    .group_leader     = &init_task,
//    .real_cred        = (typeof(*&init_cred) __force __rcu *)(&init_cred),
//    .cred             = (typeof(*&init_cred) __force __rcu *)(&init_cred),
//    .comm             = "swapper",
//    .thread           = {},
//    .fs               = &init_fs,
//    .files            = &init_files,
//    .signal           = &init_signals,
//    .sighand          = &init_sighand,
//    .nsproxy          = &init_nsproxy,
//    .pending          = {
//        .list = { &(init_task.pending.list), &(init_task.pending.list) },
//        .signal = {{0}}
//    },
//    .blocked          = {{0}},
//    .alloc_lock       =
//    (spinlock_t )
//    { { .rlock =
//        {
//          .raw_lock = { { 0 } },
//          .magic = 0xdead4ead,
//          .owner_cpu = -1,
//          .owner = 0xffffffff,
//        }
//    } },
//    .journal_info     = NULL,
//    .cpu_timers       =
//    {
//        { &(init_task.cpu_timers[0]), &(init_task.cpu_timers[0]) },
//        { &(init_task.cpu_timers[1]), &(init_task.cpu_timers[1]) },
//        { &(init_task.cpu_timers[2]), &(init_task.cpu_timers[2]) },
//    },
//    .pi_lock          =
//    (raw_spinlock_t)
//    {
//       .raw_lock = { { 0 } },
//       .magic = 0xdead4ead,
//       .owner_cpu = -1,
//       .owner = 0xffffffff,
//    },
//    .timer_slack_ns   = 50000,
//    .pids             = {
//        [0]  =
//        {
//            .node = {
//                .next = NULL,
//                .pprev = NULL,
//            },
//            .pid = &init_struct_pid,
//        },
//        [1]  =
//        {
//            .node = {
//                .next = NULL,
//                .pprev = NULL,
//            },
//            .pid = &init_struct_pid,
//        },
//        [2]  =
//        {
//            .node = {
//                .next = NULL,
//                .pprev = NULL,
//            },
//            .pid = &init_struct_pid,
//        },
//    },
//    .thread_group     = { &(init_task.thread_group), &(init_task.thread_group) },
//    .trace_recursion  = 0,
//    .rcu_read_lock_nesting = 0,
//    .rcu_read_unlock_special = 0,
//    .rcu_node_entry   = { &(init_task.rcu_node_entry), &(init_task.rcu_node_entry) },
//    .rcu_blocked_node = NULL,
// }
#define RLIMIT_SIGPENDING   11  /* max number of pending signals */
  • init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
    • init_task.signal->rlim[RLIMIT_NPROC: 6 ].rlim_cur : 총 free된 page수 /2
  • init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
    • init_task.signal->rlim[RLIMIT_NPROC: 6 ].rlim_max : 총 free된 page수 /2
  • init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC];
    • init_task.signal->rlim[RLIMIT_SIGPENDING: 11 ] =
      • init_task.signal->rlim[RLIMIT_NPROC: 6];
  • fork_init(totalram_pages);에서 한일
    • task_struct를 사용하기 위한 kmem_cache 할당
    • max_threads 계산하여 init_task 에 threads 값의 limit 값을 설정.

main.c::start_kernel()

// ARM10C 20130824
asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];
...

// 2015/09/19 시작

    anon_vma_init();
    // anon vma 를 사용하기 위한 kmem_cache 할당자 초기화 수행

#ifdef CONFIG_X86 // CONFIG_X86=n
    if (efi_enabled(EFI_RUNTIME_SERVICES))
        efi_enter_virtual_mode();
#endif
    thread_info_cache_init(); // null function
    cred_init();
    // credentials 를 사용하기 위한 kmem_cache 할당자 초기화 수행

    // totalram_pages: 총 free된 page 수
    fork_init(totalram_pages);
    // task_struct 를 사용하기 위한 kmem_cache 할당자 초기화 수행
    // max_threads값을 계산하여 init_task에 threads값의 limit 값 설정함

    proc_caches_init();

fork.c::proc_cache_init()

  • start_kernel()
    • fork_init()
    • proc_caches_init()
// ARM10C 20150919
void __init proc_caches_init(void)
{
    // sizeof(struct sighand_struct): 1324 bytes, 0xc2000
    // kmem_cache_create("sighand_cache", 1324, 0, 0xc2000, sighand_ctor): kmem_cache#14
    sighand_cachep = kmem_cache_create("sighand_cache",
            sizeof(struct sighand_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
            SLAB_NOTRACK, sighand_ctor);
    // sighand_cachep: kmem_cache#14

    signal_cachep = kmem_cache_create("signal_cache",
            sizeof(struct signal_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    files_cachep = kmem_cache_create("files_cache",
            sizeof(struct files_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    fs_cachep = kmem_cache_create("fs_cache",
            sizeof(struct fs_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    /*
     * FIXME! The "sizeof(struct mm_struct)" currently includes the
     * whole struct cpumask for the OFFSTACK case. We could change
     * this to *only* allocate as much of it as required by the
     * maximum number of CPU's we can ever have.  The cpumask_allocation
     * is at the end of the structure, exactly for that reason.
     */
    mm_cachep = kmem_cache_create("mm_struct",
            sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);

    mmap_init();

    // mmap_init에서 한일:
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->raw_lock: { { 0 } }
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->magic: 0xdead4ead
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->owner: 0xffffffff
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->owner_cpu: 0xffffffff
    // (&(&vm_committed_as)->list)->next: &(&vm_committed_as)->list
    // (&(&vm_committed_as)->list)->prev: &(&vm_committed_as)->list
    // (&vm_committed_as)->count: 0
    // (&vm_committed_as)->counters: kmem_cache#26-o0 에서 할당된 4 bytes 메모리 주소
    // list head 인 &percpu_counters에 &(&vm_committed_as)->list를 연결함

// 2015/09/19 종료

    nsproxy_cache_init();
}
struct sighand_struct {
    atomic_t        count;
    struct k_sigaction  action[_NSIG];
    spinlock_t      siglock;
    wait_queue_head_t   signalfd_wqh;
};
struct k_sigaction {
    struct sigaction sa;
#ifdef __ARCH_HAS_KA_RESTORER
    __sigrestore_t ka_restorer;
#endif
};
struct sigaction {
    union {
      __sighandler_t _sa_handler;
      void (*_sa_sigaction)(int, struct siginfo *, void *);
    } _u;
    sigset_t sa_mask;
    unsigned long sa_flags;
    void (*sa_restorer)(void);
};
  • __signalfn_t
typedef __signalfn_t __user *__sighandler_t;
  • struct siginfo
typedef struct siginfo {
    int si_signo;
    int si_errno;
    int si_code;

    union {
        int _pad[SI_PAD_SIZE];

        /* kill() */
        struct {
            __kernel_pid_t _pid;    /* sender's pid */
            __ARCH_SI_UID_T _uid;   /* sender's uid */
        } _kill;

        /* POSIX.1b timers */
        struct {
            __kernel_timer_t _tid;  /* timer id */
            int _overrun;       /* overrun count */
            char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)];
            sigval_t _sigval;   /* same as below */
            int _sys_private;       /* not to be passed to user */
        } _timer;

        /* POSIX.1b signals */
        struct {
            __kernel_pid_t _pid;    /* sender's pid */
            __ARCH_SI_UID_T _uid;   /* sender's uid */
            sigval_t _sigval;
        } _rt;

        /* SIGCHLD */
        struct {
            __kernel_pid_t _pid;    /* which child */
            __ARCH_SI_UID_T _uid;   /* sender's uid */
            int _status;        /* exit code */
            __ARCH_SI_CLOCK_T _utime;
            __ARCH_SI_CLOCK_T _stime;
        } _sigchld;

        /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
        struct {
            void __user *_addr; /* faulting insn/memory ref. */
#ifdef __ARCH_SI_TRAPNO
            int _trapno;    /* TRAP # which caused the signal */
#endif
            short _addr_lsb; /* LSB of the reported address */
        } _sigfault;

        /* SIGPOLL */
        struct {
            __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */
            int _fd;
        } _sigpoll;

        /* SIGSYS */
        struct {
            void __user *_call_addr; /* calling user insn */
            int _syscall;   /* triggering system call number */
            unsigned int _arch; /* AUDIT_ARCH_* of syscall */
        } _sigsys;
    } _sifields;
} __ARCH_SI_ATTRIBUTES siginfo_t;
  • sighand_cachep: kmem_cache_create()
    • "sighand_cache", sizeof(struct sighand_struct), 0,
    • SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
    • SLAB_NOTRACK, sighand_ctor);
    • kmem_cache#14: sighand_cachep
  • signal_cachep: kmem_cache_create()
    • "signal_cache",sizeof(struct signal_struct), 0,
    • SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
  • files_cachep: kmem_cache_create(
    • "files_cache", sizeof(struct files_struct), 0,
    • SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
  • fs_cachep: kmem_cache_create(
    • "fs_cache", sizeof(struct fs_struct), 0,
    • SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
  • mm_cachep: kmem_cache_create(
    • "mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
    • SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
  • vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
void __init proc_caches_init(void)
{
...
    mmap_init();
    nsproxy_cache_init();
}
  • mmap_init();

mmamp.c::mmap_init()

  • call: start_kernel()
    • proc_cache_init()
    • mmap_init()
// ARM10C 20150919
void __init mmap_init(void)
{
    int ret;

    // percpu_counter_init(&vm_committed_as, 0): 0
    ret = percpu_counter_init(&vm_committed_as, 0);
    // ret: 0

    // percpu_counter_init에서 한일:
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->raw_lock: { { 0 } }
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->magic: 0xdead4ead
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->owner: 0xffffffff
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->owner_cpu: 0xffffffff
    // (&(&vm_committed_as)->list)->next: &(&vm_committed_as)->list
    // (&(&vm_committed_as)->list)->prev: &(&vm_committed_as)->list
    // (&vm_committed_as)->count: 0
    // (&vm_committed_as)->counters: kmem_cache#26-o0 에서 할당된 4 bytes 메모리 주소
    // list head 인 &percpu_counters에 &(&vm_committed_as)->list를 연결함

    // ret: 0
    VM_BUG_ON(ret);
}

percpu_counter.h

  • start_kernel()
    • proc_cache_init()
      • mmap_init()
        • percpu_counter_init()
// ARM10C 20150919
// &vm_committed_as, 0
#define percpu_counter_init(fbc, value)             \
    ({                                              \
        static struct lock_class_key __key;         \
                                                    \
        __percpu_counter_init(fbc, value, &__key);  \
    })

percpu_counter.c::__percpu_counter_init()

  • start_kernel()
    • proc_cache_init()
      • mmap_init()
        • percpu_counter_init()
          • __percpu_counter_init()
// ARM10C 20150919
// &vm_committed_as, 0, &__key
int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
              struct lock_class_key *key)
{
    // &fbc->lock: &(&vm_committed_as)->lock
    raw_spin_lock_init(&fbc->lock);

    // raw_spin_lock_init에서 한일:
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->raw_lock: { { 0 } }
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->magic: 0xdead4ead
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->owner: 0xffffffff
    // (&(&(&(&vm_committed_as)->lock)->wait_lock)->rlock)->owner_cpu: 0xffffffff

    // &fbc->lock: &(&vm_committed_as)->lock, key: &__key
    lockdep_set_class(&fbc->lock, key); // null function

    // fbc->count: (&vm_committed_as)->count, amount: 0
    fbc->count = amount;
    // fbc->count: (&vm_committed_as)->count: 0

    // fbc->counters: (&vm_committed_as)->counters,
    // alloc_percpu(s32): kmem_cache#26-o0 에서 할당된 4 bytes 메모리 주소
    fbc->counters = alloc_percpu(s32);
    // fbc->counters: (&vm_committed_as)->counters: kmem_cache#26-o0 에서 할당된 4 bytes 메모리 주소

    // fbc->counters: (&vm_committed_as)->counters: kmem_cache#26-o0 에서 할당된 4 bytes 메모리 주소
    if (!fbc->counters)
        return -ENOMEM;

    // fbc: &vm_committed_as
    debug_percpu_counter_activate(fbc); // null function

#ifdef CONFIG_HOTPLUG_CPU // CONFIG_HOTPLUG_CPU=y
    // &fbc->list: &(&vm_committed_as)->list
    INIT_LIST_HEAD(&fbc->list);

    // INIT_LIST_HEAD에서 한일:
    // (&(&vm_committed_as)->list)->next: &(&vm_committed_as)->list
    // (&(&vm_committed_as)->list)->prev: &(&vm_committed_as)->list

    spin_lock(&percpu_counters_lock);

    // spin_lock에서 한일:
    // &percpu_counters_lock을 사용한 spin lock 수행

    // &fbc->list: &(&vm_committed_as)->list
    list_add(&fbc->list, &percpu_counters);

    // list_add에서 한일:
    // list head 인 &percpu_counters에 &(&vm_committed_as)->list를 연결함

    spin_unlock(&percpu_counters_lock);

    // spin_lock에서 한일:
    // &percpu_counters_lock을 사용한 spin unlock 수행
#endif
    return 0;
}
EXPORT_SYMBOL(__percpu_counter_init);
  • alloc_percpu(s32)
    • kmem_cache@26-o0 에서 할당된 4Byte 메모리 주소.
#define alloc_percpu(type)  \
    (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
  • debug_percpu_counter_activate(fbc);
    • // null function
  • INIT_LIST_HEAD(&fbc->list);
    • &fbc->list: &(&(vm_committed_as)->list
  • spin_lock(&percpu_counters_lock);
  • list_add(&fbc->list, &percpu_counters);
  • spin_unlock(&percpu_counters_lock);
  • nsproxy_cache_init();
int __init nsproxy_cache_init(void)
{
    nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
    return 0;
}

log

  • 1st log
18884c1..bd5956f  master     -> origin/master
Updating 18884c1..bd5956f
Fast-forward
arch/arm/include/asm/page.h            |  1 +
arch/arm/include/asm/thread_info.h     |  4 ++++
include/linux/capability.h             |  5 +++++
include/linux/cred.h                   |  8 +++++---
include/linux/rbtree.h                 |  2 ++
include/linux/rmap.h                   |  9 +++++++--
include/linux/rwsem-spinlock.h         |  4 +++-
include/linux/rwsem.h                  |  2 +-
include/linux/slab.h                   |  6 ++++++
include/linux/slub_def.h               |  1 +
include/linux/spinlock_types.h         |  1 +
include/linux/types.h                  |  3 +++
include/linux/uidgid.h                 |  4 +++-
include/uapi/asm-generic/posix_types.h |  1 +
include/uapi/linux/capability.h        |  2 ++
init/main.c                            | 12 +++++++++---
kernel/cred.c                          |  5 +++++
kernel/fork.c                          |  4 ++--
mm/rmap.c                              | 12 ++++++++++++
mm/slab_common.c                       | 21 +++++++++++++++++++++
20 files changed, 94 insertions(+), 13 deletions(-)
  • 2nd log
bd5956f..a53fe59  master     -> origin/master
Updating bd5956f..a53fe59
Fast-forward
arch/arm/include/asm/cache.h           |  1 +
arch/arm/include/asm/signal.h          | 11 +++++++++++
arch/arm/include/uapi/asm/signal.h     |  2 ++
include/linux/list.h                   |  5 +++++
include/linux/lockdep.h                |  1 +
include/linux/percpu.h                 |  2 ++
include/linux/percpu_counter.h         |  9 +++++++--
include/linux/sched.h                  | 17 +++++++++++------
include/linux/signal.h                 |  4 +++-
include/linux/slab.h                   |  1 +
include/linux/spinlock.h               |  5 +++++
include/linux/spinlock_types.h         |  2 ++
include/linux/wait.h                   |  1 +
include/uapi/asm-generic/resource.h    |  4 ++++
include/uapi/asm-generic/signal-defs.h |  1 +
init/main.c                            |  4 ++++
kernel/fork.c                          | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
kernel/locking/spinlock_debug.c        |  2 ++
lib/percpu_counter.c                   | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
mm/mmap.c                              | 17 +++++++++++++++++
mm/page_alloc.c                        |  1 +
mm/percpu.c                            |  2 ++
mm/slab_common.c                       | 24 +++++++++++++++++++-----
23 files changed, 212 insertions(+), 22 deletions(-)

2015년 9월 12일 토요일

[Linux Kernel] 116주차(2015.09.12) setup_per_cpu_pageset()

ARM10C : 116 주차
일시 : 2015.09.12 (116 주차 스터디 진행)
모임명 : NAVER_개발자커뮤니티지원_10차ARM-C
장소 : 토즈 타워점
장소지원 : NAVER 개발자 커뮤니티 지원 프로그램
참여인원 : 3명

2주간 각자 휴식을 가지고 다시 커널을 분석합니다.

116 주차 진도

  • 8월 8일 진도로 다시 복습을 했습니다.
  • page_cgroup_init(); // null function
  • debug_objects_mem_init(); // null function
  • kmemleak_init(); // null function
  • 그리고 나서 setup_per_cpu_pageset(); 부터 시작합니다. 
  • start_kernel 1 ~/kernel/iamroot/linux-stable/init/main.c
    • setup_per_cpu_pageset();

main.c::start_kernel()

// ARM10C 20130824
// asmlinkage의 의미
// http://www.spinics.net/lists/arm-kernel/msg87677.html
asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];
    // ATAG,DTB 정보로 사용
...

// 2015/08/08 시작

    page_cgroup_init(); // null function
    debug_objects_mem_init(); // null function
    kmemleak_init(); // null function

// 2015/08/22 종료

    setup_per_cpu_pageset();

page_alloc.c::setup_per_cpu_pageset()

  • call: start_kernel() 1 ~/kernel/iamroot/linux-stable/init/main.c
    • setup_per_cpu_pageset();
void __init setup_per_cpu_pageset(void)
{
    struct zone *zone;

    for_each_populated_zone(zone)
        setup_zone_pageset(zone);
}

page_alloc.c::setup_zone_pageset()

  • call: start_kernel() 1 ~/kernel/iamroot/linux-stable/init/main.c
    • setup_per_cpu_pageset();
      • setup_zone_pageset();
static void __meminit setup_zone_pageset(struct zone *zone)
{
    int cpu;
    zone->pageset = alloc_percpu(struct per_cpu_pageset);
    // 
    for_each_possible_cpu(cpu)
        zone_pageset_init(zone, cpu);
}
kmem_cache#26-o0 (512B)에서 한 Slub을 per_cpu로 나누어 사용하는데 이 할당자가 alloc_percpu()함수이다. 여기서는 struct per_set_cpu 구조체가 72Byte이므로 512B중 72B를 할당 받는다.
  • for_each_possible_cpu(cpu)
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
// #define for_each_cpu(cpu, cpu_online_mask)
//  for ((cpu) = -1; (cpu) = cpumask_next((cpu), (cpu_online_mask)), (cpu) < nr_cpu_ids; )
여기서 nr_cpu_ids가 4로 루프가 4번 반복한다.
  • zone_pageset_init()
static void __meminit zone_pageset_init(struct zone *zone, int cpu)
{
    struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);

    pageset_init(pcp);
    pageset_set_high_and_batch(zone, pcp);
}
zone: &(&...
#define per_cpu_ptr(ptr, cpu)   SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
SHIFT_PERCPU_PTR()
#define SHIFT_PERCPU_PTR(__p, __offset) ({              \
    __verify_pcpu_ptr((__p));                   \
    RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \
})
#endif
  • pageset_init(pcp)
// ARM10C 20140308
// p: (&boot_pageset + __per_cpu_offset[0])
static void pageset_init(struct per_cpu_pageset *p)
{
    struct per_cpu_pages *pcp;
    int migratetype;

    // p: (&boot_pageset + __per_cpu_offset[0]), sizeof(*p): 66
    memset(p, 0, sizeof(*p));

    // p->pcp: [pcpu0] boot_pageset->pcp
    pcp = &p->pcp;
    // pcp: [pcpu0] boot_pageset->pcp

    pcp->count = 0;
    // [pcpu0] boot_pageset->pcp->count: 0

    // MIGRATE_PCPTYPES: 3
    for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
        // [pcpu0] boot_pageset->pcp->lists[0..2]
        INIT_LIST_HEAD(&pcp->lists[migratetype]);
}
여기서 pcp에 해당하는 메모리를 할당하고 참조 카운터를 0으로 초기화하며, 속성을 초기화했다.
  • pageset_set_high_and_batch(zone, pcp)
static void __meminit pageset_set_high_and_batch(struct zone *zone,
        struct per_cpu_pageset *pcp)
{
    if (percpu_pagelist_fraction)
        pageset_set_high(pcp,
            (zone->managed_pages /
                percpu_pagelist_fraction));
    else
        pageset_set_batch(pcp, zone_batchsize(zone));
}
  • pageset_set_batch()을 호출
// op->pcp: [pcpu0] (&(&contig_page_data)->node_zones[0]->pageset, 186,31
static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
{
    // p->pcp: [pcpu0] boot_pageset->pcp, batch: 0, 1
    pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
}
따라서 pageset_set_batch에서 한일을 정리하면. batch로 속성으로 시작은 1부터 최대 (high) 186까지 31배치구분을 하도록 초기화한다. 그리고 list[0...2]까지 각 core마다 loop를 실행해서 zone_page를 초기화 해준다.

main.c::start_kernel()

    setup_per_cpu_pageset();
    numa_policy_init();
    // late_time_init(): NULL
    if (late_time_init)
        late_time_init();
    sched_clock_init();
    // shced_clock_running: 0->1 로 바꿈
    calibrate_delay();

calibrate.c::calibrate_delay()

  • called: start_kernel()
    • calibrate_delay()
void calibrate_delay(void)
{
    unsigned long lpj;
    static bool printed;
    // printed: 0

    // smp_processor_id: 0
    int this_cpu = smp_processor_id();
    // this_cpu: 0

    // preset_lpj: 0, lpj_fine: 
    if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
        lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "already calibrated this CPU");
    } else if (preset_lpj) {
        lpj = preset_lpj;
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "preset value.. ");
    } else if ((!printed) && lpj_fine) {
        lpj = lpj_fine;
        pr_info("Calibrating delay loop (skipped), "
            "value calculated using timer frequency.. ");
    } else if ((lpj = calibrate_delay_is_known())) {
        ;
    } else if ((lpj = calibrate_delay_direct()) != 0) {
        if (!printed)
            pr_info("Calibrating delay using timer "
                "specific routine.. ");
    } else {
        // printed: 0
        if (!printed)
            pr_info("Calibrating delay loop... ");
        lpj = calibrate_delay_converge();
    }
    per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
    if (!printed)
        pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
            lpj/(500000/HZ),
            (lpj/(5000/HZ)) % 100, lpj);

    loops_per_jiffy = lpj;
    printed = true;
}
  • calibrate_delay_converge();
static unsigned long calibrate_delay_converge(void)
{
    /* First stage - slowly accelerate to find initial bounds */
    unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
    int trials = 0, band = 0, trial_in_band = 0;
    // trials: 0, band: 0, trial_in_bank: 0

    lpj = (1<<12);
    // lpj: 0x1000

    /* wait for "start of" clock tick */
    ticks = jiffies;
    while (ticks == jiffies)
        ; /* nothing */
    /* Go .. */
    ticks = jiffies;
    do {
        if (++trial_in_band == (1<<band)) {
            ++band;
            trial_in_band = 0;
        }
        __delay(lpj * band);
        trials += band;
    } while (ticks == jiffies);
    /*
     * We overshot, so retreat to a clear underestimate. Then estimate
     * the largest likely undershoot. This defines our chop bounds.
     */
    trials -= band;
    loopadd_base = lpj * band;
    lpj_base = lpj * trials;

recalibrate:
    lpj = lpj_base;
    loopadd = loopadd_base;

    /*
     * Do a binary approximation to get lpj set to
     * equal one clock (up to LPS_PREC bits)
     */
    chop_limit = lpj >> LPS_PREC;
    while (loopadd > chop_limit) {
        lpj += loopadd;
        ticks = jiffies;
        while (ticks == jiffies)
            ; /* nothing */
        ticks = jiffies;
        __delay(lpj);
        if (jiffies != ticks)   /* longer than 1 tick */
            lpj -= loopadd;
        loopadd >>= 1;
    }
    /*
     * If we incremented every single time possible, presume we've
     * massively underestimated initially, and retry with a higher
     * start, and larger range. (Only seen on x86_64, due to SMIs)
     */
    if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
        lpj_base = lpj;
        loopadd_base <<= 2;
        goto recalibrate;
    }

    return lpj;
}

main.c::start_kernel()

    calibrate_delay();
    pidmap_init();

pid.c::pidmap_init()

  • start_kernel()
    • pidmap_init()
void __init pidmap_init(void)
{
    /* Veryify no one has done anything silly */
    // PID_MAX_LIMIT: 0x8000
    // PIDNS_HASH_ADDING: 080000000
    BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);

    /* bump default and minimum pid_max based on number of cpus */
    // pid_max_max: 0x8000, pid_max: 0x8000, PIDS_PER_CPU_DEFAULT: 1024
    // num_possible_cpus: 4, max_t(int, 0x8000, 4 * 1024): 0x8000
    pid_max = min(pid_max_max, max_t(int, pid_max,
                PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
    // pid_max: 0x8000

    // pid_max_min: 301, RESERVED_PIDS +1: 300 +1 : 301
    // max_t(int, pid_max_min: 301, PIDS_PER_CPU_MIN: 8 * 4)
    pid_max_min = max_t(int, pid_max_min,
                PIDS_PER_CPU_MIN * num_possible_cpus());
    // pid_max_min: 301

    pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
    // pid_max: default: 0x8000 minimum: 0x12d\n

    // PAGE_SIZE: 0x1000, GFP_KERNEL 0xD0
    init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
    /* Reserve PID 0. We never call free_pidmap(0) */
    set_bit(0, init_pid_ns.pidmap[0].page);
    atomic_dec(&init_pid_ns.pidmap[0].nr_free);

    init_pid_ns.pid_cachep = KMEM_CACHE(pid,
            SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}

main.c::start_kernel()

...
    setup_per_cpu_pageset();
    numa_policy_init();
    if (late_time_init)
        late_time_init();
    sched_clock_init();
    calibrate_delay();
    pidmap_init();

log

  • 1st log
76a328c..274a47c  master     -> origin/master
Updating 76a328c..274a47c
Fast-forward
include/linux/cpumask.h |   3 +++
include/linux/mmzone.h  |   7 +++++++
include/linux/percpu.h  |  17 +++++++++++++++
init/main.c             |   3 +++
kernel/bounds.c         |   1 +
mm/bootmem.c            |   1 +
mm/mmzone.c             |   2 ++
mm/page_alloc.c         | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
mm/percpu.c             |   6 ++++--
9 files changed, 175 insertions(+), 31 deletions(-)
  • 2nd log
274a47c..18884c1  master     -> origin/master
Updating 274a47c..18884c1
Fast-forward
arch/arm/include/asm/atomic.h |  3 +++
arch/arm/include/asm/page.h   |  1 +
arch/arm/lib/delay.c          |  7 +++++++
include/linux/cpumask.h       |  1 +
include/linux/gfp.h           |  1 +
include/linux/kernel.h        |  1 +
include/linux/mempolicy.h     |  1 +
include/linux/percpu-defs.h   |  1 +
include/linux/pid.h           |  7 +++++++
include/linux/pid_namespace.h |  6 ++++++
include/linux/slab.h          |  7 +++++++
include/linux/slub_def.h      |  1 +
include/linux/smp.h           |  1 +
include/linux/threads.h       | 11 +++++++++++
include/linux/types.h         |  4 +++-
init/calibrate.c              | 47 +++++++++++++++++++++++++++++++++++++++++++++++
init/main.c                   | 14 +++++++++++++-
kernel/pid.c                  | 40 ++++++++++++++++++++++++++++++++++++++++
kernel/sched/clock.c          |  6 +++++-
mm/slab_common.c              |  7 +++++++
20 files changed, 164 insertions(+), 3 deletions(-)