Saturday, June 14, 2014

[Linux Kernel] Week 58 (2014.06.13)

ARM10C Week 58 Review

Date: 2014.06.14 (Week 58)
Group: NAVER개발자커뮤니티지원_IAMROOT.ORG_10차ARM-C
Venue: Toz Tower branch
Venue support: NAVER Developer Community Support Program
Attendees: 4

Study progress:

  • Continuing the analysis of mem_init().
  • Currently analyzing start_kernel() -> mm_init() -> kmem_cache_init() -> create_boot_cache().

main.c::mm_init()

// ARM10C 20140329
static void __init mm_init(void)
{
    /*
     * page_cgroup requires contiguous pages,
     * bigger than MAX_ORDER unless SPARSEMEM.
     */
    page_cgroup_init_flatmem(); // null function
    mem_init();
    // hands the memory managed by bootmem over to the buddy allocator
    // and prints the size of each memory section

    // CONFIG_SLUB is selected in mm/Makefile, so this goes to slub.c
    kmem_cache_init();
// CONFIG_SLUB is selected in mm/Makefile, so this goes to slub.c
Move on to kmem_cache_init().

slub.c::kmem_cache_init()

create_boot_cache() is now executed for the second time.
The first call ran while slab_state was DOWN;
this time slab_state has been changed to PARTIAL before the call.
Meaning of slab_state:
it indicates how far slab initialization has progressed; PARTIAL means only kmem_cache_node can be used.
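For reference, slab_state is an enum in mm/slab.h. A rough sketch of the states that matter for SLUB here (the real enum also contains SLAB-only intermediate states, so treat this as a simplified sketch rather than the exact definition):

enum slab_state {
    DOWN,       /* no slab functionality available yet */
    PARTIAL,    /* SLUB: the kmem_cache_node cache is usable */
    UP,         /* slab caches usable, but not everything is set up */
    FULL        /* everything is working */
};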
void __init kmem_cache_init(void)
{
...
    // slab_state: DOWN
    slab_state = PARTIAL;
    // slab_state: PARTIAL
...
// 2014/06/14 start
    // kmem_cache: &boot_kmem_cache,
    // offsetof(struct kmem_cache, node): 128, nr_node_ids: 1
    // sizeof(struct kmem_cache_node *): 4 SLAB_HWCACHE_ALIGN: 0x00002000UL
    // create_boot_cache(&boot_kmem_cache, "kmem_cache", 132, 0x00002000UL)
    create_boot_cache(kmem_cache, "kmem_cache",
            offsetof(struct kmem_cache, node) +
                nr_node_ids * sizeof(struct kmem_cache_node *),
               SLAB_HWCACHE_ALIGN);
create_boot_cache(kmem_cache, "kmem_cache",
        offsetof(struct kmem_cache, node) +
        nr_node_ids * sizeof(struct kmem_cache_node *),
        SLAB_HWCACHE_ALIGN);
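The 132 passed as size works out from the values noted in the annotations above:

/* offsetof(struct kmem_cache, node): 128
 * nr_node_ids: 1 (UMA)
 * sizeof(struct kmem_cache_node *): 4
 * => size = 128 + 1 * 4 = 132
 */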

slab_common.c::create_boot_cache()

// ARM10C 20140614
// &boot_kmem_cache, "kmem_cache", 132, SLAB_HWCACHE_ALIGN: 0x00002000UL
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
        unsigned long flags)
{
    int err;

    // s->name: boot_kmem_cache_node.name: NULL
    // s->name: boot_kmem_cache.name: NULL
    s->name = name;
    // s->name: boot_kmem_cache_node.name: "kmem_cache_node"
    // s->name: boot_kmem_cache.name: "kmem_cache"

    // s->size: boot_kmem_cache_node.size: 0
    // s->object_size: boot_kmem_cache_node.object_size: 0
    // s->size: boot_kmem_cache.size: 0
    // s->object_size: boot_kmem_cache.object_size: 0
    s->size = s->object_size = size;
    // s->size: boot_kmem_cache_node.size: 44
    // s->object_size: boot_kmem_cache_node.object_size: 44
    // s->size: boot_kmem_cache.size: 132
    // s->object_size: boot_kmem_cache.object_size: 132

    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, ARCH_KMALLOC_MINALIGN: 64, size: 44
    // s->align: boot_kmem_cache_node.align: 0
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, ARCH_KMALLOC_MINALIGN: 64, size: 132
    // s->align: boot_kmem_cache.align: 0
    s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
    // s->align: boot_kmem_cache_node.align: 64
    // s->align: boot_kmem_cache.align: 64

    // s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // __kmem_cache_create(&boot_kmem_cache_node, 0x00002000UL): 0
    // s: &boot_kmem_cache, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // __kmem_cache_create(&boot_kmem_cache, 0x00002000UL): 0
    err = __kmem_cache_create(s, flags);
// s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
// __kmem_cache_create(&boot_kmem_cache_node, 0x00002000UL): 0
err = __kmem_cache_create(s, flags);
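Before moving on: the align value of 64 above comes from calculate_alignment(). A sketch of the relevant logic in mm/slab_common.c, assuming cache_line_size() is 64 on this board:

unsigned long calculate_alignment(unsigned long flags,
        unsigned long align, unsigned long size)
{
    // SLAB_HWCACHE_ALIGN: align to the cache line as long as the
    // object is bigger than half a cache line
    if (flags & SLAB_HWCACHE_ALIGN) {
        unsigned long ralign = cache_line_size();   // 64
        while (size <= ralign / 2)
            ralign /= 2;
        align = max(align, ralign);
    }

    if (align < ARCH_SLAB_MINALIGN)
        align = ARCH_SLAB_MINALIGN;

    return ALIGN(align, sizeof(void *));
    // here: max(ARCH_KMALLOC_MINALIGN: 64, 64) -> 64 for both size 44 and size 132
}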

slub.c::__kmem_cache_create()

// ARM10C 20140614
// s: &boot_kmem_cache, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
{
    int err;

    // s: &boot_kmem_cache_node, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // kmem_cache_open(&boot_kmem_cache_node, 0x00002000UL): 0
    // s: &boot_kmem_cache, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // kmem_cache_open(&boot_kmem_cache, 0x00002000UL): 0
    err = kmem_cache_open(s, flags);
// kmem_cache_open(&boot_kmem_cache, 0x00002000UL): 0
err = kmem_cache_open(s, flags);

slub.c::kmem_cache_open()

// ARM10C 20140614
// s: &boot_kmem_cache, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
    // s->size: boot_kmem_cache_node.size: 44, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL,
    // s->name: boot_kmem_cache_node.name: "kmem_cache_node", s->ctor: boot_kmem_cache_node.ctor: NULL
    // s->size: boot_kmem_cache.size: 132, flags: SLAB_HWCACHE_ALIGN: 0x00002000UL,
    // s->name: boot_kmem_cache.name: "kmem_cache", s->ctor: boot_kmem_cache.ctor: NULL
    s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
    // s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // s->flags: boot_kmem_cache.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL

    // s->reserved: boot_kmem_cache_node.reserved: 0
    // s->reserved: boot_kmem_cache.reserved: 0
    s->reserved = 0;
    // s->reserved: boot_kmem_cache_node.reserved: 0
    // s->reserved: boot_kmem_cache.reserved: 0

    // need_reserve_slab_rcu: 0, s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN
    // need_reserve_slab_rcu: 0, s->flags: boot_kmem_cache.flags: SLAB_HWCACHE_ALIGN
    if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
        s->reserved = sizeof(struct rcu_head);

    // s: &boot_kmem_cache_node, -1, calculate_sizes(&boot_kmem_cache_node, -1): 1
    // s: &boot_kmem_cache, -1, calculate_sizes(&boot_kmem_cache, -1): 1
    if (!calculate_sizes(s, -1))
        goto error;
// s: &boot_kmem_cache, -1, calculate_sizes(&boot_kmem_cache, -1): 1
if (!calculate_sizes(s, -1))

slub.c::calculate_sizes()

// ARM10C 20140614
// s: &boot_kmem_cache, -1
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
    // s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN
    // s->flags: boot_kmem_cache.flags: SLAB_HWCACHE_ALIGN
    unsigned long flags = s->flags;
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL

    // s->object_size: boot_kmem_cache_node.object_size: 44
    // s->object_size: boot_kmem_cache.object_size: 132
    unsigned long size = s->object_size;
    // size: 44
    // size: 132
    int order;

    /*
     * Round up object size to the next word boundary. We can only
     * place the free pointer at word boundaries and this determines
     * the possible location of the free pointer.
     */
    // size: 44, sizeof(void *): 4
    // size: 132, sizeof(void *): 4
    size = ALIGN(size, sizeof(void *));
    // size: 44
    // size: 132

    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_POISON: 0x00000800UL
    // SLAB_DESTROY_BY_RCU: 0x00080000UL
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_POISON: 0x00000800UL
    // SLAB_DESTROY_BY_RCU: 0x00080000UL
    if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
            !s->ctor)
        s->flags |= __OBJECT_POISON;
    else
        // s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
        // __OBJECT_POISON : 0x80000000UL
        // s->flags: boot_kmem_cache.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
        // __OBJECT_POISON : 0x80000000UL
        s->flags &= ~__OBJECT_POISON;
        // s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
        // s->flags: boot_kmem_cache.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL


    /*
     * If we are Redzoning then check if there is some space between the
     * end of the object and the free pointer. If not then add an
     * additional word to have some bytes to store Redzone information.
     */
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_RED_ZONE: 0x00000400UL, size: 44,
    // s->object_size: boot_kmem_cache_node.object_size: 44
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_RED_ZONE: 0x00000400UL, size: 132,
    // s->object_size: boot_kmem_cache.object_size: 132
    if ((flags & SLAB_RED_ZONE) && size == s->object_size)
        size += sizeof(void *);
    /*
     * With that we have determined the number of bytes in actual use
     * by the object. This is the potential offset to the free pointer.
     */
    // s->inuse: boot_kmem_cache_node.inuse: 0, size: 44
    // s->inuse: boot_kmem_cache.inuse: 0, size: 132
    s->inuse = size;
    // s->inuse: boot_kmem_cache_node.inuse: 44
    // s->inuse: boot_kmem_cache.inuse: 132

    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_DESTROY_BY_RCU: 0x00080000UL,
    // SLAB_POISON: 0x00000800UL, s->ctor: boot_kmem_cache_node.ctor: NULL
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_DESTROY_BY_RCU: 0x00080000UL,
    // SLAB_POISON: 0x00000800UL, s->ctor: boot_kmem_cache.ctor: NULL
    if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
        s->ctor)) {
        /*
         * Relocate free pointer after the object if it is not
         * permitted to overwrite the first word of the object on
         * kmem_cache_free.
         *
         * This is the case if we do RCU, have a constructor or
         * destructor or are poisoning the objects.
         */
        s->offset = size;
        size += sizeof(void *);
    }

#ifdef CONFIG_SLUB_DEBUG // CONFIG_SLUB_DEBUG=y
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_STORE_USER: 0x00010000UL
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_STORE_USER: 0x00010000UL
    if (flags & SLAB_STORE_USER)
        /*
         * Need to store information about allocs and frees after
         * the object.
         */
        size += 2 * sizeof(struct track);

    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_RED_ZONE: 0x00000400UL
    // flags: SLAB_HWCACHE_ALIGN: 0x00002000UL, SLAB_RED_ZONE: 0x00000400UL
    if (flags & SLAB_RED_ZONE)
        /*
         * Add some empty padding so that we can catch
         * overwrites from earlier objects rather than let
         * tracking information or the free pointer be
         * corrupted if a user writes before the start
         * of the object.
         */
        size += sizeof(void *);
#endif

    /*
     * SLUB stores one object immediately after another beginning from
     * offset 0. In order to align the objects we have to simply size
     * each object to conform to the alignment.
     */
    // size: 44, s->align: boot_kmem_cache_node.align: 64
    // size: 132, s->align: boot_kmem_cache.align: 64
    size = ALIGN(size, s->align);
    // size: 64
    // size: 192

    // s->size: boot_kmem_cache_node.size: 44, size: 64
    // s->size: boot_kmem_cache.size: 132, size: 192
    s->size = size;
    // s->size: boot_kmem_cache_node.size: 64
    // s->size: boot_kmem_cache.size: 192

    // forced_order: -1
    // forced_order: -1
    if (forced_order >= 0)
        order = forced_order;
    else
        // size: 64, s->reserved: boot_kmem_cache_node.reserved: 0
        // size: 192, s->reserved: boot_kmem_cache.reserved: 0
        order = calculate_order(size, s->reserved);
// size: 192, s->reserved: boot_kmem_cache.reserved: 0
order = calculate_order(size, s->reserved);

slub.c::calculate_order()

// ARM10C 20140614
// size: 192, s->reserved: boot_kmem_cache.reserved: 0
static inline int calculate_order(int size, int reserved)
{
    int order;
    int min_objects;
    int fraction;
    int max_objects;

    /*
     * Attempt to find best configuration for a slab. This
     * works by first attempting to generate a layout with
     * the best configuration and backing off gradually.
     *
     * First we reduce the acceptable waste in a slab. Then
     * we reduce the minimum objects required in a slab.
     */
    // slub_min_objects: 0
    // slub_min_objects: 0
    min_objects = slub_min_objects;
    // min_objects: 0
    // min_objects: 0

    if (!min_objects)
        // nr_cpu_ids: 4, fls(4): 3
        // nr_cpu_ids: 4, fls(4): 3
        min_objects = 4 * (fls(nr_cpu_ids) + 1);
        // min_objects: 16
        // min_objects: 16

    // slub_max_order: 3, size: 64, reserved: 0
    // slub_max_order: 3, size: 192, reserved: 0
    max_objects = order_objects(slub_max_order, size, reserved);
// slub_max_order: 3, size: 192, reserved: 0
max_objects = order_objects(slub_max_order, size, reserved);

slub.c::order_objects()

// ARM10C 20140614
// slub_max_order: 3, size: 192, reserved: 0
// min_order: 0, size: 192, reserved: 0
static inline int order_objects(int order, unsigned long size, int reserved)
{
    // ARM10C 20140614
    // order: 3, PAGE_SIZE: 0x1000, (0x1000 << 3): 0x8000, reserved: 0, size: 192
    return ((PAGE_SIZE << order) - reserved) / size;
    // ARM10C 20140614
    // return 0xaa
}
// return 0xaa
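0xaa is just the division spelled out:

/* order: 3, PAGE_SIZE: 0x1000
 * (0x1000 << 3) = 0x8000 = 32768 bytes per slab
 * (32768 - 0) / 192 = 170 = 0xaa objects
 */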

slub.c::calculate_order()

// slub_max_order: 3, size: 192, reserved: 0
max_objects = order_objects(slub_max_order, size, reserved);
// max_objects: 0xaa
static inline int calculate_order(int size, int reserved)
{
...
    // slub_max_order: 3, size: 192, reserved: 0
    max_objects = order_objects(slub_max_order, size, reserved);
    // max_objects: 0xaa

    // min_objects: 16, max_objects: 0xaa
    min_objects = min(min_objects, max_objects);
    // min_objects: 16

    // min_objects: 16
    while (min_objects > 1) {
        fraction = 16;

        // fraction: 16
        // fraction: 16
        while (fraction >= 4) {
            // size: 64, min_objects: 16, slub_max_order: 3
            // fraction: 16, reserved: 0
            // size: 192, min_objects: 16, slub_max_order: 3
            // fraction: 16, reserved: 0
            order = slab_order(size, min_objects,
                    slub_max_order, fraction, reserved);
            // order: 0
            // order: 0

            // order: 0, slub_max_order: 3
            // order: 0, slub_max_order: 3
            if (order <= slub_max_order)
                // order: 0
                // order: 0
                return order;
                // return 0
                // return 0

            fraction /= 2;

            // Meaning of fraction /= 2:
            // the fraction used to limit internal fragmentation is reduced so that
            // the calculated order can stay at or below max_order
        }
        min_objects--;

        // Meaning of min_objects--:
        // if the calculated order is still larger than max_order even after reducing fraction, lower min_objects
    }
// size: 192, min_objects: 16, slub_max_order: 3
// fraction: 16, reserved: 0
order = slab_order(size, min_objects,

slub.c::slab_order()

// ARM10C 20140614
// size: 192, min_objects: 16, slub_max_order: 3
// fraction: 16, reserved: 0
static inline int slab_order(int size, int min_objects,
                int max_order, int fract_leftover, int reserved)
{
    int order;
    int rem;
    // slub_min_order: 0
    // slub_min_order: 0
    int min_order = slub_min_order;
    // min_order: 0
    // min_order: 0

    // min_order: 0, size: 64, reserved: 0
    // order_objects(0, 64, 0): 0x40, MAX_OBJS_PER_PAGE: 32767 (0x7fff)
    // min_order: 0, size: 192, reserved: 0
    // order_objects(0, 192, 0): 0x15, MAX_OBJS_PER_PAGE: 32767 (0x7fff)
    if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
        return get_order(size * MAX_OBJS_PER_PAGE) - 1;
// min_order: 0, size: 192, reserved: 0
// order_objects(0, 192, 0): 0x15, MAX_OBJS_PER_PAGE: 32767 (0x7fff)
if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
return get_order(size * MAX_OBJS_PER_PAGE) - 1;

slub.c::order_objects()

// min_order: 0, size: 192, reserved: 0
// order_objects(0, 192, 0): 0x15, MAX_OBJS_PER_PAGE: 32767 (0x7fff)
static inline int order_objects(int order, unsigned long size, int reserved)
{
    // order: 0, PAGE_SIZE: 0x1000, (0x1000 << 0): 0x1000, reserved: 0, size: 192
    return ((PAGE_SIZE << order) - reserved) / size;
    // return 0x15
}
// return 0x15
// What order_objects does:
// it computes how many objects of the requested size fit into an allocation of the given order, once reserved is excluded

slub.c::slab_order()

// return 0x15
static inline int slab_order(int size, int min_objects,
                int max_order, int fract_leftover, int reserved)
{
...
    // min_order: 0, size: 192, reserved: 0
    // order_objects(0, 192, 0): 0x15, MAX_OBJS_PER_PAGE: 32767 (0x7fff)
    if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
        return get_order(size * MAX_OBJS_PER_PAGE) - 1;

    // min_order: 0, min_objects: 16, size: 192, fls(0xbff): 12, PAGE_SHIFT: 12
    // order: 0, max_order: 3
    for (order = max(min_order,
                fls(min_objects * size - 1) - PAGE_SHIFT);
            order <= max_order; order++) {
        // PAGE_SIZE: 0x1000, order: 0
        unsigned long slab_size = PAGE_SIZE << order;
        // slab_size: 0x1000

        // slab_size: 0x1000, min_objects: 16, size: 192, reserved: 0
        if (slab_size < min_objects * size + reserved)
            continue;

        // slab_size: 0x1000, reserved: 0, size: 192
        rem = (slab_size - reserved) % size;
        // rem: 64

        // rem: 64, slab_size: 0x1000, fract_leftover: 16
        if (rem <= slab_size / fract_leftover)
            break;
            // breaks out of the loop here

        // Meaning of if (rem <= slab_size / fract_leftover):
        // if the space left over after carving the slab into objects is larger than slab_size / fract_leftover,
        // internal fragmentation is considered too high; in that case the order is raised instead
    }

    // Meaning of max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT):
    // the buddy order needed to hold at least min_objects objects of the given size

    // order: 0
    return order;
    // return 0
}
// return 0

slub.c::calculate_order()

static inline int calculate_order(int size, int reserved)
{
...
    // min_objects: 16
    while (min_objects > 1) {
        fraction = 16;

        // fraction: 16
        while (fraction >= 4) {
            // size: 192, min_objects: 16, slub_max_order: 3
            // fraction: 16, reserved: 0
            order = slab_order(size, min_objects,
                    slub_max_order, fraction, reserved);
            // order: 0

            // order: 0, slub_max_order: 3
            if (order <= slub_max_order)
                // order: 0
                return order;
                // return 0

            fraction /= 2;

            // Meaning of fraction /= 2:
            // the fraction used to limit internal fragmentation is reduced so that
            // the calculated order can stay at or below max_order

        }
        min_objects--;

        // Meaning of min_objects--:
        // if the calculated order is still larger than max_order even after reducing fraction, lower min_objects
    }
What order_objects does:
it returns how many objects of the requested size fit into an allocation of the given order, with reserved excluded.
In other words, order_objects takes the pages of that order minus reserved and
calculates how many objects fit in the remainder.
order 0: 4KB
  • size 64 : returns 64
  • size 192: returns 21
order 1: 8KB
  • size 64: returns 128
  • size 192: returns 42
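These numbers can be checked with a small stand-alone user-space program that mirrors the formula in order_objects() (4KB pages assumed):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* same formula as order_objects() in slub.c */
static unsigned long order_objects(int order, unsigned long size,
                                   unsigned long reserved)
{
    return ((PAGE_SIZE << order) - reserved) / size;
}

int main(void)
{
    int order;

    for (order = 0; order <= 1; order++)
        printf("order %d: size 64 -> %lu, size 192 -> %lu\n",
               order, order_objects(order, 64, 0),
               order_objects(order, 192, 0));
    /* prints:
     * order 0: size 64 -> 64, size 192 -> 21
     * order 1: size 64 -> 128, size 192 -> 42
     */
    return 0;
}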

slub.c::slab_order()

static inline int slab_order(int size, int min_objects,
                int max_order, int fract_leftover, int reserved)
{
...

    // min_order: 0, min_objects: 16, size: 64, fls(0x3ff): 10, PAGE_SHIFT: 12
    // order: 0, max_order: 3
    for (order = max(min_order,
                fls(min_objects * size - 1) - PAGE_SHIFT);
            order <= max_order; order++) {
        // PAGE_SIZE: 0x1000, order : 0    
        unsigned long slab_size = PAGE_SIZE << order;
        // slab_size: 0x1000    

        // slab_size: 0x1000, min_objects: 16, size: 64, reserved: 0
        if (slab_size < min_objects * size + reserved)
            continue;

        // slab_size: 0x1000, reserved: 0, size: 64
        rem = (slab_size - reserved) % size;
        // rem: 0    

        // rem: 0, slab_size: 0x1000, fract_leftover: 16
        if (rem <= slab_size / fract_leftover)
            break;
            // breaks out of the loop here

    }
    // order: 0
    return order;
    // return 0
}
As an example, suppose size were 512.
min_objects * size tells us how many pages are needed,
and that number of pages is what determines the order.
// min_order: 0, min_objects: 16, size: 64, fls(0x3ff): 10, PAGE_SHIFT: 12
// order: 0, max_order: 3
Meaning of max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT):
it computes the buddy order needed to hold at least min_objects objects of the given size.
for (order = max(min_order,
            fls(min_objects * size - 1) - PAGE_SHIFT);
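Working that hypothetical size-512 case through the same formula:

/* size: 512, min_objects: 16, PAGE_SHIFT: 12
 * min_objects * size - 1 = 16 * 512 - 1 = 8191 = 0x1fff
 * fls(0x1fff) = 13, 13 - PAGE_SHIFT = 1 -> the loop starts at order 1
 * slab_size = 0x1000 << 1 = 0x2000, rem = 8192 % 512 = 0 <= 8192 / 16 = 512
 * -> slab_order() would return 1
 */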
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
...
    // forced_order: -1
    if (forced_order >= 0)
        order = forced_order;
    else
        // size: 64, s->reserved: boot_kmem_cache_node.reserved: 0
        order = calculate_order(size, s->reserved);
        // order: 0

How the slab order is calculated

1. First, order_objects() computes the maximum number of objects that fit once reserved is subtracted.
2. Then slab_order() computes what order is needed to hold min_objects objects of the required size.
slub_max_order is fixed at PAGE_ALLOC_COSTLY_ORDER, which is 3,
and this 3 caps the maximum order.
In other words, min_objects times the required object size (minus reserved) may take up at most an order-3 allocation.
min_objects = 4 * (fls(nr_cpu_ids) + 1)
            = 4 * (3 + 1)
            = 16
so there are at least 4 min_objects per CPU core.
max_objects = the maximum number of these objects that can be created.
min_objects = min(min_objects, max_objects)
If max_objects is the larger value, the slab allocation is sized using min_objects.
fraction : 16
if (rem <= slab_size / fract_leftover)
If the memory left over after the slab allocation is larger than 1/16 of slab_size, the order has to be raised,
in order to avoid internal fragmentation.
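Plugging this week's actual numbers into that check:

/* size: 192, order: 0 -> slab_size: 4096, rem = 4096 % 192 = 64
 *   slab_size / fraction = 4096 / 16 = 256, and 64 <= 256
 *   -> the waste is acceptable, so order 0 is used
 * size: 64, order: 0  -> rem = 4096 % 64 = 0 <= 256 -> order 0 as well
 */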
    // min_objects: 16
    while (min_objects > 1) {
        fraction = 16;

        // fraction: 16
        while (fraction >= 4) {
            // size: 64, min_objects: 16, slub_max_order: 3
            // fraction: 16, reserved: 0
            order = slab_order(size, min_objects,
                    slub_max_order, fraction, reserved);
            // order: 0

            // order: 0, slub_max_order: 3
            if (order <= slub_max_order)
                // order: 0
                return order;
                // return 0

            fraction /= 2;
        }
        min_objects--;
    }
When internal fragmentation occurs, the order is raised.
But if the order comes out as 4, then to keep it at 3 at the if (order <= slub_max_order) check,
fraction is lowered from 16 to 8 (and then to 4),
and if the order still ends up as 4 instead of 3 even with that wider fragmentation allowance,
min_objects-- reduces min_objects.
fraction /= 2;
If the calculated order is still larger than max_order (3) even after shrinking the fraction
used to limit internal fragmentation, then:
    order = slab_order(size, 1, slub_max_order, 1, reserved);
    if (order <= slub_max_order)
        return order;
Fall back to requesting only 1 object per slab.
    order = slab_order(size, 1, MAX_ORDER, 1, reserved);
    if (order < MAX_ORDER)
        return order;
If that still did not produce a usable order, the order is recalculated once more with MAX_ORDER as the upper bound.
    // forced_order: -1
    if (forced_order >= 0)
        order = forced_order;
    else
        // size: 64, s->reserved: boot_kmem_cache_node.reserved: 0
        order = calculate_order(size, s->reserved);
        // order: 0
  • How the order is calculated
    • What calculate_order() does
    • What order_objects does
      • computes how many objects of the requested size fit in the given order, with reserved excluded
    • Meaning of if (rem <= slab_size / fract_leftover)?
      • if the space left over after carving the slab into objects is larger than slab_size / fract_leftover,
      • internal fragmentation is considered too high; in that case the order is raised instead
    • Meaning of max(min_order, fls...)?
      • the buddy order needed for at least min_objects objects of the given size
    • Meaning of fraction /= 2
    • fall back to requesting only 1 object per slab
    // order: 0
    if (order)
        s->allocflags |= __GFP_COMP;

    // s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // SLAB_CACHE_DMA: 0x00004000UL
    if (s->flags & SLAB_CACHE_DMA)
        s->allocflags |= GFP_DMA;

    // s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // SLAB_RECLAIM_ACCOUNT: 0x00020000UL
    if (s->flags & SLAB_RECLAIM_ACCOUNT)
        s->allocflags |= __GFP_RECLAIMABLE;
    // order: 0, size: 64, s->reserved: boot_kmem_cache_node.reserved: 0
    s->oo = oo_make(order, size, s->reserved);
    // s->oo: boot_kmem_cache_node.oo.x: 64

    // size: 64, get_order(64): 0, s->reserved: boot_kmem_cache_node.reserved: 0
    // size: 192, get_order(192): 0, s->reserved: boot_kmem_cache.reserved: 0
    s->min = oo_make(get_order(size), size, s->reserved);
    // s->min: boot_kmem_cache_node.min.x: 64

slub.c::oo_make()

// order: 0, size: 64, s->reserved: boot_kmem_cache_node.reserved: 0
static inline struct kmem_cache_order_objects oo_make(int order,
        unsigned long size, int reserved)
{
    struct kmem_cache_order_objects x = {
        // order: 0, OO_SHIFT: 16, size: 64, reserved: 0
        // order_objects(0, 64, 0): 0x40
        // order: 0, OO_SHIFT: 16, size: 192, reserved: 0
        // order_objects(0, 192, 0): 0x15
        (order << OO_SHIFT) + order_objects(order, size, reserved)
    };
    // x.x: 64
    // x.x: 21

    return x;
}
// return 21
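For reference, kmem_cache_order_objects packs the order and the object count into a single word; the accessors in slub.c unpack it roughly like this (OO_SHIFT: 16):

#define OO_SHIFT    16
#define OO_MASK     ((1 << OO_SHIFT) - 1)

static inline int oo_order(struct kmem_cache_order_objects x)
{
    return x.x >> OO_SHIFT;     // here: 0
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
    return x.x & OO_MASK;       // here: 64 for boot_kmem_cache_node, 21 for boot_kmem_cache
}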

slub.c::calculate_sizes()

    s->oo = oo_make(order, size, s->reserved);
    // s->oo: boot_kmem_cache_node.oo.x: 64
    // s->oo: boot_kmem_cache.oo.x: 21

    // size: 64, get_order(64): 0, s->reserved: boot_kmem_cache_node.reserved: 0
    // size: 192, get_order(192): 0, s->reserved: boot_kmem_cache.reserved: 0
    s->min = oo_make(get_order(size), size, s->reserved);
    // s->min: boot_kmem_cache_node.min.x: 64
    // s->min: boot_kmem_cache.min.x: 21

    // s->oo: boot_kmem_cache_node.oo, s->max: boot_kmem_cache_node.max
    // oo_objects(boot_kmem_cache_node.oo): 64, oo_objects(boot_kmem_cache_node.max): 0
    // s->oo: boot_kmem_cache.oo, s->max: boot_kmem_cache.max
    // oo_objects(boot_kmem_cache.oo): 21, oo_objects(boot_kmem_cache.max): 0
    if (oo_objects(s->oo) > oo_objects(s->max))
        // s->oo: boot_kmem_cache_node.oo.x: 64
        // s->oo: boot_kmem_cache.oo.x: 21
        s->max = s->oo;
        // s->max: boot_kmem_cache_node.max.x: 64
        // s->max: boot_kmem_cache.max.x: 21

    // s->oo: boot_kmem_cache_node.oo, oo_objects(boot_kmem_cache_node.oo): 64
    // s->oo: boot_kmem_cache.oo, oo_objects(boot_kmem_cache.oo): 21
    return !!oo_objects(s->oo);
    // return 1
}
// return 1

slub.c::kmem_cache_open()

static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
{
...
    // s: &boot_kmem_cache_node, -1, calculate_sizes(&boot_kmem_cache_node, -1): 1
    if (!calculate_sizes(s, -1))
        goto error;
// What calculate_sizes does:
// it computes the optimal order for the given object size,
// taking internal fragmentation into account,
// and initializes the kmem_cache members inuse, size, allocflags, min, oo and max.
// s->inuse: boot_kmem_cache.inuse: 132
// s->size: boot_kmem_cache.size: 192
// s->allocflags: boot_kmem_cache.allocflags: 0
// s->min: boot_kmem_cache.min.x: 21
// s->max: boot_kmem_cache.max.x: 21
// s->oo: boot_kmem_cache.oo.x: 21
// In short: it calculates the slab order while keeping internal fragmentation low.
    // s->size: boot_kmem_cache_node.size: 64, ilog2(64): 6
    // s: &boot_kmem_cache_node, 3
    // s->size: boot_kmem_cache.size: 192, ilog2(192): 7
    // s: &boot_kmem_cache, 3
    set_min_partial(s, ilog2(s->size) / 2);

slub.c::set_min_partial()

static void set_min_partial(struct kmem_cache *s, unsigned long min)
{
    // min: 3, MIN_PARTIAL: 5
    // min: 3, MIN_PARTIAL: 5    
    if (min < MIN_PARTIAL)
        min = MIN_PARTIAL;
        // min : 5
        // min : 5        
    else if (min > MAX_PARTIAL)
        min = MAX_PARTIAL;

    // s->min_partial: boot_kmem_cache_node.min_partial: 0, min: 5
    // s->min_partial: boot_kmem_cache.min_partial: 0, min: 5    
    s->min_partial = min;
    // s->min_partial: boot_kmem_cache_node.min_partial: 5
    // s->min_partial: boot_kmem_cache.min_partial: 5    
}
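With the sizes computed above, the min argument works out the same way for both caches:

/* boot_kmem_cache_node: ilog2(64)  = 6, 6 / 2 = 3 -> raised to MIN_PARTIAL: 5
 * boot_kmem_cache:      ilog2(192) = 7, 7 / 2 = 3 -> raised to MIN_PARTIAL: 5
 */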

slub.c::kmem_cache_open()

    // s: &boot_kmem_cache_node, kmem_cache_has_cpu_partial(&boot_kmem_cache_node): 1
    // s->size: boot_kmem_cache_node.size: 64, PAGE_SIZE: 0x1000
    // s: &boot_kmem_cache, kmem_cache_has_cpu_partial(&boot_kmem_cache): 1
    // s->size: boot_kmem_cache.size: 192, PAGE_SIZE: 0x1000
    if (!kmem_cache_has_cpu_partial(s))
        s->cpu_partial = 0;
    else if (s->size >= PAGE_SIZE)
        s->cpu_partial = 2;
    else if (s->size >= 1024)
        s->cpu_partial = 6;
    else if (s->size >= 256)
        s->cpu_partial = 13;
    else
        // s->cpu_partial: boot_kmem_cache_node.cpu_partial: 0
        // s->cpu_partial: boot_kmem_cache.cpu_partial: 0        
        s->cpu_partial = 30;
        // boot_kmem_cache_node.cpu_partial: 30
        // boot_kmem_cache.cpu_partial: 30

    // s: &boot_kmem_cache_node, init_kmem_cache_nodes(&boot_kmem_cache_node): 1
    if (!init_kmem_cache_nodes(s))
        goto error;
// s: &boot_kmem_cache_node, init_kmem_cache_nodes(&boot_kmem_cache_node): 1
if (!init_kmem_cache_nodes(s))

slub.c::init_kmem_cache_nodes()

static int init_kmem_cache_nodes(struct kmem_cache *s)
{
    int node;

    // N_NORMAL_MEMORY: 2
    for_each_node_state(node, N_NORMAL_MEMORY) {
    // for ( (node) = 0; (node) == 0; (node) = 1)

        struct kmem_cache_node *n;

        // slab_state: DOWN: 0
        // slab_state: PARTIAL: 1        
        if (slab_state == DOWN) {
        ...
        }
        // kmem_cache_node: &boot_kmem_cache_node 
        // GFP_KERNEL : 0xD0 , node: 0
        n = kmem_cache_alloc_node(kmem_cache_node,
                        GFP_KERNEL, node);
// kmem_cache_node: &boot_kmem_cache_node
// GFP_KERNEL : 0xD0 , node: 0
n = kmem_cache_alloc_node(kmem_cache_node,
GFP_KERNEL, node);

slab.h::kmem_cache_alloc_node()

static __always_inline void *
kmem_cache_alloc_node_trace(struct kmem_cache *s,
                  gfp_t gfpflags,
                  int node, size_t size)
{
    return kmem_cache_alloc_trace(s, gfpflags, size);
}

slub.c::kmem_cache_alloc_node()

void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
    void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);

    trace_kmem_cache_alloc_node(_RET_IP_, ret,
                    s->object_size, s->size, gfpflags, node);

    return ret;
}

slub.c::slab_alloc_node()

static __always_inline void *slab_alloc_node(struct kmem_cache *s,
        gfp_t gfpflags, int node, unsigned long addr)
{
    void **object;
    struct kmem_cache_cpu *c;
    struct page *page;
    unsigned long tid;

    if (slab_pre_alloc_hook(s, gfpflags))
        return NULL;

slub.c::slab_pre_alloc_hook()

static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
    // flags: GFP_KERNEL: 0xD0, gfp_allowed_mask: 0x1ffff2f
    // gfp_allowed_mask: 0x1ffff2f
    // gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
    flags &= gfp_allowed_mask;
    // flags: 0x0

    lockdep_trace_alloc(flags);
    // null function
    might_sleep_if(flags & __GFP_WAIT);
    // ...

    // s->object_size: boot_kmem_cache_node.object_size: 44,
    // flags: 0x0, s->flags: boot_kmem_cache_node.flags: SLAB_HWCACHE_ALIGN: 0x00002000UL
    // should_failslab(44, 0, 0x00002000UL): 0
    return should_failslab(s->object_size, flags, s->flags);
}
// return 0
static inline bool should_failslab(size_t size, gfp_t gfpflags,
                unsigned long flags)
{
    return false;
}

slub.c::slab_alloc_node()

static __always_inline void *slab_alloc_node(struct kmem_cache *s,
        gfp_t gfpflags, int node, unsigned long addr)
{
    void **object;
    struct kmem_cache_cpu *c;
    struct page *page;
    unsigned long tid;

    // slab_pre_alloc_hook(): 0
    if (slab_pre_alloc_hook(s, gfpflags))
        return NULL;

    s = memcg_kmem_get_cache(s, gfpflags);
    // s: &boot_kmem_cache_node

redo:
    preempt_disable();
    // increments the preempt count and applies a barrier
    // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab: 0xc0502d00
    c = __this_cpu_ptr(s->cpu_slab);
// s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab: 0xc0502d00
c = __this_cpu_ptr(s->cpu_slab);

percpu.h::__this_cpu_ptr()

// c: (&boot_kmem_cache_node)->cpu_slab + (pcpu_unit_offsets[0] + offset of pcpu_base_addr from __per_cpu_start)
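__this_cpu_ptr() itself is only a thin wrapper around the per-cpu offset; roughly (a sketch of the generic definition in include/asm-generic/percpu.h, the exact form depends on the config):

/* shift the per-cpu pointer by this CPU's per-cpu area offset */
#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)

/* so c ends up pointing at this CPU's copy of (&boot_kmem_cache_node)->cpu_slab
 * inside the per-cpu area set up earlier by setup_per_cpu_areas() */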

slub.c::slab_alloc_node()

static __always_inline void *slab_alloc_node(struct kmem_cache *s,
        gfp_t gfpflags, int node, unsigned long addr)
{
...
redo:
    preempt_disable();
    // increments the preempt count and applies a barrier
    // s->cpu_slab: (&boot_kmem_cache_node)->cpu_slab: 0xc0502d00
    c = __this_cpu_ptr(s->cpu_slab);
    // 

    //
    tid = c->tid;
    // tid: 0    
    preempt_enable();
    // applies a barrier, decrements the preempt count, and may reschedule via should_resched

    // where was c->freelist initialized?
    // it was initialized in pcpu_populate_chunk before we got here.
    object = c->freelist;
    // object: 0
    // c->page: ...
    page = c->page;
    // page: 0

    // object: 0, page: 0, node: -1
    if (unlikely(!object || !node_match(page, node)))
        object = __slab_alloc(s, gfpflags, node, addr, c);

slub.c::node_match()

static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
    if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
        return 0;
#endif
    return 1;
}

slub.c::slab_alloc_node()

static __always_inline void *slab_alloc_node(struct kmem_cache *s,
        gfp_t gfpflags, int node, unsigned long addr)
{
...
redo:
...
    object = c->freelist;
    page = c->page;

    if (unlikely(!object || !node_match(page, node)))
        // addr: _RET_IP_
        object = __slab_alloc(s, gfpflags, node, addr, c);
    object = __slab_alloc(s, gfpflags, node, addr, c);

slub.c::__slab_alloc()

static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
              unsigned long addr, struct kmem_cache_cpu *c)
{
    void *freelist;
    struct page *page;
    unsigned long flags;

    local_irq_save(flags);
    // saves the CPSR into flags

#ifdef CONFIG_PREEMPT
    /*
     * We may have been preempted and rescheduled on a different
     * cpu before disabling interrupts. Need to reload cpu area
     * pointer.
     */
    // ...
    c = this_cpu_ptr(s->cpu_slab);
    // (&boot_kmem_cache...)
#endif

    // c->page:
    page = c->page;
    if (!page)
        goto new_slab;
redo:
...

new_slab:

    if (c->partial) {
        page = c->page = c->partial;
        c->partial = page->next;
        stat(s, CPU_PARTIAL_ALLOC);
        c->freelist = NULL;
        goto redo;
    }

    // s: &boot... 
    freelist = new_slab_objects(s, gfpflags, node, &c);
freelist = new_slab_objects(s, gfpflags, node, &c);

slub.c::new_slab_objects()

static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
            int node, struct kmem_cache_cpu **pc)
{
    void *freelist;
    //
    struct kmem_cache_cpu *c = *pc;
    // 
    struct page *page;
    //

    // s:
    freelist = get_partial(s, flags, node, c);
freelist = get_partial(s, flags, node, c);

slub.c::get_partial()

static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
        struct kmem_cache_cpu *c)
{
    void *object;
    int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;

    object = get_partial_node(s, get_node(s, searchnode), c, flags);
    if (object || node != NUMA_NO_NODE)
        return object;

    return get_any_partial(s, flags, c);
}
numa_node_id()

topology.h::numa_node_id()

static inline int numa_node_id(void)
{
    return __this_cpu_read(numa_node);
}

slub.c::get_partial()

static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
        struct kmem_cache_cpu *c)
{
    void *object;
    int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;

    // get_node()
    // c: ..
    // flags: GFP_KERNEL: 0xD0
    object = get_partial_node(s, get_node(s, searchnode), c, flags);

slub.c::get_partial_node()

static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
                struct kmem_cache_cpu *c, gfp_t flags)
{
    struct page *page, *page2;
    void *object = NULL;
    int available = 0;
    int objects;

    /*
     * Racy check. If we mistakenly see no partial slabs then we
     * just allocate an empty slab. If we mistakenly try to get a
     * partial slab and there is none available then get_partials()
     * will return NULL.
     */
    // n->nr_partial: 1 
    if (!n || !n->nr_partial)
        return NULL;
// n->nr_partial: 1 (it was set to 1 after initialization in early_kmem_cache_node_alloc())
    spin_lock(&n->list_lock);
    // acquires the spinlock (&boot_kmem_cache_node)->node[0].list_lock

    list_for_each_entry_safe(page, page2, &n->partial, lru) {
//    for (page = list_first_entry(&n->partial, typeof(*page), lru),    \
//        page2 = list_next_entry(page, lru);            \
//         &page->lru != (&n->partial);                     \
//         page = page2, page2 = list_next_entry(page2, lru))

        void *t;

        if (!pfmemalloc_match(page, flags))
            continue;

        t = acquire_slab(s, n, page, object == NULL, &objects);
        if (!t)
            break;

        available += objects;
        if (!object) {
            c->page = page;
            stat(s, ALLOC_FROM_PARTIAL);
            object = t;
        } else {
            put_cpu_partial(s, page, 0);
            stat(s, CPU_PARTIAL_NODE);
        }
        if (!kmem_cache_has_cpu_partial(s)
            || available > s->cpu_partial / 2)
            break;

    }
    spin_unlock(&n->list_lock);
    return object;
}
Next time we will continue by analyzing the list_for_each_entry_safe() loop.
