Daniel Jslin

May the source be with you

核心參數解析

Linux 核心啟動時會解析由 bootloader 傳來的參數,來設定核心運作時的行為。

傳給核心的參數是以空格分隔的字串,通常的型式如下:

param[=value_1][,value_2]...[,value_10]

"param" 是關鍵字,後面跟著設定值,以逗號分隔。一個關鍵字後面最多可以有 10 個值,但是可以透過再次使用同一關鍵字的方式,傳遞超過 10 個以上的參數。

舉例來說,bootloader 可以透過 "init=..." 指定系統初始化的程序;或是透過 "root=..." 指定 root filesystem device,如: "root=/dev/hda3"。

這個由 bootloader 傳給核心的參數字串也可以包含傳給 init 程序的參數,核心只會解析到 "--" 之前的字串,在 "--" 之後的字串會被當成傳給 init 程序的參數。

詳細的核心參數說明,請參考下面兩份文件:

Linux 核心根據不同的架構對 bootloader 有不同的要求。以 x86 架構來說,核心會需要 bootloader 將 struct boot_params 的資料準備好,然後在核心啟動時傳給核心。核心參數字串就是包含在其中的 struct setup_header 結構內的 cmd_line_ptr。

/*
 * Excerpt of the x86 setup header (members elided with "... omitted ...").
 * The structure is declared packed.  Per the accompanying text, the kernel
 * parameter string is reached through the cmd_line_ptr member.
 */
struct setup_header {
    __u8    setup_sects;
    __u16   root_flags;
    __u32   syssize;
    __u16   ram_size;
    ... omitted ...

    __u32   cmd_line_ptr;   /* kernel parameter string (see accompanying text) */
    ... omitted ...
    __u32   cmdline_size;   /* NOTE(review): presumably the size limit of that string -- confirm against the boot protocol */
    ... omitted ...

} __attribute__((packed));

/*
 * Excerpt of the x86 boot parameter block (members elided with
 * "... omitted ...").  The trailing comment on each member is its byte
 * offset; the structure is declared packed.
 */
struct boot_params {
    struct screen_info screen_info;             /* 0x000 */
    struct apm_bios_info apm_bios_info;         /* 0x040 */
    __u8  _pad2[4];                             /* 0x054 */

    ... omitted ...

    struct setup_header hdr;    /* setup header */  /* 0x1f1 */
    /* Padding sized so that hdr (starting at 0x1f1) plus _pad7 ends at 0x290. */
    __u8  _pad7[0x290-0x1f1-sizeof(struct setup_header)];
    ... omitted ...
} __attribute__((packed));

ARM 架構則是要求 bootloader 以 struct tag 格式將資料傳給核心。

/*
 * One ARM boot tag: a common header followed by a union with one member
 * per tag kind (further members are elided with "... omitted ...").
 * NOTE(review): the kernel parameter string is presumably carried by the
 * cmdline member -- confirm against struct tag_cmdline.
 */
struct tag {
    struct tag_header hdr;
    union {
        struct tag_core     core;
        struct tag_mem32    mem;
        struct tag_videotext    videotext;
        struct tag_ramdisk  ramdisk;
        struct tag_initrd   initrd;
        struct tag_serialnr serialnr;
        struct tag_revision revision;
        struct tag_videolfb videolfb;
        struct tag_cmdline  cmdline;

        ... omitted ...
    } u;
};

核心會在 start_kernel() 內對傳到核心的參數字串進行解析。主要會分為兩個階段,分別由 parse_early_param() 和 parse_args() 這兩個函式進行解析。

/*
 * Excerpt of start_kernel() showing the two command-line parsing stages:
 * parse_early_param() first, then parse_args() over the kernel_param
 * entries between __start___param and __stop___param.  If that call
 * returns a usable pointer, parse_args() is run a second time on it with
 * set_init_arg as the handler.
 */
asmlinkage __visible void __init start_kernel(void)
{
    char *command_line;  // a pointer to the kernel command line
    char *after_dashes;  // result of the first parse_args() call below

    ... omitted ...

    setup_arch(&command_line);        // architecture-specific setup

    setup_command_line(command_line); // store the untouched command line

    ... omitted ...


    pr_notice("Kernel command line: %s\n", boot_command_line);
    parse_early_param();                         // parse options for early_param()
    after_dashes = parse_args("Booting kernel",  // parse options for module_param(), module_param_named(), core_param()
                  static_command_line, __start___param,
                  __stop___param - __start___param,
                  -1, -1, &unknown_bootoption);  // options matching no kernel_param go to unknown_bootoption (__setup() handling)
    if (!IS_ERR_OR_NULL(after_dashes))           // skip when nothing (or an error) was returned
        parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,  // after_dashes will be passed to the init process as argv
               set_init_arg);

    ... omitted ...

}

early_param

有些參數有較高的優先權,需要先被處理,這類的參數被稱為 early_param。舉例來說,像 log level 的設定會影響訊息的輸出,若太晚生效的話,有些除錯訊息可能就不會被看到。

Early option example

debug       [KNL] Enable kernel debugging (events log level).

quiet       [KNL] Disable most log messages

loglevel=   All Kernel Messages with a loglevel smaller than the
            console loglevel will be printed to the console. It can
            also be changed with klogd or other programs. The
            loglevels are defined as follows:

            0 (KERN_EMERG)      system is unusable
            1 (KERN_ALERT)      action must be taken immediately
            2 (KERN_CRIT)       critical conditions
            3 (KERN_ERR)        error conditions
            4 (KERN_WARNING)    warning conditions
            5 (KERN_NOTICE)     normal but significant condition
            6 (KERN_INFO)       informational
            7 (KERN_DEBUG)      debug-level messages

以 "debug" 這個參數為例,它的功用是把 log level 設到 debug-level,允許更多的訊息輸出方便除錯。為了處理 "debug" 參數的設定,在核心內部定義了一個 debug_kernel() 函式負責處理 "debug" 參數的設定,然後利用 early_param() 這個 macro 將 "debug" 和處理函式 debug_kernel 關聯起來。"quiet" 參數也是一樣的處理方式。

/*
 * Handler for the "debug" parameter: sets console_loglevel to
 * CONSOLE_LOGLEVEL_DEBUG.  @str is not used.  Always returns 0.
 */
static int __init debug_kernel(char *str)
{
    console_loglevel = CONSOLE_LOGLEVEL_DEBUG;  // console_loglevel = 10
    return 0;
}

/*
 * Handler for the "quiet" parameter: sets console_loglevel to
 * CONSOLE_LOGLEVEL_QUIET.  @str is not used.  Always returns 0.
 */
static int __init quiet_kernel(char *str)
{
    console_loglevel = CONSOLE_LOGLEVEL_QUIET;  // console_loglevel = 4
    return 0;
}

/* Bind each parameter name to its handler as an early parameter. */
early_param("debug", debug_kernel);
early_param("quiet", quiet_kernel);

在 4.1 的核心中,主要有兩種方式處理核心參數,其中一種較單純,使用 struct obs_kernel_param 來記錄字串參數與處理函式的關聯,並透過 __setup_param() macro 來設定。early_param() 屬於這種方式。

/*
 * Associates a parameter name with its handler function.  Instances are
 * defined through __setup_param(), which places them in the .init.setup
 * section.
 */
struct obs_kernel_param {
    const char *str;             // param's name
    int (*setup_func)(char *);   // handler function
    int early;                   // true if it's an early_param
};

/*
 * Only for really core code.  See moduleparam.h for the normal way.
 *
 * Force the alignment so the compiler doesn't space elements of the
 * obs_kernel_param "array" too far apart in .init.setup.
 *
 * Expands to two definitions: the name string __setup_str_<unique_id>
 * holding @str, and a struct obs_kernel_param __setup_<unique_id>
 * initialised with { that string, @fn, @early }, placed in the
 * .init.setup section and aligned to sizeof(long).
 */
#define __setup_param(str, unique_id, fn, early)              \
    static const char __setup_str_##unique_id[] __initconst   \
        __aligned(1) = str;                                   \
    static struct obs_kernel_param __setup_##unique_id        \
        __used __section(.init.setup)                         \
        __attribute__((aligned((sizeof(long)))))              \
        = { __setup_str_##unique_id, fn, early }
/*
 * Register @fn as the handler for @str with the early flag set to 0.
 * @fn is also used as the unique id for the generated symbol names.
 */
#define __setup(str, fn)   \
    __setup_param(str, fn, fn, 0)

/*
 * NOTE: fn is as per module_param, not __setup!
 * Emits warning if fn returns non-zero.
 *
 * Register @fn as the handler for @str with the early flag set to 1.
 * @fn is also used as the unique id for the generated symbol names.
 */
#define early_param(str, fn)   \
    __setup_param(str, fn, fn, 1)

值得注意的是: __setup_param() 會對 struct obs_kernel_param 附加 __section(.init.setup) 屬性,透過這個 macro 定義的資料會被放在 .init.setup section 之中。

parse_early_param

parse_early_param() 如其名,是核心用來解析 early_param 的函式。

init/main.c: parse_early_param()

/*
 * Arch code calls this early on, or if not, just before other parsing.
 *
 * Runs the early-option pass over the boot command line at most once:
 * the static "done" flag turns every later call into a no-op.
 */
void __init parse_early_param(void)
{
    static int done __initdata;
    static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;

    if (done)
        return;

    /* All fall through to do_early_param. */
    /* Parse a private copy rather than boot_command_line itself. */
    strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
    parse_early_options(tmp_cmdline);
    done = 1;
}

/*
 * Run parse_args() over @cmdline with an empty kernel_param table
 * (NULL, 0) and do_early_param as the handler for unmatched options, so
 * every option in the string ends up in do_early_param().
 */
void __init parse_early_options(char *cmdline)
{
    parse_args("early options", cmdline, /*kernel_param=*/NULL, 0, 0, 0, do_early_param);
}

上面的 parse_args() 會將 cmdline 拆成一組一組的 param, value,傳入 do_early_param()。

/*
 * Check for early params.
 *
 * Walks every obs_kernel_param between __setup_start and __setup_end and
 * calls the entry's setup_func(@val) when either
 *   - the entry is flagged early and its name matches @param, or
 *   - @param is "console" and the entry's name is "earlycon".
 * A non-zero result from the handler only produces a warning.
 * The third argument is unused.  Always returns 0.
 */
static int __init do_early_param(char *param, char *val, const char *unused)
{
    const struct obs_kernel_param *p;

    for (p = __setup_start; p < __setup_end; p++) {
        if ((p->early && parameq(param, p->str)) ||          // early entry whose name matches param
            (strcmp(param, "console") == 0 &&                // "console" also triggers the "earlycon" entry
             strcmp(p->str, "earlycon") == 0)
        ) {
            if (p->setup_func(val) != 0)                     // call param's setup_func()
                pr_warn("Malformed early option '%s'\n", param);
        }
    }
    /* We accept everything at this stage. */
    return 0;
}

還記得上面提到 __setup_param() 會附加 __section(.init.setup) 屬性到 struct obs_kernel_param 的資料上嗎?核心在這邊使用了一個技巧,透過這個屬性,讓 linker 把 struct obs_kernel_param 的資料集中在一起,然後利用 __setup_start 和 __setup_end 將這個區域標示出來。

__setup_start 和 __setup_end 實際上是被定義在 linker script include/asm-generic/vmlinux.lds.h 之中:

/*
 * Linker-script fragment: align to initsetup_align, then bracket the
 * collected .init.setup input sections with the __setup_start and
 * __setup_end symbols.
 */
#define INIT_SETUP(initsetup_align)         \
    . = ALIGN(initsetup_align);             \
    VMLINUX_SYMBOL(__setup_start) = .;      \
    *(.init.setup)                          \
    VMLINUX_SYMBOL(__setup_end) = .;

觀察 System.map 可以了解實際的布局 (下面是 x86_64 的 System.map)

ffffffff81e7c9b0 T __setup_start
ffffffff81e7c9c8 t __setup_rdinit_setup
ffffffff81e7c9e0 t __setup_init_setup
ffffffff81e7c9f8 t __setup_loglevel
ffffffff81e7ca10 t __setup_quiet_kernel
ffffffff81e7ca28 t __setup_debug_kernel
...
ffffffff81e7e5b8 T __setup_end

了解這個技巧,do_early_param() 的程式碼就很容易理解了,基本上就是逐個比較 struct obs_kernel_param 是不是符合傳進來的 param name,若符合則將值傳入 setup_func() 中進行設定。

Module param

上面討論的 obs_kernel_param 的 obs 是 obsolete 的意思,看起來是打算讓它功成身退,但是一直沒動作,就這樣一直放著...。 而用來取代舊的參數處理結構的是升級後的 struct kernel_param。從它被放在 include/linux/moduleparam.h 大概可以猜測新的結構主要是以支援模組參數為主,擴增到一般性的核心參數。

/*
 * Describes one parameter.  Instances are defined through
 * __module_param_call(), which places them in the "__param" section.
 */
struct kernel_param {
    const char *name;                     /* parameter name: prefix followed by the stringified name */
    const struct kernel_param_ops *ops;   /* set/get operations; parse_one() calls ops->set() */
    u16 perm;                             /* permission bits, passed through VERIFY_OCTAL_PERMISSIONS() */
    s8 level;                             /* checked against parse_one()'s min_level/max_level */
    u8 flags;
    union {                               /* per-parameter argument; the ops find it via kp->arg */
        void *arg;
        const struct kparam_string *str;  /* NOTE(review): presumably used for string parameters -- confirm */
        const struct kparam_array *arr;   /* NOTE(review): presumably used for array parameters -- confirm */
    };
};

新的 struct kernel_param 更為一般化,物件化,kernel_param_ops 提供 set/get method,負責參數的設定與保存。除了在啟動時可以經由 bootloader 設定參數,開機之後,kernel_param 也會被掛載到 sysfs 上,可以透過 /sys/module/<module-name>/parameters/<param-name> 讀取或設定參數。

/*
 * Operations attached to a struct kernel_param for storing a value into
 * the parameter and reading it back.
 */
struct kernel_param_ops {
    /* How the ops should behave */
    /* parse_one() tests KERNEL_PARAM_OPS_FL_NOARG here to allow an option without a value. */
    unsigned int flags;
    /* Returns 0, or -errno.  arg is in kp->arg. */
    int (*set)(const char *val, const struct kernel_param *kp);
    /* Returns length written or -errno.  Buffer is 4k (ie. be short!) */
    int (*get)(char *buffer, const struct kernel_param *kp);
    /* Optional function to free kp->arg when module unloaded. */
    void (*free)(void *arg);
};

/*
 * Special one for strings we want to copy into.
 * Referenced from the str member of struct kernel_param's union.
 */
struct kparam_string {
    unsigned int maxlen;   /* NOTE(review): presumably the capacity of the buffer below -- confirm */
    char *string;          /* NOTE(review): presumably the destination buffer -- confirm */
};

/*
 * Special one for arrays.
 * Referenced from the arr member of struct kernel_param's union.
 */
struct kparam_array
{
    unsigned int max;                     /* NOTE(review): presumably the maximum element count -- confirm */
    unsigned int elemsize;                /* NOTE(review): presumably the size of one element in bytes -- confirm */
    unsigned int *num;                    /* NOTE(review): presumably where the parsed element count is stored -- confirm */
    const struct kernel_param_ops *ops;   /* NOTE(review): presumably the ops applied to each element -- confirm */
    void *elem;                           /* NOTE(review): presumably the element storage -- confirm */
};
/*
 * This is the fundamental function for registering boot/module parameters.
 *
 * Expands to two definitions: the name string __param_str_<name>, formed
 * from @prefix followed by the stringified @name, and a struct
 * kernel_param __param_<name> placed in the "__param" section, aligned
 * to sizeof(void *), and initialised with
 * { name string, @ops, VERIFY_OCTAL_PERMISSIONS(@perm), @level, @flags, { @arg } }.
 */
#define __module_param_call(prefix, name, ops, arg, perm, level, flags) \
    /* Default value instead of permissions? */                         \
    static const char __param_str_##name[] = prefix #name;              \
    static struct kernel_param __moduleparam_const __param_##name       \
    __used                                                              \
    __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
    = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } }

struct kernel_param 主要是透過 __module_param_call() 這個 macro 來設定,與 struct obs_kernel_param 類似,這個 macro 會將 __section__ ("__param") 的屬性設給 struct kernel_param 的資料,讓 linker 將資料集中在一起。

另外,在上面的 macro 中我們可以發現如下的參數名宣告:

static const char __param_str_##name[] = prefix #name;

這個字串會被指派給 struct kernel_param 的 name 欄位。新的 struct kernel_param 被引入的同時,參數的命名方式也做了調整,新的命名方式引入了 module name 當作前綴 (prefix),這是一種命名空間 (namespace) 的概念,新的命名規則讓各個模組更容易擁有屬於自己的參數,而不必太過煩惱命名衝突的問題。

模組參數通常以 module_param() 或是以 module_param_named() 來宣告。 以 "printk" 模組的 "time" 參數為例 (kernel/printk/printk.c: printk_time),宣告如下:

/*
 * Backing variable for the "time" parameter; its initial value follows
 * CONFIG_PRINTK_TIME.  Registered below as a bool parameter named "time"
 * with permissions S_IRUGO | S_IWUSR.
 */
static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
  • name: "time" => "printk.time" with KBUILD_MODNAME "printk" and a "." as prefix

  • value: printk_time (the actual lvalue to alter)

  • type: bool => param_ops_bool (the set & get operations for this parameter)

  • perm: S_IRUGO | S_IWUSR

    the parameter "printk.time" can also be found at /sys/module/printk/parameters/time

值得特別一提的是第三個參數 type: 新的 struct kernel_param 針對像 bool, int, byte ... 等基礎的通用型態提供共用的 struct kernel_param_ops 及其 set/get method,並以 param_ops_##type 命名。在上面例子中,"printk.time" 是 bool 型態的參數,會透過 param_ops_bool 存取設定。其他支援的型態請參照下面 module_param() 的註解說明。

/**
 * module_param - typesafe helper for a module/cmdline parameter
 * @value: the variable to alter, and exposed parameter name.
 * @type: the type of the parameter
 * @perm: visibility in sysfs.
 *
 * @value becomes the module parameter, or (prefixed by KBUILD_MODNAME and a
 * ".") the kernel commandline parameter.  Note that - is changed to _, so
 * the user can use "foo-bar=1" even for variable "foo_bar".
 *
 * @perm is 0 if the variable is not to appear in sysfs, or 0444
 * for world-readable, 0644 for root-writable, etc.  Note that if it
 * is writable, you may need to use kparam_block_sysfs_write() around
 * accesses (esp. charp, which can be kfreed when it changes).
 *
 * The @type is simply pasted to refer to a param_ops_##type and a
 * param_check_##type: for convenience many standard types are provided but
 * you can create your own by defining those variables.
 *
 * Standard types are:
 *  byte, short, ushort, int, uint, long, ulong
 *  charp: a character pointer
 *  bool: a bool, values 0/1, y/n, Y/N.
 *  invbool: the above, only sense-reversed (N = true).
 *
 * Implemented as module_param_named() with the same identifier used for
 * both the exposed name and the variable.
 */
#define module_param(name, type, perm)   \
    module_param_named(name, name, type, perm)


/**
 * module_param_named - typesafe helper for a renamed module/cmdline parameter
 * @name: a valid C identifier which is the parameter name.
 * @value: the actual lvalue to alter.
 * @type: the type of the parameter
 * @perm: visibility in sysfs.
 *
 * Usually it's a good idea to have variable names and user-exposed names the
 * same, but that's harder if the variable must be non-static or is inside a
 * structure.  This allows exposure under a different name.
 *
 * Expands to three statements: a param_check_##type() invocation on
 * &@value, a module_param_cb() registration using &param_ops_##type
 * with &@value as the argument, and __MODULE_PARM_TYPE() with the
 * stringified @type.
 */
#define module_param_named(name, value, type, perm)            \
    param_check_##type(name, &(value));                        \
    module_param_cb(name, &param_ops_##type, &value, perm);    \
    __MODULE_PARM_TYPE(name, #type)


/**
 * module_param_cb - general callback for a module/cmdline parameter
 * @name: a valid C identifier which is the parameter name.
 * @ops: the set & get operations for this parameter.
 * @arg: forwarded unchanged as the arg of __module_param_call().
 * @perm: visibility in sysfs.
 *
 * The ops can have NULL set or get functions.
 *
 * Calls __module_param_call() with MODULE_PARAM_PREFIX as the name
 * prefix, level -1 and flags 0.
 */
#define module_param_cb(name, ops, arg, perm)  \
    __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0)

除了 module 外,struct kernel_param 也可用在一般核心的參數設定上,使用 core_param() macro 來宣告,因為不是給特定模組的參數,所以前綴 (prefix) 為空。

/**
 * core_param - define a historical core kernel parameter.
 * @name: the name of the cmdline and sysfs parameter (often the same as var)
 * @var: the variable
 * @type: the type of the parameter
 * @perm: visibility in sysfs
 *
 * core_param is just like module_param(), but cannot be modular and
 * doesn't add a prefix (such as "printk.").  This is for compatibility
 * with __setup(), and it makes sense as truly core parameters aren't
 * tied to the particular file they're in.
 *
 * Expands to a param_check_##type() invocation on &@var followed by
 * __module_param_call() with an empty prefix string, &param_ops_##type,
 * &@var, level -1 and flags 0.
 */
#define core_param(name, var, type, perm)    \
    param_check_##type(name, &(var));        \
    __module_param_call("", name, &param_ops_##type, &var, perm, -1, 0)

parse_args

parse_args() 負責解析字串參數,處理屬於 struct kernel_param 的參數設定,在核心啟動時由 start_kernel() 處進入。

/*
 * Excerpt of start_kernel() focused on the parse_args() call that walks
 * the kernel_param entries between __start___param and __stop___param.
 */
asmlinkage __visible void __init start_kernel(void)
{
    char *command_line;  // a pointer to the kernel command line
    char *after_dashes;  // receives the return value of parse_args() below

    ... omitted ...

    pr_notice("Kernel command line: %s\n", boot_command_line);
    parse_early_param();              // parse options for early_param()
    after_dashes = parse_args("Booting kernel", static_command_line,  // parse options for module_param(), module_param_named(), core_param()
                  __start___param,                   // array of struct kernel_param
                  __stop___param - __start___param,  // number of params
                  -1, -1, &unknown_bootoption);      // options matching no kernel_param go to unknown_bootoption (__setup() handling)

    ... omitted ...

}

parse_args() 要求提供 kernel_param 的處理陣列。 這邊和上面提到的 do_early_param() 使用一樣的技巧, __start___param 和 __stop___param 被定義在 linker script include/asm-generic/vmlinux.lds.h 裡,中間是 __param section, 由 module_param(), module_param_named(), core_param() 等 macro 宣告的 struct kernel_param 參數處理結構。

/* Built-in module parameters. */             \
__param : AT(ADDR(__param) - LOAD_OFFSET) {   \
    VMLINUX_SYMBOL(__start___param) = .;      \
    *(__param)                                \
    VMLINUX_SYMBOL(__stop___param) = .;       \
}

來看一下 System.map (x86_64),觀察一下實際的布局情形:

ffffffff81ba3768 R __start___param
ffffffff81ba3788 r __param_dis_ucode_ldr
ffffffff81ba37a8 r __param_trace_pc
ffffffff81ba37c8 r __param_nommiotrace
ffffffff81ba37e8 r __param_filter_offset
ffffffff81ba3808 r __param_panic_on_warn
ffffffff81ba3828 r __param_pause_on_oops
ffffffff81ba3848 r __param_panic
...
ffffffff81ba55e8 R __stop___param

parse_args() 的實作:

  • 由 next_arg() 解析出下一對的 param, value 組合
  • 由 parse_one() 對應到符合的 struct kernel_param 資料
/*
 * Args looks like "foo=bar,bar2 baz=fuz wiz".
 *
 * Splits @args into [param, val] pairs with next_arg() and hands each
 * pair to parse_one() together with @params/@num, the level bounds and
 * the @unknown handler.  Returns the remaining text as soon as a bare
 * "--" (no value) is met, or NULL once the whole string has been
 * consumed.  The handling of parse_one()'s result is elided in this
 * excerpt ("... omitted ...").
 */
char *parse_args(const char *doing,  // doing message. e.g. "early options", "Booting kernel", "Setting init args"
         char *args,                 // command line string
         const struct kernel_param *params, unsigned num,  // array of struct kernel_param
         s16 min_level, s16 max_level,
         int (*unknown)(char *param, char *val, const char *doing))
{
    char *param, *val;

    /* Chew leading spaces */
    args = skip_spaces(args);

    if (*args)
        pr_debug("doing %s, parsing ARGS: '%s'\n", doing, args);

    while (*args) {
        int ret;
        int irq_was_disabled;

        args = next_arg(args, &param, &val);  // get the next [param, val] pair
        /* Stop at -- */
        if (!val && strcmp(param, "--") == 0)
            return args;
        /* Remember the IRQ state so a handler that enables IRQs can be reported. */
        irq_was_disabled = irqs_disabled();
        ret = parse_one(param, val, doing, params, num,
                min_level, max_level, unknown);
        if (irq_was_disabled && !irqs_disabled())
            pr_warn("%s: option '%s' enabled irq's!\n", doing, param);

        ... omitted ...
    }

    /* All parsed OK. */
    return NULL;
}

parse_one() 以線性搜尋的方式,比對出符合的 struct kernel_param。

/*
 * Apply a single [param, val] pair.
 *
 * Scans params[0 .. num_params) linearly for an entry whose name matches
 * @param.  On the first match:
 *   - returns 0 without setting anything if the entry's level lies
 *     outside [min_level, max_level];
 *   - returns -EINVAL if @val is NULL and the entry's ops do not carry
 *     KERNEL_PARAM_OPS_FL_NOARG;
 *   - otherwise returns the result of ops->set(), called with
 *     param_lock held.
 * With no match, the pair is passed to @handle_unknown when one was
 * supplied and its result is returned; otherwise -ENOENT is returned.
 */
static int parse_one(char *param, char *val,  // the [param, val] pair
             const char *doing,
             const struct kernel_param *params, unsigned num_params,
             s16 min_level, s16 max_level,
             int (*handle_unknown)(char *param, char *val, const char *doing))
{
    unsigned int i;
    int err;

    /* Find parameter */
    for (i = 0; i < num_params; i++) {
        if (parameq(param, params[i].name)) {     // match param's name with kernel_param
            if (params[i].level < min_level || params[i].level > max_level)
                return 0;
            /* No one handled NULL, so do it here. */
            if (!val && !(params[i].ops->flags & KERNEL_PARAM_OPS_FL_NOARG))
                return -EINVAL;
            pr_debug("handling %s with %p\n", param, params[i].ops->set);
            mutex_lock(&param_lock);
            param_check_unsafe(&params[i]);
            err = params[i].ops->set(val, &params[i]);   // call kernel_param_ops->set()
            mutex_unlock(&param_lock);
            return err;
        }
    }

    if (handle_unknown) {   // handle_unknown = do_early_param(), or unknown_bootoption(), ...
        pr_debug("doing %s: %s='%s'\n", doing, param, val);
        return handle_unknown(param, val, doing);
    }

    pr_debug("Unknown argument '%s'\n", param);
    return -ENOENT;
}

Comments