php内核学习(2)

开始的时候,找函数下断点找得够呛,后来在网上查资料才想起来,php内核的操作符实现都在Zend目录下的zend_operators.c里面,应该到这个文件里找==操作符对应的函数下断点,然后进行调试。本次调试使用的是cgi模式。

test.php

1
2
<?php
var_dump("0e12331"=="0e4543");

cgi_main.c main()

1
2
3
4
5
6
7
8
9
10
11
12
13
if((query_string = getenv("QUERY_STRING")) != NULL && strchr(query_string, '=') == NULL) {
/* we've got query string that has no = - apache CGI will pass it to command line */
unsigned char *p;
decoded_query_string = strdup(query_string);
php_url_decode(decoded_query_string, strlen(decoded_query_string));
for (p = (unsigned char *)decoded_query_string; *p && *p <= ' '; p++) {
/* skip all leading spaces */
}
if(*p == '-') {
skip_getopt = 1;
}
free(decoded_query_string);
}

当QUERY_STRING中不包含=时,URL查询字符串中被编码的内容会在此处通过php_url_decode()解码;解码后跳过前导空白字符,如果第一个有效字符是-,则设置skip_getopt = 1。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
we never take stdin if we're (f)cgi, always
rely on the web server giving us the info
we need in the environment.
*/
if (SG(request_info).path_translated || cgi || fastcgi) {
zend_stream_init_filename(&file_handle, SG(request_info).path_translated);
} else {
zend_stream_init_fp(&file_handle, stdin, "Standard input code");
}

/* request startup only after we've done all we can to
* get path_translated */
if (php_request_startup() == FAILURE) {
if (fastcgi) {
fcgi_finish_request(request, 1);
}
SG(server_context) = NULL;
php_module_shutdown();
return FAILURE;
}

将服务器传输过来的数据存入结构体

main.c php_execute_script()

这个和后面的查找函数,分析数据就和之前第一版的一样了,这里主要是研究php弱类型,所以这次我们进入zend_compile_file()的函数编译阶段

phar.c phar_compile_file()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
if (strstr(file_handle->filename, ".phar") && !strstr(file_handle->filename, "://")) {
if (SUCCESS == phar_open_from_filename((char*)file_handle->filename, strlen(file_handle->filename), NULL, 0, 0, &phar, NULL)) {
if (phar->is_zip || phar->is_tar) {
zend_file_handle f = *file_handle;

/* zip or tar-based phar */
spprintf(&name, 4096, "phar://%s/%s", file_handle->filename, ".phar/stub.php");
if (SUCCESS == phar_orig_zend_open((const char *)name, &f)) {

efree(name);
name = NULL;

f.filename = file_handle->filename;
if (f.opened_path) {
efree(f.opened_path);
}
f.opened_path = file_handle->opened_path;
f.free_filename = file_handle->free_filename;

switch (file_handle->type) {
case ZEND_HANDLE_STREAM:
if (file_handle->handle.stream.closer && file_handle->handle.stream.handle) {
file_handle->handle.stream.closer(file_handle->handle.stream.handle);
}
file_handle->handle.stream.handle = NULL;
break;
default:
break;
}
*file_handle = f;
}
} else if (phar->flags & PHAR_FILE_COMPRESSION_MASK) {
zend_file_handle_dtor(file_handle);
/* compressed phar */
file_handle->type = ZEND_HANDLE_STREAM;
/* we do our own reading directly from the phar, don't change the next line */
file_handle->handle.stream.handle = phar;
file_handle->handle.stream.reader = phar_zend_stream_reader;
file_handle->handle.stream.closer = NULL;
file_handle->handle.stream.fsizer = phar_zend_stream_fsizer;
file_handle->handle.stream.isatty = 0;
phar->is_persistent ?
php_stream_rewind(PHAR_G(cached_fp)[phar->phar_pos].fp) :
php_stream_rewind(phar->fp);
}
}
}

这里对phar的处理,就解释了为什么phar://phar.phar/xxx后面可以是任意的文件名了。

zend_language_scanner.l compile_file()

1
2
3
4
5
6
7
8
9
10
11
12
if (open_file_for_scanning(file_handle)==FAILURE) {
if (!EG(exception)) {
if (type==ZEND_REQUIRE) {
zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
zend_bailout();
} else {
zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
}
}
} else {
op_array = zend_compile(ZEND_USER_FUNCTION);
}

打开之前传入的文件,即获取文件内容,接着就是调用re2c和bison分别进行词法解析和语法解析,将代码解析成抽象语法树(ast树)的过程了。

zend_operators.c is_equal_function()

1
2
3
4
5
/*
 * Implements the loose equality test between op1 and op2.
 * Stores a boolean in result: true when zend_compare(op1, op2) returns 0,
 * false otherwise. Always returns SUCCESS.
 */
ZEND_API int ZEND_FASTCALL is_equal_function(zval *result, zval *op1, zval *op2) /* {{{ */
{
/* All type-dependent comparison logic is delegated to zend_compare(). */
ZVAL_BOOL(result, zend_compare(op1, op2) == 0);
return SUCCESS;
}

此处主要是调用zend_compare()来进行比较

zend_operators.c zend_compare()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
while (1) {
switch (TYPE_PAIR(Z_TYPE_P(op1), Z_TYPE_P(op2))) {//根据两个数的类型来进行比较
case TYPE_PAIR(IS_LONG, IS_LONG):
return Z_LVAL_P(op1)>Z_LVAL_P(op2)?1:(Z_LVAL_P(op1)<Z_LVAL_P(op2)?-1:0);

case TYPE_PAIR(IS_DOUBLE, IS_LONG)://将long转成double
return ZEND_NORMALIZE_BOOL(Z_DVAL_P(op1) - (double)Z_LVAL_P(op2));

case TYPE_PAIR(IS_LONG, IS_DOUBLE):
return ZEND_NORMALIZE_BOOL((double)Z_LVAL_P(op1) - Z_DVAL_P(op2));

case TYPE_PAIR(IS_DOUBLE, IS_DOUBLE):
if (Z_DVAL_P(op1) == Z_DVAL_P(op2)) {
return 0;
} else {
return ZEND_NORMALIZE_BOOL(Z_DVAL_P(op1) - Z_DVAL_P(op2));
}

case TYPE_PAIR(IS_ARRAY, IS_ARRAY):
return zend_compare_arrays(op1, op2);

case TYPE_PAIR(IS_NULL, IS_NULL):
case TYPE_PAIR(IS_NULL, IS_FALSE):
case TYPE_PAIR(IS_FALSE, IS_NULL):
case TYPE_PAIR(IS_FALSE, IS_FALSE):
case TYPE_PAIR(IS_TRUE, IS_TRUE):
return 0;

case TYPE_PAIR(IS_NULL, IS_TRUE):
return -1;

case TYPE_PAIR(IS_TRUE, IS_NULL):
return 1;

case TYPE_PAIR(IS_STRING, IS_STRING):
if (Z_STR_P(op1) == Z_STR_P(op2)) {
return 0;
}
return zendi_smart_strcmp(Z_STR_P(op1), Z_STR_P(op2));

case TYPE_PAIR(IS_NULL, IS_STRING):
return Z_STRLEN_P(op2) == 0 ? 0 : -1;

case TYPE_PAIR(IS_STRING, IS_NULL):
return Z_STRLEN_P(op1) == 0 ? 0 : 1;

case TYPE_PAIR(IS_OBJECT, IS_NULL):
return 1;

case TYPE_PAIR(IS_NULL, IS_OBJECT):
return -1;

default:
if (Z_ISREF_P(op1)) {
op1 = Z_REFVAL_P(op1);
continue;
} else if (Z_ISREF_P(op2)) {
op2 = Z_REFVAL_P(op2);
continue;
}

if (Z_TYPE_P(op1) == IS_OBJECT
&& Z_TYPE_P(op2) == IS_OBJECT
&& Z_OBJ_P(op1) == Z_OBJ_P(op2)) {
return 0;
} else if (Z_TYPE_P(op1) == IS_OBJECT) {
return Z_OBJ_HANDLER_P(op1, compare)(op1, op2);
} else if (Z_TYPE_P(op2) == IS_OBJECT) {
return Z_OBJ_HANDLER_P(op2, compare)(op1, op2);
}

if (!converted) {
if (Z_TYPE_P(op1) < IS_TRUE) {
return zval_is_true(op2) ? -1 : 0;
} else if (Z_TYPE_P(op1) == IS_TRUE) {
return zval_is_true(op2) ? 0 : 1;
} else if (Z_TYPE_P(op2) < IS_TRUE) {
return zval_is_true(op1) ? 1 : 0;
} else if (Z_TYPE_P(op2) == IS_TRUE) {
return zval_is_true(op1) ? 0 : -1;
} else {
op1 = _zendi_convert_scalar_to_number(op1, &op1_copy);
op2 = _zendi_convert_scalar_to_number(op2, &op2_copy);
if (EG(exception)) {
return 1; /* to stop comparison of arrays */
}
converted = 1;
}
} else if (Z_TYPE_P(op1)==IS_ARRAY) {
return 1;
} else if (Z_TYPE_P(op2)==IS_ARRAY) {
return -1;
} else {
ZEND_ASSERT(0);
zend_throw_error(NULL, "Unsupported operand types");
return 1;
}
}
}

本次进入case TYPE_PAIR(IS_STRING, IS_STRING),然后跟进zendi_smart_strcmp()

zend_operators.c zendi_smart_strcmp()

1
2
if ((ret1 = is_numeric_string_ex(s1->val, s1->len, &lval1, &dval1, 0, &oflow1)) &&
(ret2 = is_numeric_string_ex(s2->val, s2->len, &lval2, &dval2, 0, &oflow2)))

跟进is_numeric_string_ex()

zend_operators.c _is_numeric_string_ex()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/* Skip any whitespace
* This is much faster than the isspace() function */
while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
str++;
length--;
}
ptr = str;

if (*ptr == '-') {
neg = 1;
ptr++;
} else if (*ptr == '+') {
ptr++;
}

if (ZEND_IS_DIGIT(*ptr)) {
/* Skip any leading 0s */
while (*ptr == '0') {
ptr++;
}
/* Count the number of digits. If a decimal point/exponent is found,
* it's a double. Otherwise, if there's a dval or no need to check for
* a full match, stop when there are too many digits for a long */
for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1)); digits++, ptr++) {
check_digits:
if (ZEND_IS_DIGIT(*ptr)) {
tmp_lval = tmp_lval * 10 + (*ptr) - '0';
continue;
} else if (*ptr == '.' && dp_or_e < 1) {
goto process_double;
} else if ((*ptr == 'e' || *ptr == 'E') && dp_or_e < 2) {//检查科学计数法
const char *e = ptr + 1;

if (*e == '-' || *e == '+') {
ptr = e++;
}
if (ZEND_IS_DIGIT(*e)) {
goto process_double;
}
}

break;
}

这是这个函数开始时对字符串进行的预处理:先跳过开头的空白字符,再处理正负号,并跳过前导的0。预处理完成以后再统计数字的位数;因为检查到了e,并且e后面跟着数字,所以会跳转到process_double按浮点数处理,其中local_dval = zend_strtod(str, &ptr);很关键,我们可以看看zend_strtod()的内容。

1
2
3
4
if (se)
*se = (char *)s;// Stores the current scan pointer s through se when se is non-NULL; no digit-to-character conversion happens here. NOTE(review): presumably s marks where number parsing stopped - confirm against the full function.
return sign ? -dval(&rv) : dval(&rv);
}

在两个字符串都执行完这一套流程后,"0e12331"和"0e4543"都被当作科学计数法解析成了浮点数0(0乘以10的任意次方仍然是0),然后再进行数值比较,两者相等,最后返回true