Skip to content

type trees #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
initial type tree implementation
  • Loading branch information
withinboredom committed Mar 29, 2025
commit 155350dc1f9dda3466d8e3e3d13558a7b05ba989
226 changes: 223 additions & 3 deletions Zend/zend_API.c
Original file line number Diff line number Diff line change
Expand Up @@ -2898,7 +2898,223 @@ ZEND_API void zend_add_magic_method(zend_class_entry *ce, zend_function *fptr, z
/* Arginfo table `arg_info_toString`: return by value (return_reference = 0),
 * zero required arguments, IS_STRING return type, null not allowed. */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arg_info_toString, 0, 0, IS_STRING, 0)
ZEND_END_ARG_INFO()

static zend_always_inline void zend_normalize_internal_type(zend_type *type) {
/* Table of interned type-tree nodes, indexed by an integer key derived from
 * zend_type_node_hash(). Starts out NULL and is created on first use by
 * intern_type_node() as a persistent HashTable. */
static HashTable *interned_type_tree = NULL;

// todo: move to zend_types.h
/*
 * Appends `value` to the growable array `list`, which currently holds `count`
 * elements, and increments `count`.
 *
 * The array is grown by exactly one slot through erealloc() on every call.
 * The slot size is taken from the element type of `list` itself, so the macro
 * allocates the right amount regardless of what the array stores.
 *
 * `list` and `count` must be modifiable lvalues. Both are evaluated more than
 * once, so do not pass expressions with side effects.
 */
#define ADD_TO_TREE(list, count, value) \
	do { \
		(list) = erealloc((list), sizeof(*(list)) * ((size_t)(count) + 1)); \
		(list)[(count)++] = (value); \
	} while (0)

/*
 * Three-way comparison of two simple (non-list) types.
 *
 * Ordering criteria, in sequence:
 *   1. the full type mask,
 *   2. the type name, when both types carry one (byte-wise, length-aware),
 *   3. nullability (non-nullable sorts before nullable).
 *
 * Returns a negative value when `a` sorts before `b`, a positive value when it
 * sorts after, and 0 only when the two types are equal under all criteria.
 */
static int compare_simple_types(zend_type a, zend_type b) {
	const uint32_t a_mask = ZEND_TYPE_FULL_MASK(a);
	const uint32_t b_mask = ZEND_TYPE_FULL_MASK(b);

	if (a_mask != b_mask) {
		return a_mask < b_mask ? -1 : 1;
	}

	const bool a_has_name = ZEND_TYPE_HAS_NAME(a);
	const bool b_has_name = ZEND_TYPE_HAS_NAME(b);

	if (a_has_name && b_has_name) {
		zend_string *a_name = ZEND_TYPE_NAME(a);
		zend_string *b_name = ZEND_TYPE_NAME(b);

		/* Identical string objects are trivially equal; otherwise order by
		 * content. Comparing the character pointers for equality (as a
		 * boolean) is not an ordering and must not be returned as one. */
		if (a_name != b_name) {
			const size_t a_len = ZSTR_LEN(a_name);
			const size_t b_len = ZSTR_LEN(b_name);
			const int cmp = memcmp(ZSTR_VAL(a_name), ZSTR_VAL(b_name),
				a_len < b_len ? a_len : b_len);

			if (cmp != 0) {
				return cmp < 0 ? -1 : 1;
			}
			/* Common prefix: the shorter name sorts first. */
			if (a_len != b_len) {
				return a_len < b_len ? -1 : 1;
			}
		}
	}

	const bool a_nullable = ZEND_TYPE_ALLOW_NULL(a);
	const bool b_nullable = ZEND_TYPE_ALLOW_NULL(b);

	if (a_nullable != b_nullable) {
		return a_nullable ? 1 : -1;
	}

	// Types are equal
	return 0;
}

static int compare_type_nodes(const void *a_, const void *b_) {
zend_type_node *a = *(zend_type_node **)a_;
zend_type_node *b = *(zend_type_node **)b_;

if (a->kind != b->kind) {
return a->kind - b->kind;
}

if (a->kind == ZEND_TYPE_SIMPLE) {
return compare_simple_types(a->simple_type, b->simple_type);
}

if (a->compound.num_types != b->compound.num_types) {
return (int)a->compound.num_types - (int)b->compound.num_types;
}

for (uint32_t i = 0; i < a->compound.num_types; i++) {
const int cmp = compare_type_nodes(&a->compound.types[i], &b->compound.types[i]);
if (cmp != 0) {
return cmp;
}
}

return 0;
}

/*
 * Computes a structural hash of a type-tree node.
 *
 * The node kind is mixed in first. A simple node then contributes its full
 * type mask and, when it carries a name, the hash of that name. A union or
 * intersection node contributes the hash of every child, in index order.
 * Each value is folded in with an xor followed by a multiply.
 */
zend_ulong zend_type_node_hash(zend_type_node *node) {
	const zend_ulong multiplier = 16777619;
	zend_ulong acc = 2166136261u; // FNV-1a offset basis

	acc = (acc ^ (zend_ulong)node->kind) * multiplier;

	if (node->kind == ZEND_TYPE_SIMPLE) {
		zend_type simple = node->simple_type;

		acc = (acc ^ (zend_ulong)ZEND_TYPE_FULL_MASK(simple)) * multiplier;

		if (ZEND_TYPE_HAS_NAME(simple)) {
			zend_string *type_name = ZEND_TYPE_NAME(simple);
			acc = (acc ^ zend_string_hash_val(type_name)) * multiplier;
		}
	} else if (node->kind == ZEND_TYPE_UNION || node->kind == ZEND_TYPE_INTERSECTION) {
		for (uint32_t idx = 0; idx < node->compound.num_types; ++idx) {
			acc = (acc ^ zend_type_node_hash(node->compound.types[idx])) * multiplier;
		}
	}

	return acc;
}

/*
 * Structural equality of two type-tree nodes.
 *
 * Two nodes are equal when they are the same object, or when they have the
 * same kind and:
 *   - simple nodes: identical full type masks, the same "has a name" state,
 *     and (if named) equal names according to zend_string_equals();
 *   - compound nodes: the same number of children, with children equal
 *     pairwise at each index.
 */
bool zend_type_node_equals(zend_type_node *a, zend_type_node *b) {
	if (a == b) {
		return true;
	}
	if (a->kind != b->kind) {
		return false;
	}

	if (a->kind != ZEND_TYPE_SIMPLE) {
		// Compound type: union or intersection
		const uint32_t count = a->compound.num_types;

		if (count != b->compound.num_types) {
			return false;
		}

		uint32_t idx = 0;
		while (idx < count && zend_type_node_equals(a->compound.types[idx], b->compound.types[idx])) {
			++idx;
		}
		/* Reaching the end means no pair differed. */
		return idx == count;
	}

	zend_type lhs = a->simple_type;
	zend_type rhs = b->simple_type;

	if (ZEND_TYPE_FULL_MASK(lhs) != ZEND_TYPE_FULL_MASK(rhs)) {
		return false;
	}

	bool lhs_named = ZEND_TYPE_HAS_NAME(lhs);
	bool rhs_named = ZEND_TYPE_HAS_NAME(rhs);

	if (lhs_named != rhs_named) {
		return false;
	}
	if (!lhs_named) {
		return true;
	}

	zend_string *lhs_name = ZEND_TYPE_NAME(lhs);
	zend_string *rhs_name = ZEND_TYPE_NAME(rhs);

	return zend_string_equals(lhs_name, rhs_name) ? true : false;
}


/*
 * Returns the canonical (interned) node structurally equal to `node`.
 *
 * The intern table is created on first use. Slots are keyed by the node hash;
 * when a slot is already taken by a structurally different node, the key is
 * incremented until either an equal node or a free slot is found. Entries are
 * never removed from the table in this function, so this probing finds every
 * node inserted this way.
 *
 * Ownership: if an equal node is already interned, that node is returned and
 * the duplicate `node` is released. This assumes `node` (and, for compound
 * nodes, its `compound.types` array) was allocated persistently by the
 * caller, as zend_type_to_interned_tree() does. Otherwise `node` itself is
 * stored in the table and returned.
 */
static zend_type_node *intern_type_node(zend_type_node *node) {
	zend_ulong hash = zend_type_node_hash(node);
	zend_type_node *existing;

	if (interned_type_tree == NULL) {
		interned_type_tree = pemalloc(sizeof(HashTable), 1);
		zend_hash_init(interned_type_tree, 64, NULL, NULL, 1);
	}

	while ((existing = zend_hash_index_find_ptr(interned_type_tree, hash)) != NULL) {
		if (existing == node) {
			/* Already the canonical instance; nothing to release. */
			return existing;
		}

		if (zend_type_node_equals(existing, node)) {
			/* Reuse the interned node and drop the duplicate so it is not leaked. */
			if (node->kind != ZEND_TYPE_SIMPLE) {
				pefree(node->compound.types, 1);
			}
			pefree(node, 1);
			return existing;
		}

		/* Hash collision with a different type: probe the next key rather
		 * than adding a second entry under a key that already exists. */
		hash++;
	}

	zend_hash_index_add_new_ptr(interned_type_tree, hash, node);
	return node;
}


/*
 * Converts a zend_type into an interned type-tree node.
 *
 * - Returns NULL when `type.type_mask` is 0.
 * - A type without a type list becomes a ZEND_TYPE_SIMPLE node holding a copy
 *   of `type`.
 * - A type with a list becomes a ZEND_TYPE_INTERSECTION or ZEND_TYPE_UNION
 *   node. Each list entry is converted recursively; children of the same
 *   compound kind are flattened into the parent, and the resulting children
 *   are sorted with compare_type_nodes() and de-duplicated.
 *
 * The returned node comes from intern_type_node().
 *
 * NOTE(review): for list types only the list entries are walked; bits set in
 * `type.type_mask` next to the list are not turned into child nodes here.
 * Confirm that this is intended.
 */
ZEND_API zend_type_node *zend_type_to_interned_tree(zend_type type) {
	if (type.type_mask == 0) {
		return NULL;
	}

	if (!ZEND_TYPE_HAS_LIST(type)) {
		zend_type_node *node = pemalloc(sizeof(zend_type_node), 1);
		node->kind = ZEND_TYPE_SIMPLE;
		node->simple_type = type;
		return intern_type_node(node);
	}

	zend_type_list *list = ZEND_TYPE_LIST(type);
	zend_type_node_kind kind = ZEND_TYPE_IS_INTERSECTION(type) ?
		ZEND_TYPE_INTERSECTION : ZEND_TYPE_UNION;

	/* Scratch array of children; released before returning. */
	zend_type_node **children = NULL;
	uint32_t num_children = 0;

	zend_type *subtype;

	ZEND_TYPE_LIST_FOREACH(list, subtype) {
		zend_type_node *child = zend_type_to_interned_tree(*subtype);

		if (child == NULL) {
			/* Entry with an empty mask produced no node; nothing to add. */
		} else if (child->kind == kind) {
			/* Same compound kind as the parent: flatten its children. */
			for (uint32_t i = 0; i < child->compound.num_types; i++) {
				ADD_TO_TREE(children, num_children, child->compound.types[i]);
			}
		} else {
			ADD_TO_TREE(children, num_children, child);
		}
	} ZEND_TYPE_LIST_FOREACH_END();

	if (num_children > 1) {
		qsort(children, num_children, sizeof(zend_type_node *), compare_type_nodes);
	}

	/* After sorting, equal nodes are adjacent: keep the first of each run. */
	size_t deduped_count = 0;
	for (size_t i = 0; i < num_children; i++) {
		if (i == 0 || compare_type_nodes(&children[i], &children[i - 1]) != 0) {
			children[deduped_count++] = children[i];
		}
	}

	zend_type_node *node = pemalloc(sizeof(zend_type_node), 1);
	node->kind = kind;
	node->compound.num_types = deduped_count;
	node->compound.types = pemalloc(sizeof(zend_type_node *) * deduped_count, 1);

	if (children != NULL) {
		memcpy(node->compound.types, children, sizeof(zend_type_node *) * deduped_count);
		/* The children were copied into the node; free the scratch array. */
		efree(children);
	}

	return intern_type_node(node);
}

static zend_always_inline zend_type_node *zend_normalize_internal_type(zend_type *type) {
ZEND_ASSERT(!ZEND_TYPE_HAS_LITERAL_NAME(*type));
if (ZEND_TYPE_PURE_MASK(*type) != MAY_BE_ANY) {
ZEND_ASSERT(!ZEND_TYPE_CONTAINS_CODE(*type, IS_RESOURCE) && "resource is not allowed in a zend_type");
Expand All @@ -2921,6 +3137,8 @@ static zend_always_inline void zend_normalize_internal_type(zend_type *type) {
} ZEND_TYPE_FOREACH_END();
}
} ZEND_TYPE_FOREACH_END();

return zend_type_to_interned_tree(*type);
}

/* registers all functions in *library_functions in the function hash */
Expand Down Expand Up @@ -3190,7 +3408,7 @@ ZEND_API zend_result zend_register_functions(zend_class_entry *scope, const zend
new_arg_info[i].type = legacy_iterable;
}

zend_normalize_internal_type(&new_arg_info[i].type);
new_arg_info[i].type_tree = zend_normalize_internal_type(&new_arg_info[i].type);
}
}

Expand Down Expand Up @@ -4665,7 +4883,9 @@ ZEND_API zend_property_info *zend_declare_typed_property(zend_class_entry *ce, z
property_info->type = type;

if (is_persistent_class(ce)) {
zend_normalize_internal_type(&property_info->type);
property_info->type_tree = zend_normalize_internal_type(&property_info->type);
} else {
property_info->type_tree = zend_type_to_interned_tree(property_info->type);
}

zend_hash_update_ptr(&ce->properties_info, name, property_info);
Expand Down
40 changes: 21 additions & 19 deletions Zend/zend_API.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,46 +128,46 @@ typedef struct _zend_fcall_info_cache {

/* Arginfo structures without type information */
#define ZEND_ARG_INFO(pass_by_ref, name) \
{ #name, ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL },
{ #name, ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, NULL },
#define ZEND_ARG_INFO_WITH_DEFAULT_VALUE(pass_by_ref, name, default_value) \
{ #name, ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), default_value },
{ #name, ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, default_value },
#define ZEND_ARG_VARIADIC_INFO(pass_by_ref, name) \
{ #name, ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL },
{ #name, ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL, NULL },

/* Arginfo structures with simple type information */
#define ZEND_ARG_TYPE_INFO(pass_by_ref, name, type_hint, allow_null) \
{ #name, ZEND_TYPE_INIT_CODE(type_hint, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL },
{ #name, ZEND_TYPE_INIT_CODE(type_hint, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, NULL },
#define ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(pass_by_ref, name, type_hint, allow_null, default_value) \
{ #name, ZEND_TYPE_INIT_CODE(type_hint, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), default_value },
{ #name, ZEND_TYPE_INIT_CODE(type_hint, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, default_value },
#define ZEND_ARG_VARIADIC_TYPE_INFO(pass_by_ref, name, type_hint, allow_null) \
{ #name, ZEND_TYPE_INIT_CODE(type_hint, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL },
{ #name, ZEND_TYPE_INIT_CODE(type_hint, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL, NULL },

/* Arginfo structures with complex type information */
#define ZEND_ARG_TYPE_MASK(pass_by_ref, name, type_mask, default_value) \
{ #name, ZEND_TYPE_INIT_MASK(type_mask | _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), default_value },
{ #name, ZEND_TYPE_INIT_MASK(type_mask | _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, default_value },
#define ZEND_ARG_OBJ_TYPE_MASK(pass_by_ref, name, class_name, type_mask, default_value) \
{ #name, ZEND_TYPE_INIT_CLASS_CONST_MASK(#class_name, type_mask | _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), default_value },
{ #name, ZEND_TYPE_INIT_CLASS_CONST_MASK(#class_name, type_mask | _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, default_value },
#define ZEND_ARG_VARIADIC_OBJ_TYPE_MASK(pass_by_ref, name, class_name, type_mask) \
{ #name, ZEND_TYPE_INIT_CLASS_CONST_MASK(#class_name, type_mask | _ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL },
{ #name, ZEND_TYPE_INIT_CLASS_CONST_MASK(#class_name, type_mask | _ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL, NULL },

/* Arginfo structures with object type information */
#define ZEND_ARG_OBJ_INFO(pass_by_ref, name, class_name, allow_null) \
{ #name, ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL },
{ #name, ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, NULL },
#define ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(pass_by_ref, name, class_name, allow_null, default_value) \
{ #name, ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), default_value },
{ #name, ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, default_value },
#define ZEND_ARG_VARIADIC_OBJ_INFO(pass_by_ref, name, class_name, allow_null) \
{ #name, ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL },
{ #name, ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 1, 0)), NULL, NULL },

/* Legacy arginfo structures */
#define ZEND_ARG_ARRAY_INFO(pass_by_ref, name, allow_null) \
{ #name, ZEND_TYPE_INIT_CODE(IS_ARRAY, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL },
{ #name, ZEND_TYPE_INIT_CODE(IS_ARRAY, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, NULL },
#define ZEND_ARG_CALLABLE_INFO(pass_by_ref, name, allow_null) \
{ #name, ZEND_TYPE_INIT_CODE(IS_CALLABLE, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL },
{ #name, ZEND_TYPE_INIT_CODE(IS_CALLABLE, allow_null, _ZEND_ARG_INFO_FLAGS(pass_by_ref, 0, 0)), NULL, NULL },

#define ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX2(name, return_reference, required_num_args, class_name, allow_null, is_tentative_return_type) \
static const zend_internal_arg_info name[] = { \
{ (const char*)(uintptr_t)(required_num_args), \
ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL },
ZEND_TYPE_INIT_CLASS_CONST(#class_name, allow_null, _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL, NULL },

#define ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(name, return_reference, required_num_args, class_name, allow_null) \
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX2(name, return_reference, required_num_args, class_name, allow_null, 0)
Expand All @@ -180,7 +180,7 @@ typedef struct _zend_fcall_info_cache {

#define ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX2(name, return_reference, required_num_args, type, is_tentative_return_type) \
static const zend_internal_arg_info name[] = { \
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_MASK(type | _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL },
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_MASK(type | _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL, NULL },

#define ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(name, return_reference, required_num_args, type) \
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX2(name, return_reference, required_num_args, type, 0)
Expand All @@ -190,7 +190,7 @@ typedef struct _zend_fcall_info_cache {

#define ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX2(name, return_reference, required_num_args, class_name, type, is_tentative_return_type) \
static const zend_internal_arg_info name[] = { \
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_CLASS_CONST_MASK(#class_name, type | _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL },
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_CLASS_CONST_MASK(#class_name, type | _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL, NULL },

#define ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(name, return_reference, required_num_args, class_name, type) \
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX2(name, return_reference, required_num_args, class_name, type, 0)
Expand All @@ -200,7 +200,7 @@ typedef struct _zend_fcall_info_cache {

#define ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX2(name, return_reference, required_num_args, type, allow_null, is_tentative_return_type) \
static const zend_internal_arg_info name[] = { \
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_CODE(type, allow_null, _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL },
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_CODE(type, allow_null, _ZEND_ARG_INFO_FLAGS(return_reference, 0, is_tentative_return_type)), NULL, NULL },

#define ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(name, return_reference, required_num_args, type, allow_null) \
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX2(name, return_reference, required_num_args, type, allow_null, 0)
Expand All @@ -213,7 +213,7 @@ typedef struct _zend_fcall_info_cache {

#define ZEND_BEGIN_ARG_INFO_EX(name, _unused, return_reference, required_num_args) \
static const zend_internal_arg_info name[] = { \
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(return_reference, 0, 0)), NULL },
{ (const char*)(uintptr_t)(required_num_args), ZEND_TYPE_INIT_NONE(_ZEND_ARG_INFO_FLAGS(return_reference, 0, 0)), NULL, NULL },
#define ZEND_BEGIN_ARG_INFO(name, _unused) \
ZEND_BEGIN_ARG_INFO_EX(name, {}, ZEND_RETURN_VALUE, -1)
#define ZEND_END_ARG_INFO() };
Expand Down Expand Up @@ -448,6 +448,8 @@ ZEND_API zend_result zend_update_class_constant(zend_class_constant *c, const ze
ZEND_API zend_result zend_update_class_constants(zend_class_entry *class_type);
ZEND_API HashTable *zend_separate_class_constants_table(zend_class_entry *class_type);

/* Builds the type-tree node describing `type` and returns its interned
 * instance. Returns NULL when `type.type_mask` is 0. */
ZEND_API zend_type_node *zend_type_to_interned_tree(zend_type type);

static zend_always_inline HashTable *zend_class_constants_table(zend_class_entry *ce) {
if ((ce->ce_flags & ZEND_ACC_HAS_AST_CONSTANTS) && ZEND_MAP_PTR(ce->mutable_data)) {
zend_class_mutable_data *mutable_data =
Expand Down
4 changes: 4 additions & 0 deletions Zend/zend_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -7348,6 +7348,7 @@ static zend_type zend_compile_typename_ex(
}

ast->attr = orig_ast_attr;

return type;
}
/* }}} */
Expand Down Expand Up @@ -7584,6 +7585,7 @@ static void zend_compile_params(zend_ast *ast, zend_ast *return_type_ast, uint32
} else {
arg_infos->type = (zend_type) ZEND_TYPE_INIT_CODE(fallback_return_type, 0, 0);
}
arg_infos->type_tree = zend_type_to_interned_tree(arg_infos->type);
arg_infos++;
op_array->fn_flags |= ZEND_ACC_HAS_RETURN_TYPE;

Expand Down Expand Up @@ -7830,6 +7832,8 @@ static void zend_compile_params(zend_ast *ast, zend_ast *return_type_ast, uint32
&prop->attributes, attributes_ast, 0, ZEND_ATTRIBUTE_TARGET_PROPERTY, ZEND_ATTRIBUTE_TARGET_PARAMETER);
}
}

arg_info->type_tree = zend_type_to_interned_tree(arg_info->type);
}

/* These are assigned at the end to avoid uninitialized memory in case of an error */
Expand Down
Loading