diff --git a/regression/cbmc/trace-values/unbounded_array.c b/regression/cbmc/trace-values/unbounded_array.c index 76d23252541..654b04707cf 100644 --- a/regression/cbmc/trace-values/unbounded_array.c +++ b/regression/cbmc/trace-values/unbounded_array.c @@ -6,6 +6,7 @@ int main(int argc, char *argv[]) unsigned long size; __CPROVER_assume(size < 100 && size > 10); int *array = malloc(size * sizeof(int)); + __CPROVER_assume(array); array[size - 1] = 42; int fixed_array[] = {1, 2}; __CPROVER_array_replace(array, fixed_array); diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index 253b4be198e..c359168b6a0 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -27,8 +27,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "simplify_expr_with_value_set.h" #include "symex_target_equation.h" -static void get_l1_name(exprt &expr); - goto_symex_statet::goto_symex_statet( const symex_targett::sourcet &_source, std::size_t max_field_sensitive_array_size, @@ -129,20 +127,7 @@ renamedt goto_symex_statet::assignment( else propagation.erase_if_exists(l1_identifier); - { - // update value sets - exprt l1_rhs(rhs); - get_l1_name(l1_rhs); - - const ssa_exprt l1_lhs = remove_level_2(lhs); - if(run_validation_checks) - { - DATA_INVARIANT(!check_renaming_l1(l1_lhs), "lhs renaming failed on l1"); - DATA_INVARIANT(!check_renaming_l1(l1_rhs), "rhs renaming failed on l1"); - } - - value_set.assign(l1_lhs, l1_rhs, ns, rhs_is_simplified, is_shared); - } + value_set.assign(lhs, rhs, ns, rhs_is_simplified, is_shared); #ifdef DEBUG std::cout << "Assigning " << l1_identifier << '\n'; @@ -789,17 +774,6 @@ void goto_symex_statet::rename( l1_type_entry.first->second=type; } -static void get_l1_name(exprt &expr) -{ - // do not reset the type ! - - if(is_ssa_expr(expr)) - to_ssa_expr(expr).remove_level_2(); - else - Forall_operands(it, expr) - get_l1_name(*it); -} - /// Dumps the current state of symex, printing the function name and location /// number for each stack frame in the currently active thread. /// This is for use from the debugger or in debug code; please don't delete it diff --git a/src/goto-symex/shadow_memory_util.cpp b/src/goto-symex/shadow_memory_util.cpp index f807a057116..5a3bf883037 100644 --- a/src/goto-symex/shadow_memory_util.cpp +++ b/src/goto-symex/shadow_memory_util.cpp @@ -982,7 +982,7 @@ normalize(const object_descriptor_exprt &expr, const namespacet &ns) { return expr; } - if(expr.offset().id() == ID_unknown) + if(!expr.offset().is_constant()) { return expr; } diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 2c41b0484b6..c92fb02f684 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -23,14 +23,13 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include #include #ifdef DEBUG -#include -#include -#include +# include #endif #include "add_failed_symbols.h" @@ -184,7 +183,7 @@ void value_sett::output(std::ostream &out, const std::string &indent) const stream << "<" << format(o) << ", "; if(o_it->second) - stream << *o_it->second; + stream << format(*o_it->second); else stream << '*'; @@ -261,7 +260,7 @@ exprt value_sett::to_expr(const object_map_dt::value_type &it) const od.object()=object; if(it.second) - od.offset() = from_integer(*it.second, c_index_type()); + od.offset() = *it.second; od.type()=od.object().type(); @@ -352,17 +351,34 @@ bool value_sett::eval_pointer_offset( it=object_map.begin(); it!=object_map.end(); it++) - if(!it->second) + { + if(!it->second || !it->second->is_constant()) return false; else { + // This branch should not be reached as any constant offset will have + // been used before already. The following code will trigger + // `eval_pointer_offset`, yet we wouldn't end up in this branch: + // struct S { int a; char b; }; + // + // int main() + // { + // struct S s; + // int offset; + // __CPROVER_assume(offset >= 0 && offset <= 1 && offset % 2 == 0); + // int *p = (char*)&s + offset; + // int x = *p; + // __CPROVER_assert(s.a == x, ""); + // } + UNREACHABLE; const exprt &object=object_numbering[it->first]; auto ptr_offset = compute_pointer_offset(object, ns); if(!ptr_offset.has_value()) return false; - *ptr_offset += *it->second; + *ptr_offset += + numeric_cast_v(to_constant_expr(*it->second)); if(mod && *ptr_offset != previous_offset) return false; @@ -371,6 +387,7 @@ bool value_sett::eval_pointer_offset( previous_offset = *ptr_offset; mod=true; } + } if(mod) expr.swap(new_expr); @@ -556,8 +573,11 @@ void value_sett::get_value_set_rec( } else if(expr.id()==ID_symbol) { - auto entry_index = get_index_of_symbol( - to_symbol_expr(expr).get_identifier(), expr.type(), suffix, ns); + const symbol_exprt expr_l1 = is_ssa_expr(expr) + ? remove_level_2(to_ssa_expr(expr)) + : to_symbol_expr(expr); + auto entry_index = + get_index_of_symbol(expr_l1.get_identifier(), expr.type(), suffix, ns); if(entry_index.has_value()) make_union(dest, find_entry(*entry_index)->object_map); @@ -623,7 +643,7 @@ void value_sett::get_value_set_rec( insert( dest, exprt(ID_null_object, to_pointer_type(expr.type()).base_type()), - mp_integer{0}); + from_integer(0, c_index_type())); } else if( expr.type().id() == ID_unsignedbv || expr.type().id() == ID_signedbv) @@ -655,7 +675,10 @@ void value_sett::get_value_set_rec( if(op.is_zero()) { - insert(dest, exprt(ID_null_object, empty_typet{}), mp_integer{0}); + insert( + dest, + exprt(ID_null_object, empty_typet{}), + from_integer(0, c_index_type())); } else { @@ -692,11 +715,12 @@ void value_sett::get_value_set_rec( expr.id() == ID_bitnand || expr.id() == ID_bitnor || expr.id() == ID_bitxnor) { - if(expr.operands().size()<2) - throw expr.id_string()+" expected to have at least two operands"; + DATA_INVARIANT( + expr.operands().size() >= 2, + expr.id_string() + " expected to have at least two operands"); object_mapt pointer_expr_set; - std::optional i; + std::optional additional_offset; // special case for plus/minus and exactly one pointer std::optional ptr_operand; @@ -704,7 +728,6 @@ void value_sett::get_value_set_rec( expr.type().id() == ID_pointer && (expr.id() == ID_plus || expr.id() == ID_minus)) { - bool non_const_offset = false; for(const auto &op : expr.operands()) { if(op.type().id() == ID_pointer) @@ -717,24 +740,20 @@ void value_sett::get_value_set_rec( ptr_operand = op; } - else if(!non_const_offset) + else { - auto offset = numeric_cast(op); - if(!offset.has_value()) - { - i.reset(); - non_const_offset = true; - } + if(!additional_offset.has_value()) + additional_offset = op; else { - if(!i.has_value()) - i = mp_integer{0}; - i = *i + *offset; + additional_offset = plus_exprt{ + *additional_offset, + typecast_exprt::conditional_cast(op, additional_offset->type())}; } } } - if(ptr_operand.has_value() && i.has_value()) + if(ptr_operand.has_value() && additional_offset.has_value()) { typet pointer_base_type = to_pointer_type(ptr_operand->type()).base_type(); @@ -745,18 +764,22 @@ void value_sett::get_value_set_rec( if(!size.has_value() || (*size) == 0) { - i.reset(); + additional_offset.reset(); } else { - *i *= *size; + additional_offset = mult_exprt{ + *additional_offset, from_integer(*size, additional_offset->type())}; if(expr.id()==ID_minus) { DATA_INVARIANT( to_minus_expr(expr).lhs() == *ptr_operand, "unexpected subtraction of pointer from integer"); - i->negate(); + DATA_INVARIANT( + additional_offset->type().id() != ID_unsignedbv, + "offset type must support negation"); + additional_offset = unary_minus_exprt{*additional_offset}; } } } @@ -790,8 +813,15 @@ void value_sett::get_value_set_rec( offsett offset = it->second; // adjust by offset - if(offset && i.has_value()) - *offset += *i; + if(offset && additional_offset.has_value()) + { + offset = simplify_expr( + plus_exprt{ + *offset, + typecast_exprt::conditional_cast( + *additional_offset, offset->type())}, + ns); + } else offset.reset(); @@ -803,8 +833,9 @@ void value_sett::get_value_set_rec( // this is to do stuff like // (int*)((sel*(ulong)&a)+((sel^0x1)*(ulong)&b)) - if(expr.operands().size()<2) - throw expr.id_string()+" expected to have at least two operands"; + DATA_INVARIANT( + expr.operands().size() >= 2, + expr.id_string() + " expected to have at least two operands"); object_mapt pointer_expr_set; @@ -858,7 +889,7 @@ void value_sett::get_value_set_rec( if(statement==ID_function_call) { // these should be gone - throw "unexpected function_call sideeffect"; + UNREACHABLE; } else if(statement==ID_allocate) { @@ -871,11 +902,13 @@ void value_sett::get_value_set_rec( dynamic_object.set_instance(location_number); dynamic_object.valid()=true_exprt(); - insert(dest, dynamic_object, mp_integer{0}); + insert(dest, dynamic_object, from_integer(0, c_index_type())); } else if(statement==ID_cpp_new || statement==ID_cpp_new_array) { + // this is rewritten in the front-end, should be gone + UNREACHABLE; PRECONDITION(suffix.empty()); PRECONDITION(expr.type().id() == ID_pointer); @@ -884,7 +917,7 @@ void value_sett::get_value_set_rec( dynamic_object.set_instance(location_number); dynamic_object.valid()=true_exprt(); - insert(dest, dynamic_object, mp_integer{0}); + insert(dest, dynamic_object, from_integer(0, c_index_type())); } else insert(dest, exprt(ID_unknown, original_type)); @@ -1331,12 +1364,17 @@ void value_sett::get_reference_set_rec( expr.id()==ID_string_constant || expr.id()==ID_array) { + exprt l1_expr = + is_ssa_expr(expr) ? remove_level_2(to_ssa_expr(expr)) : expr; + if( expr.type().id() == ID_array && to_array_type(expr.type()).element_type().id() == ID_array) - insert(dest, expr); + { + insert(dest, l1_expr); + } else - insert(dest, expr, mp_integer{0}); + insert(dest, l1_expr, from_integer(0, c_index_type())); return; } @@ -1360,12 +1398,9 @@ void value_sett::get_reference_set_rec( } else if(expr.id()==ID_index) { - if(expr.operands().size()!=2) - throw "index expected to have two operands"; - const index_exprt &index_expr=to_index_expr(expr); const exprt &array=index_expr.array(); - const exprt &offset=index_expr.index(); + const exprt &index = index_expr.index(); DATA_INVARIANT( array.type().id() == ID_array, "index takes array-typed operand"); @@ -1393,22 +1428,24 @@ void value_sett::get_reference_set_rec( from_integer(0, c_index_type())); offsett o = a_it->second; - const auto i = numeric_cast(offset); - if(offset.is_zero()) - { - } - else if(i.has_value() && o) + if(!index.is_zero() && o.has_value()) { auto size = pointer_offset_size(array_type.element_type(), ns); if(!size.has_value() || *size == 0) o.reset(); else - *o = *i * (*size); + { + o = simplify_expr( + plus_exprt{ + *o, + typecast_exprt::conditional_cast( + mult_exprt{index, from_integer(*size, index.type())}, + o->type())}, + ns); + } } - else - o.reset(); insert(dest, deref_index_expr, o); } @@ -1658,7 +1695,9 @@ void value_sett::assign_rec( if(lhs.id()==ID_symbol) { - const irep_idt &identifier=to_symbol_expr(lhs).get_identifier(); + const symbol_exprt lhs_l1 = + is_ssa_expr(lhs) ? remove_level_2(to_ssa_expr(lhs)) : to_symbol_expr(lhs); + const irep_idt &identifier = lhs_l1.get_identifier(); update_entry( entryt{identifier, suffix}, lhs.type(), values_rhs, add_to_sets); @@ -1676,8 +1715,9 @@ void value_sett::assign_rec( } else if(lhs.id()==ID_dereference) { - if(lhs.operands().size()!=1) - throw lhs.id_string()+" expected to have one operand"; + DATA_INVARIANT( + lhs.operands().size() == 1, + lhs.id_string() + " expected to have one operand"); object_mapt reference_set; get_reference_set(lhs, reference_set, ns); @@ -1763,7 +1803,7 @@ void value_sett::assign_rec( // which we don't track } else - throw "assign NYI: '" + lhs.id_string() + "'"; + UNIMPLEMENTED_FEATURE("assign NYI: '" + lhs.id_string() + "'"); } void value_sett::do_function_call( @@ -1842,36 +1882,31 @@ void value_sett::apply_code_rec( } else if(statement==ID_assign) { - if(code.operands().size()!=2) - throw "assignment expected to have two operands"; - - assign(code.op0(), code.op1(), ns, false, false); + const code_assignt &a = to_code_assign(code); + assign(a.lhs(), a.rhs(), ns, false, false); } else if(statement==ID_decl) { - if(code.operands().size()!=1) - throw "decl expected to have one operand"; - - const exprt &lhs=code.op0(); - - if(lhs.id()!=ID_symbol) - throw "decl expected to have symbol on lhs"; - - const typet &lhs_type = lhs.type(); + const code_declt &decl = to_code_decl(code); + const symbol_exprt &symbol = decl.symbol(); + const typet &symbol_type = symbol.type(); if( - lhs_type.id() == ID_pointer || - (lhs_type.id() == ID_array && - to_array_type(lhs_type).element_type().id() == ID_pointer)) + symbol_type.id() == ID_pointer || + (symbol_type.id() == ID_array && + to_array_type(symbol_type).element_type().id() == ID_pointer)) { + const symbol_exprt symbol_l1 = is_ssa_expr(symbol) + ? remove_level_2(to_ssa_expr(symbol)) + : to_symbol_expr(symbol); // assign the address of the failed object - if(auto failed = get_failed_symbol(to_symbol_expr(lhs), ns)) + if(auto failed = get_failed_symbol(symbol_l1, ns)) { - address_of_exprt address_of_expr(*failed, to_pointer_type(lhs.type())); - assign(lhs, address_of_expr, ns, false, false); + address_of_exprt address_of_expr(*failed, to_pointer_type(symbol_type)); + assign(symbol, address_of_expr, ns, false, false); } else - assign(lhs, exprt(ID_invalid), ns, false, false); + assign(symbol, exprt(ID_invalid), ns, false, false); } } else if(statement==ID_expression) @@ -1944,8 +1979,8 @@ void value_sett::apply_code_rec( } else { - // std::cerr << code.pretty() << '\n'; - throw "value_sett: unexpected statement: "+id2string(statement); + UNIMPLEMENTED_FEATURE( + "value_sett: unexpected statement: " + id2string(statement)); } } diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index 5c71946acd8..73119a8ffb6 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -12,15 +12,14 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_POINTER_ANALYSIS_VALUE_SET_H #define CPROVER_POINTER_ANALYSIS_VALUE_SET_H -#include - -#include #include #include #include "object_numbering.h" #include "value_sets.h" +#include + class namespacet; class xmlt; @@ -76,9 +75,9 @@ class value_sett /// in value sets. static object_numberingt object_numbering; - /// Represents the offset into an object: either a unique integer offset, - /// or an unknown value, represented by `!offset`. - typedef std::optional offsett; + /// Represents the offset into an object: either some (possibly constant) + /// expression, or an unknown value, represented by `!offset`. + typedef std::optional offsett; /// Represents a set of expressions (`exprt` instances) with corresponding /// offsets (`offsett` instances). This is the RHS set of a single row of @@ -140,20 +139,6 @@ class value_sett return insert(dest, object_numbering.number(src), offsett()); } - /// Adds an expression to an object map, with known offset. If the - /// destination map has an existing element for the same expression - /// with a differing offset its offset is marked unknown. - /// \param dest: object map to update - /// \param src: expression to add - /// \param offset_value: offset into `src` - bool insert( - object_mapt &dest, - const exprt &src, - const mp_integer &offset_value) const - { - return insert(dest, object_numbering.number(src), offsett(offset_value)); - } - /// Adds a numbered expression and offset to an object map. If the /// destination map has an existing element for the same expression /// with a differing offset its offset is marked unknown. diff --git a/unit/pointer-analysis/value_set.cpp b/unit/pointer-analysis/value_set.cpp index 1e123a922d0..e15b29b6920 100644 --- a/unit/pointer-analysis/value_set.cpp +++ b/unit/pointer-analysis/value_set.cpp @@ -298,4 +298,138 @@ SCENARIO( } } } + + GIVEN("A value-set containing pointers with offsets") + { + signedbv_typet int_type{sizeof(int) * CHAR_BIT}; + pointer_typet int_ptr_type{int_type, sizeof(void *) * CHAR_BIT}; + + // Create struct S { int a; char b } + struct_typet struct_S{{{"a", int_type}, {"b", unsignedbv_typet{CHAR_BIT}}}}; + struct_S.set_tag("S"); + + auto &S_a = struct_S.components()[0]; + auto &S_b = struct_S.components()[1]; + + S_a.set_base_name("a"); + S_a.set_pretty_name("a"); + + S_b.set_base_name("b"); + S_b.set_pretty_name("b"); + + type_symbolt S_symbol{"S", struct_S, irep_idt{}}; + S_symbol.base_name = "S"; + S_symbol.pretty_name = "S"; + + symbol_table.add(S_symbol); + + // Create global symbols struct S s, int *p + + symbolt s_symbol{"s", struct_tag_typet{S_symbol.name}, irep_idt{}}; + s_symbol.pretty_name = "s"; + s_symbol.is_static_lifetime = true; + symbol_table.add(s_symbol); + + symbolt p1_symbol{"p1", int_ptr_type, irep_idt{}}; + p1_symbol.pretty_name = "p1"; + p1_symbol.is_static_lifetime = true; + symbol_table.add(p1_symbol); + + // Assign p1 = &s + s.a + s.a; (which we cannot easily create via regression + // tests, because front-ends would turn this into binary expressions) + member_exprt s_a(s_symbol.symbol_expr(), S_a); + code_assignt assign_p1{ + p1_symbol.symbol_expr(), + plus_exprt{ + {typecast_exprt{ + address_of_exprt{s_symbol.symbol_expr()}, p1_symbol.type}, + s_a, + s_a}, + int_ptr_type}}; + + value_set.apply_code(assign_p1, ns); + + WHEN("We query what p1 points to") + { + const std::vector p1_result = + value_set.get_value_set(p1_symbol.symbol_expr(), ns); + + THEN("It should point to 's'") + { + REQUIRE(p1_result.size() == 1); + const exprt &result = *p1_result.begin(); + REQUIRE(object_descriptor_matches(result, s_symbol.symbol_expr())); + } + } + + symbolt p2_symbol{"p2", int_ptr_type, irep_idt{}}; + p2_symbol.pretty_name = "p2"; + p2_symbol.is_static_lifetime = true; + symbol_table.add(p2_symbol); + + // Assign p2 = &s - s.a; (which the simplifier would always rewrite to &s + + // -(s.a), so use the value_sett::assign interface to wrongly claim + // simplification had already taken place) + value_set.assign( + p2_symbol.symbol_expr(), + minus_exprt{ + typecast_exprt{ + address_of_exprt{s_symbol.symbol_expr()}, p2_symbol.type}, + s_a}, + ns, + true, + true); + + WHEN("We query what p2 points to") + { + const std::vector p2_result = + value_set.get_value_set(p2_symbol.symbol_expr(), ns); + + THEN("It should point to 's'") + { + REQUIRE(p2_result.size() == 1); + const exprt &result = *p2_result.begin(); + REQUIRE(object_descriptor_matches(result, s_symbol.symbol_expr())); + } + } + + symbolt A_symbol{ + "A", array_typet{int_type, from_integer(2, int_type)}, irep_idt{}}; + A_symbol.pretty_name = "A"; + A_symbol.is_static_lifetime = true; + symbol_table.add(A_symbol); + + symbolt p3_symbol{"p3", int_ptr_type, irep_idt{}}; + p3_symbol.pretty_name = "p3"; + p3_symbol.is_static_lifetime = true; + symbol_table.add(p3_symbol); + + // Assign p3 = &A[1]; (which the simplifier would always rewrite to A + + // sizeof(int), so use the value_sett::assign interface to wrongly claim + // simplification had already taken place) + value_set.assign( + p3_symbol.symbol_expr(), + address_of_exprt{ + index_exprt{A_symbol.symbol_expr(), from_integer(1, int_type)}}, + ns, + true, + true); + + WHEN("We query what p3 points to") + { + const std::vector p3_result = + value_set.get_value_set(p3_symbol.symbol_expr(), ns); + + THEN("It should point to 'A'") + { + REQUIRE(p3_result.size() == 1); + const exprt &result = *p3_result.begin(); + REQUIRE(object_descriptor_matches( + result, + index_exprt{ + A_symbol.symbol_expr(), + from_integer(0, to_array_type(A_symbol.type).index_type())})); + } + } + } }