/* * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Implementation file for control flow graph dumping for the dexdump utility. */ #include "dexdump_cfg.h" #include <inttypes.h> #include <ostream> #include <map> #include <set> #include "dex_file-inl.h" #include "dex_instruction-inl.h" namespace art { static void dumpMethodCFGImpl(const DexFile* dex_file, uint32_t dex_method_idx, const DexFile::CodeItem* code_item, std::ostream& os) { os << "digraph {\n"; os << " # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n"; std::set<uint32_t> dex_pc_is_branch_target; { // Go and populate. const Instruction* inst = Instruction::At(code_item->insns_); for (uint32_t dex_pc = 0; dex_pc < code_item->insns_size_in_code_units_; dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { if (inst->IsBranch()) { dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset()); } else if (inst->IsSwitch()) { const uint16_t* insns = code_item->insns_ + dex_pc; int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); const uint16_t* switch_insns = insns + switch_offset; uint32_t switch_count = switch_insns[1]; int32_t targets_offset; if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { /* 0=sig, 1=count, 2/3=firstKey */ targets_offset = 4; } else { /* 0=sig, 1=count, 2..count*2 = keys */ targets_offset = 2 + 2 * switch_count; } for (uint32_t targ = 0; targ < switch_count; targ++) { int32_t offset = static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); dex_pc_is_branch_target.insert(dex_pc + offset); } } } } // Create nodes for "basic blocks." std::map<uint32_t, uint32_t> dex_pc_to_node_id; // This only has entries for block starts. std::map<uint32_t, uint32_t> dex_pc_to_incl_id; // This has entries for all dex pcs. { const Instruction* inst = Instruction::At(code_item->insns_); bool first_in_block = true; bool force_new_block = false; for (uint32_t dex_pc = 0; dex_pc < code_item->insns_size_in_code_units_; dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { if (dex_pc == 0 || (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) || force_new_block) { uint32_t id = dex_pc_to_node_id.size(); if (id > 0) { // End last node. os << "}\"];\n"; } // Start next node. os << " node" << id << " [shape=record,label=\"{"; dex_pc_to_node_id.insert(std::make_pair(dex_pc, id)); first_in_block = true; force_new_block = false; } // Register instruction. dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1)); // Print instruction. if (!first_in_block) { os << " | "; } else { first_in_block = false; } // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'. os << "<" << "p" << dex_pc << ">"; os << " 0x" << std::hex << dex_pc << std::dec << ": "; std::string inst_str = inst->DumpString(dex_file); size_t cur_start = 0; // It's OK to start at zero, instruction dumps don't start with chars // we need to escape. while (cur_start != std::string::npos) { size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1); if (next_escape == std::string::npos) { os << inst_str.substr(cur_start, inst_str.size() - cur_start); break; } else { os << inst_str.substr(cur_start, next_escape - cur_start); // Escape all necessary characters. while (next_escape < inst_str.size()) { char c = inst_str.at(next_escape); if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') { os << '\\' << c; } else { break; } next_escape++; } if (next_escape >= inst_str.size()) { next_escape = std::string::npos; } cur_start = next_escape; } } // Force a new block for some fall-throughs and some instructions that terminate the "local" // control flow. force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd(); } // Close last node. if (dex_pc_to_node_id.size() > 0) { os << "}\"];\n"; } } // Create edges between them. { std::ostringstream regular_edges; std::ostringstream taken_edges; std::ostringstream exception_edges; // Common set of exception edges. std::set<uint32_t> exception_targets; // These blocks (given by the first dex pc) need exception per dex-pc handling in a second // pass. In the first pass we try and see whether we can use a common set of edges. std::set<uint32_t> blocks_with_detailed_exceptions; { uint32_t last_node_id = std::numeric_limits<uint32_t>::max(); uint32_t old_dex_pc = 0; uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max(); const Instruction* inst = Instruction::At(code_item->insns_); for (uint32_t dex_pc = 0; dex_pc < code_item->insns_size_in_code_units_; old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { { auto it = dex_pc_to_node_id.find(dex_pc); if (it != dex_pc_to_node_id.end()) { if (!exception_targets.empty()) { // It seems the last block had common exception handlers. Add the exception edges now. uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; for (uint32_t handler_pc : exception_targets) { auto node_id_it = dex_pc_to_incl_id.find(handler_pc); if (node_id_it != dex_pc_to_incl_id.end()) { exception_edges << " node" << node_id << " -> node" << node_id_it->second << ":p" << handler_pc << ";\n"; } } exception_targets.clear(); } block_start_dex_pc = dex_pc; // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things // like switch data. uint32_t old_last = last_node_id; last_node_id = it->second; if (old_last != std::numeric_limits<uint32_t>::max()) { regular_edges << " node" << old_last << ":p" << old_dex_pc << " -> node" << last_node_id << ":p" << dex_pc << ";\n"; } } // Look at the exceptions of the first entry. CatchHandlerIterator catch_it(*code_item, dex_pc); for (; catch_it.HasNext(); catch_it.Next()) { exception_targets.insert(catch_it.GetHandlerAddress()); } } // Handle instruction. // Branch: something with at most two targets. if (inst->IsBranch()) { const int32_t offset = inst->GetTargetOffset(); const bool conditional = !inst->IsUnconditional(); auto target_it = dex_pc_to_node_id.find(dex_pc + offset); if (target_it != dex_pc_to_node_id.end()) { taken_edges << " node" << last_node_id << ":p" << dex_pc << " -> node" << target_it->second << ":p" << (dex_pc + offset) << ";\n"; } if (!conditional) { // No fall-through. last_node_id = std::numeric_limits<uint32_t>::max(); } } else if (inst->IsSwitch()) { // TODO: Iterate through all switch targets. const uint16_t* insns = code_item->insns_ + dex_pc; /* make sure the start of the switch is in range */ int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); /* offset to switch table is a relative branch-style offset */ const uint16_t* switch_insns = insns + switch_offset; uint32_t switch_count = switch_insns[1]; int32_t targets_offset; if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { /* 0=sig, 1=count, 2/3=firstKey */ targets_offset = 4; } else { /* 0=sig, 1=count, 2..count*2 = keys */ targets_offset = 2 + 2 * switch_count; } /* make sure the end of the switch is in range */ /* verify each switch target */ for (uint32_t targ = 0; targ < switch_count; targ++) { int32_t offset = static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); int32_t abs_offset = dex_pc + offset; auto target_it = dex_pc_to_node_id.find(abs_offset); if (target_it != dex_pc_to_node_id.end()) { // TODO: value label. taken_edges << " node" << last_node_id << ":p" << dex_pc << " -> node" << target_it->second << ":p" << (abs_offset) << ";\n"; } } } // Exception edges. If this is not the first instruction in the block if (block_start_dex_pc != dex_pc) { std::set<uint32_t> current_handler_pcs; CatchHandlerIterator catch_it(*code_item, dex_pc); for (; catch_it.HasNext(); catch_it.Next()) { current_handler_pcs.insert(catch_it.GetHandlerAddress()); } if (current_handler_pcs != exception_targets) { exception_targets.clear(); // Clear so we don't do something at the end. blocks_with_detailed_exceptions.insert(block_start_dex_pc); } } if (inst->IsReturn() || (inst->Opcode() == Instruction::THROW) || (inst->IsBranch() && inst->IsUnconditional())) { // No fall-through. last_node_id = std::numeric_limits<uint32_t>::max(); } } // Finish up the last block, if it had common exceptions. if (!exception_targets.empty()) { // It seems the last block had common exception handlers. Add the exception edges now. uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; for (uint32_t handler_pc : exception_targets) { auto node_id_it = dex_pc_to_incl_id.find(handler_pc); if (node_id_it != dex_pc_to_incl_id.end()) { exception_edges << " node" << node_id << " -> node" << node_id_it->second << ":p" << handler_pc << ";\n"; } } exception_targets.clear(); } } // Second pass for detailed exception blocks. // TODO // Exception edges. If this is not the first instruction in the block for (uint32_t dex_pc : blocks_with_detailed_exceptions) { const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]); uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second; while (true) { CatchHandlerIterator catch_it(*code_item, dex_pc); if (catch_it.HasNext()) { std::set<uint32_t> handled_targets; for (; catch_it.HasNext(); catch_it.Next()) { uint32_t handler_pc = catch_it.GetHandlerAddress(); auto it = handled_targets.find(handler_pc); if (it == handled_targets.end()) { auto node_id_it = dex_pc_to_incl_id.find(handler_pc); if (node_id_it != dex_pc_to_incl_id.end()) { exception_edges << " node" << this_node_id << ":p" << dex_pc << " -> node" << node_id_it->second << ":p" << handler_pc << ";\n"; } // Mark as done. handled_targets.insert(handler_pc); } } } if (inst->IsBasicBlockEnd()) { break; } // Loop update. Have a break-out if the next instruction is a branch target and thus in // another block. dex_pc += inst->SizeInCodeUnits(); if (dex_pc >= code_item->insns_size_in_code_units_) { break; } if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) { break; } inst = inst->Next(); } } // Write out the sub-graphs to make edges styled. os << "\n"; os << " subgraph regular_edges {\n"; os << " edge [color=\"#000000\",weight=.3,len=3];\n\n"; os << " " << regular_edges.str() << "\n"; os << " }\n\n"; os << " subgraph taken_edges {\n"; os << " edge [color=\"#00FF00\",weight=.3,len=3];\n\n"; os << " " << taken_edges.str() << "\n"; os << " }\n\n"; os << " subgraph exception_edges {\n"; os << " edge [color=\"#FF0000\",weight=.3,len=3];\n\n"; os << " " << exception_edges.str() << "\n"; os << " }\n\n"; } os << "}\n"; } void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) { // This is painful, we need to find the code item. That means finding the class, and then // iterating the table. if (dex_method_idx >= dex_file->NumMethodIds()) { os << "Could not find method-idx."; return; } const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx); const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_); if (class_def == nullptr) { os << "Could not find class-def."; return; } const uint8_t* class_data = dex_file->GetClassData(*class_def); if (class_data == nullptr) { os << "No class data."; return; } ClassDataItemIterator it(*dex_file, class_data); // Skip fields while (it.HasNextStaticField() || it.HasNextInstanceField()) { it.Next(); } // Find method, and dump it. while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) { uint32_t method_idx = it.GetMemberIndex(); if (method_idx == dex_method_idx) { dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os); return; } it.Next(); } // Otherwise complain. os << "Something went wrong, didn't find the method in the class data."; } } // namespace art