// Copyright (c) 2017 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "tools/stats/stats_analyzer.h" #include <algorithm> #include <cassert> #include <cstring> #include <iostream> #include <map> #include <sstream> #include <string> #include <unordered_map> #include <unordered_set> #include <utility> #include <vector> #include "source/comp/markv_model.h" #include "source/enum_string_mapping.h" #include "source/latest_version_spirv_header.h" #include "source/opcode.h" #include "source/operand.h" #include "source/spirv_constant.h" namespace spvtools { namespace stats { namespace { // Signals that the value is not in the coding scheme and a fallback method // needs to be used. const uint64_t kMarkvNoneOfTheAbove = comp::MarkvModel::GetMarkvNoneOfTheAbove(); std::string GetVersionString(uint32_t word) { std::stringstream ss; ss << "Version " << SPV_SPIRV_VERSION_MAJOR_PART(word) << "." << SPV_SPIRV_VERSION_MINOR_PART(word); return ss.str(); } std::string GetGeneratorString(uint32_t word) { return spvGeneratorStr(SPV_GENERATOR_TOOL_PART(word)); } std::string GetOpcodeString(uint32_t word) { return spvOpcodeString(static_cast<SpvOp>(word)); } std::string GetCapabilityString(uint32_t word) { return CapabilityToString(static_cast<SpvCapability>(word)); } template <class T> std::string KeyIsLabel(T key) { std::stringstream ss; ss << key; return ss.str(); } template <class Key> std::unordered_map<Key, double> GetRecall( const std::unordered_map<Key, uint32_t>& hist, uint64_t total) { std::unordered_map<Key, double> freq; for (const auto& pair : hist) { const double frequency = static_cast<double>(pair.second) / static_cast<double>(total); freq.emplace(pair.first, frequency); } return freq; } template <class Key> std::unordered_map<Key, double> GetPrevalence( const std::unordered_map<Key, uint32_t>& hist) { uint64_t total = 0; for (const auto& pair : hist) { total += pair.second; } return GetRecall(hist, total); } // Writes |freq| to |out| sorted by frequency in the following format: // LABEL3 70% // LABEL1 20% // LABEL2 10% // |label_from_key| is used to convert |Key| to label. template <class Key> void WriteFreq(std::ostream& out, const std::unordered_map<Key, double>& freq, std::string (*label_from_key)(Key)) { std::vector<std::pair<Key, double>> sorted_freq(freq.begin(), freq.end()); std::sort(sorted_freq.begin(), sorted_freq.end(), [](const std::pair<Key, double>& left, const std::pair<Key, double>& right) { return left.second > right.second; }); for (const auto& pair : sorted_freq) { if (pair.second < 0.001) break; out << label_from_key(pair.first) << " " << pair.second * 100.0 << "%" << std::endl; } } } // namespace StatsAnalyzer::StatsAnalyzer(const SpirvStats& stats) : stats_(stats) { num_modules_ = 0; for (const auto& pair : stats_.version_hist) { num_modules_ += pair.second; } version_freq_ = GetRecall(stats_.version_hist, num_modules_); generator_freq_ = GetRecall(stats_.generator_hist, num_modules_); capability_freq_ = GetRecall(stats_.capability_hist, num_modules_); extension_freq_ = GetRecall(stats_.extension_hist, num_modules_); opcode_freq_ = GetPrevalence(stats_.opcode_hist); } void StatsAnalyzer::WriteVersion(std::ostream& out) { WriteFreq(out, version_freq_, GetVersionString); } void StatsAnalyzer::WriteGenerator(std::ostream& out) { WriteFreq(out, generator_freq_, GetGeneratorString); } void StatsAnalyzer::WriteCapability(std::ostream& out) { WriteFreq(out, capability_freq_, GetCapabilityString); } void StatsAnalyzer::WriteExtension(std::ostream& out) { WriteFreq(out, extension_freq_, KeyIsLabel); } void StatsAnalyzer::WriteOpcode(std::ostream& out) { out << "Total unique opcodes used: " << opcode_freq_.size() << std::endl; WriteFreq(out, opcode_freq_, GetOpcodeString); } void StatsAnalyzer::WriteConstantLiterals(std::ostream& out) { out << "Constant literals" << std::endl; out << "Float 32" << std::endl; WriteFreq(out, GetPrevalence(stats_.f32_constant_hist), KeyIsLabel); out << std::endl << "Float 64" << std::endl; WriteFreq(out, GetPrevalence(stats_.f64_constant_hist), KeyIsLabel); out << std::endl << "Unsigned int 16" << std::endl; WriteFreq(out, GetPrevalence(stats_.u16_constant_hist), KeyIsLabel); out << std::endl << "Signed int 16" << std::endl; WriteFreq(out, GetPrevalence(stats_.s16_constant_hist), KeyIsLabel); out << std::endl << "Unsigned int 32" << std::endl; WriteFreq(out, GetPrevalence(stats_.u32_constant_hist), KeyIsLabel); out << std::endl << "Signed int 32" << std::endl; WriteFreq(out, GetPrevalence(stats_.s32_constant_hist), KeyIsLabel); out << std::endl << "Unsigned int 64" << std::endl; WriteFreq(out, GetPrevalence(stats_.u64_constant_hist), KeyIsLabel); out << std::endl << "Signed int 64" << std::endl; WriteFreq(out, GetPrevalence(stats_.s64_constant_hist), KeyIsLabel); } void StatsAnalyzer::WriteOpcodeMarkov(std::ostream& out) { if (stats_.opcode_markov_hist.empty()) return; const std::unordered_map<uint32_t, std::unordered_map<uint32_t, uint32_t>>& cue_to_hist = stats_.opcode_markov_hist[0]; // Sort by prevalence of the opcodes in opcode_freq_ (descending). std::vector<std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>> sorted_cue_to_hist(cue_to_hist.begin(), cue_to_hist.end()); std::sort( sorted_cue_to_hist.begin(), sorted_cue_to_hist.end(), [this](const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>& left, const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>& right) { const double lf = opcode_freq_[left.first]; const double rf = opcode_freq_[right.first]; if (lf == rf) return right.first > left.first; return lf > rf; }); for (const auto& kv : sorted_cue_to_hist) { const uint32_t cue = kv.first; const double kFrequentEnoughToAnalyze = 0.0001; if (opcode_freq_[cue] < kFrequentEnoughToAnalyze) continue; const std::unordered_map<uint32_t, uint32_t>& hist = kv.second; uint32_t total = 0; for (const auto& pair : hist) { total += pair.second; } std::vector<std::pair<uint32_t, uint32_t>> sorted_hist(hist.begin(), hist.end()); std::sort(sorted_hist.begin(), sorted_hist.end(), [](const std::pair<uint32_t, uint32_t>& left, const std::pair<uint32_t, uint32_t>& right) { if (left.second == right.second) return right.first > left.first; return left.second > right.second; }); for (const auto& pair : sorted_hist) { const double prior = opcode_freq_[pair.first]; const double posterior = static_cast<double>(pair.second) / static_cast<double>(total); out << GetOpcodeString(cue) << " -> " << GetOpcodeString(pair.first) << " " << posterior * 100 << "% (base rate " << prior * 100 << "%, pair occurrences " << pair.second << ")" << std::endl; } } } } // namespace stats } // namespace spvtools