// Plain text  |  592 lines  |  17.83 KB

/*
 * Copyright (c) 2016 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <fcntl.h>
#include <linux/elf.h>
#include <linux/perf_event.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <cerrno>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>

#include "BPFTable.h"

#include "bcc_exception.h"
#include "bcc_syms.h"
#include "common.h"
#include "file_desc.h"
#include "libbpf.h"
#include "perf_reader.h"

namespace ebpf {

// Constructs the string-based table wrapper by forwarding `desc` to the
// BPFTableBase<void, void> base class; the constructor body itself is empty.
BPFTable::BPFTable(const TableDesc& desc) : BPFTableBase<void, void>(desc) {}

// Looks up a single entry by its textual key and renders the value as text.
//
// `key_str` is parsed into a raw key with string_to_key(); the raw value
// fetched by lookup() is formatted into `value_str` with leaf_to_string().
//
// Returns the conversion helper's status if parsing or formatting fails,
// code -1 ("error getting value") if lookup() fails, and otherwise the status
// produced by leaf_to_string().
//
// The scratch buffers are std::vector<char> rather than variable-length
// arrays: VLAs are only a compiler extension in C++, and their size here is
// taken from the map description at run time.
StatusTuple BPFTable::get_value(const std::string& key_str,
                                std::string& value_str) {
  std::vector<char> key(desc.key_size);
  std::vector<char> value(desc.leaf_size);

  StatusTuple r = string_to_key(key_str, key.data());
  if (r.code() != 0)
    return r;

  if (!lookup(key.data(), value.data()))
    return StatusTuple(-1, "error getting value");

  return leaf_to_string(value.data(), value_str);
}

// Looks up a per-CPU entry by its textual key and renders one string per
// possible CPU into `value_str` (resized to the number of possible CPUs).
//
// Returns the conversion helper's status if parsing or formatting fails,
// code -1 ("error getting value") if lookup() fails, and code 0 on success.
//
// Buffer layout: for per-CPU maps each CPU's value occupies a slot whose size
// is the leaf size rounded up to a multiple of 8 bytes, so both the buffer
// size and the per-CPU offset use that stride instead of the raw leaf size.
// With an 8-byte-aligned leaf the stride equals desc.leaf_size. The buffer is
// heap-backed and zero-initialized (no variable-length array on the stack).
StatusTuple BPFTable::get_value(const std::string& key_str,
                                std::vector<std::string>& value_str) {
  const size_t ncpus = get_possible_cpus().size();
  const size_t stride = (desc.leaf_size + 7) & ~static_cast<size_t>(7);

  std::vector<char> key(desc.key_size);
  std::vector<char> value(stride * ncpus);

  StatusTuple r = string_to_key(key_str, key.data());
  if (r.code() != 0)
    return r;

  if (!lookup(key.data(), value.data()))
    return StatusTuple(-1, "error getting value");

  value_str.resize(ncpus);

  for (size_t i = 0; i < ncpus; i++) {
    r = leaf_to_string(value.data() + i * stride, value_str.at(i));
    if (r.code() != 0)
      return r;
  }
  return StatusTuple(0);
}

// Writes a single entry given textual key and value.
//
// `key_str` and `value_str` are parsed with string_to_key() and
// string_to_leaf() and the result is stored with update().
//
// Returns the parser's status if either conversion fails, code -1
// ("error updating element") if update() fails, and code 0 on success.
//
// The scratch buffers are std::vector<char> rather than variable-length
// arrays, which are only a compiler extension in C++.
StatusTuple BPFTable::update_value(const std::string& key_str,
                                   const std::string& value_str) {
  std::vector<char> key(desc.key_size);
  std::vector<char> value(desc.leaf_size);

  StatusTuple r = string_to_key(key_str, key.data());
  if (r.code() != 0)
    return r;

  r = string_to_leaf(value_str, value.data());
  if (r.code() != 0)
    return r;

  if (!update(key.data(), value.data()))
    return StatusTuple(-1, "error updating element");

  return StatusTuple(0);
}

// Writes a per-CPU entry given a textual key and one textual value per
// possible CPU.
//
// `value_str` must contain exactly as many elements as there are possible
// CPUs; otherwise code -1 ("bad value size") is returned. Returns the
// parser's status if a conversion fails, code -1 ("error updating element")
// if update() fails, and code 0 on success.
//
// Buffer layout: for per-CPU maps each CPU's value occupies a slot whose size
// is the leaf size rounded up to a multiple of 8 bytes, so the buffer size
// and the per-CPU offset use that stride. The buffer is zero-initialized, so
// any padding between slots is passed to update() as zeros. With an
// 8-byte-aligned leaf the stride equals desc.leaf_size.
StatusTuple BPFTable::update_value(const std::string& key_str,
                                   const std::vector<std::string>& value_str) {
  const size_t ncpus = get_possible_cpus().size();
  const size_t stride = (desc.leaf_size + 7) & ~static_cast<size_t>(7);

  std::vector<char> key(desc.key_size);
  std::vector<char> value(stride * ncpus);

  StatusTuple r = string_to_key(key_str, key.data());
  if (r.code() != 0)
    return r;

  if (value_str.size() != ncpus)
    return StatusTuple(-1, "bad value size");

  for (size_t i = 0; i < ncpus; i++) {
    r = string_to_leaf(value_str.at(i), value.data() + i * stride);
    if (r.code() != 0)
      return r;
  }

  if (!update(key.data(), value.data()))
    return StatusTuple(-1, "error updating element");

  return StatusTuple(0);
}

// Deletes the entry identified by the textual key `key_str`.
//
// Returns the status of string_to_key() if the key cannot be parsed, code -1
// ("error removing element") if remove() fails, and code 0 on success.
//
// The key buffer is a std::vector<char> rather than a variable-length array,
// which is only a compiler extension in C++.
StatusTuple BPFTable::remove_value(const std::string& key_str) {
  std::vector<char> key(desc.key_size);

  StatusTuple r = string_to_key(key_str, key.data());
  if (r.code() != 0)
    return r;

  if (!remove(key.data()))
    return StatusTuple(-1, "error removing element");

  return StatusTuple(0);
}

// Removes the table's elements one by one (hence "non atomic": the table is
// not emptied in a single operation).
//
//  - Hash-like maps (HASH, PERCPU_HASH, LRU_HASH, LRU_PERCPU_HASH,
//    HASH_OF_MAPS): repeatedly fetch the first key and delete it; a failed
//    delete aborts with code -1.
//  - ARRAY / PERCPU_ARRAY: rejected with code -1.
//  - PROG_ARRAY, PERF_EVENT_ARRAY, STACK_TRACE, ARRAY_OF_MAPS: every index in
//    [0, max_entries) is removed; individual failures are ignored.
//  - Anything else: rejected with code -1.
//
// Returns code 0 when the table was processed.
StatusTuple BPFTable::clear_table_non_atomic() {
  if (desc.type == BPF_MAP_TYPE_HASH || desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_LRU_HASH ||
      desc.type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
    // For hash maps, use the first() interface (which uses get_next_key) to
    // iterate through the map and clear elements
    auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
                                                        ::free);

    while (this->first(key.get())) {
      if (!this->remove(key.get())) {
        return StatusTuple(-1,
                           "Failed to delete element when clearing table %s",
                           desc.name.c_str());
      }
    }
  } else if (desc.type == BPF_MAP_TYPE_ARRAY ||
             desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) {
    return StatusTuple(-1, "Array map %s do not support clearing elements",
                       desc.name.c_str());
  } else if (desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
             desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
             desc.type == BPF_MAP_TYPE_STACK_TRACE ||
             desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
    // For Stack-trace and FD arrays, just iterate over all indices. The key
    // handed to remove() is a 32-bit index: passing the address of a wider
    // counter only happens to work on little-endian machines.
    for (size_t i = 0; i < desc.max_entries; i++) {
      uint32_t index = static_cast<uint32_t>(i);
      this->remove(&index);
    }
  } else {
    return StatusTuple(-1, "Clearing for map type of %s not supported yet",
                       desc.name.c_str());
  }

  return StatusTuple(0);
}

// Dumps the table into `res` as (key, value) string pairs.
//
// `res` is cleared first for every map type, so on return it holds only the
// pairs read by this call (on error: the pairs converted before the failure).
//
//  - Array-like maps are walked by index over [0, max_entries); indices whose
//    lookup fails with ENOENT are skipped, any other lookup error aborts with
//    code -1.
//  - All other maps are walked with first()/next(); the walk stops quietly
//    when a lookup() or next() call fails.
//
// For per-CPU map types the lookup fills one 8-byte-aligned slot per possible
// CPU, so the value buffer is sized for all of them; only the first slot is
// rendered into the output string.
//
// Returns code 0 on success, or the failing conversion's status.
StatusTuple BPFTable::get_table_offline(
  std::vector<std::pair<std::string, std::string>> &res) {
  StatusTuple r(0);

  res.clear();

  size_t value_bytes = desc.leaf_size;
  if (desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) {
    const size_t stride = (desc.leaf_size + 7) & ~static_cast<size_t>(7);
    const size_t percpu_bytes = stride * get_possible_cpus().size();
    // Never shrink below one leaf: leaf_to_string() still reads a full leaf.
    if (percpu_bytes > value_bytes)
      value_bytes = percpu_bytes;
  }
  std::vector<char> value(value_bytes);

  std::string key_str;
  std::string value_str;

  if (desc.type == BPF_MAP_TYPE_ARRAY ||
      desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
      desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
      desc.type == BPF_MAP_TYPE_PERCPU_ARRAY ||
      desc.type == BPF_MAP_TYPE_CGROUP_ARRAY ||
      desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
      desc.type == BPF_MAP_TYPE_DEVMAP ||
      desc.type == BPF_MAP_TYPE_CPUMAP ||
      desc.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
    // For arrays, just iterate over all indices. The key is a 32-bit index;
    // passing the address of a wider counter only happens to work on
    // little-endian machines.
    for (size_t i = 0; i < desc.max_entries; i++) {
      uint32_t index = static_cast<uint32_t>(i);
      int err = bpf_lookup_elem(desc.fd, &index, value.data());
      if (err < 0 && errno == ENOENT) {
        // Element is not present, skip it
        continue;
      } else if (err < 0) {
        // Other error, abort
        return StatusTuple(-1, "Error looking up value: %s", std::strerror(errno));
      }

      r = key_to_string(&index, key_str);
      if (r.code() != 0)
        return r;

      r = leaf_to_string(value.data(), value_str);
      if (r.code() != 0)
        return r;
      res.emplace_back(key_str, value_str);
    }
  } else {
    // For other maps, try to use the first() and next() interfaces
    std::vector<char> key(desc.key_size);
    if (!this->first(key.data()))
      return StatusTuple(0);

    while (true) {
      if (!this->lookup(key.data(), value.data()))
        break;
      r = key_to_string(key.data(), key_str);
      if (r.code() != 0)
        return r;

      r = leaf_to_string(value.data(), value_str);
      if (r.code() != 0)
        return r;
      res.emplace_back(key_str, value_str);
      if (!this->next(key.data(), key.data()))
        break;
    }
  }

  return StatusTuple(0);
}

// Reports how many CPUs get_possible_cpus() lists.
size_t BPFTable::get_possible_cpu_count() {
  auto possible = get_possible_cpus();
  return possible.size();
}

// Wraps a stack-trace map.
//
// Throws std::invalid_argument when `desc` is not of type
// BPF_MAP_TYPE_STACK_TRACE. The two flags are stored in symbol_option_
// together with a symbol-type mask selecting STT_FUNC and STT_GNU_IFUNC.
BPFStackTable::BPFStackTable(const TableDesc& desc, bool use_debug_file,
                             bool check_debug_file_crc)
    : BPFTableBase<int, stacktrace_t>(desc) {
  const bool is_stack_map = (desc.type == BPF_MAP_TYPE_STACK_TRACE);
  if (!is_stack_map) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a stack table");
  }

  symbol_option_ = {.use_debug_file = use_debug_file,
                    .check_debug_file_crc = check_debug_file_crc,
                    .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)};
}

// Move constructor: the base is built from the source's table description,
// and the symbol options and per-pid symbol-cache map are moved over.
BPFStackTable::BPFStackTable(BPFStackTable&& that)
    : BPFTableBase<int, stacktrace_t>(that.desc),
      symbol_option_(std::move(that.symbol_option_)),
      pid_sym_(std::move(that.pid_sym_)) {
  // Explicitly empty the source's map so that its destructor, which frees
  // every cache still listed in pid_sym_, has nothing left to release.
  that.pid_sym_.clear();
}

// Releases every symbol cache recorded in pid_sym_, passing the cache handle
// and the pid it was stored under to bcc_free_symcache().
BPFStackTable::~BPFStackTable() {
  for (const auto& entry : pid_sym_) {
    bcc_free_symcache(entry.second, entry.first);
  }
}

// Calls remove() for every stack id in [0, capacity()), one at a time.
// The result of each remove() is ignored.
void BPFStackTable::clear_table_non_atomic() {
  int stack_id = 0;
  while (size_t(stack_id) < capacity()) {
    remove(&stack_id);
    stack_id++;
  }
}

std::vector<uintptr_t> BPFStackTable::get_stack_addr(int stack_id) {
  std::vector<uintptr_t> res;
  stacktrace_t stack;
  if (stack_id < 0)
    return res;
  if (!lookup(&stack_id, &stack))
    return res;
  for (int i = 0; (i < BPF_MAX_STACK_DEPTH) && (stack.ip[i] != 0); i++)
    res.push_back(stack.ip[i]);
  return res;
}

std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id,
                                                         int pid) {
  auto addresses = get_stack_addr(stack_id);
  std::vector<std::string> res;
  if (addresses.empty())
    return res;
  res.reserve(addresses.size());

  if (pid < 0)
    pid = -1;
  if (pid_sym_.find(pid) == pid_sym_.end())
    pid_sym_[pid] = bcc_symcache_new(pid, &symbol_option_);
  void* cache = pid_sym_[pid];

  bcc_symbol symbol;
  for (auto addr : addresses)
    if (bcc_symcache_resolve(cache, addr, &symbol) != 0)
      res.emplace_back("[UNKNOWN]");
    else {
      res.push_back(symbol.demangle_name);
      bcc_symbol_free_demangle_name(&symbol);
    }

  return res;
}

// Wraps a perf event array map as a perf buffer. The epoll descriptor starts
// out as -1 (not created yet).
//
// Throws std::invalid_argument when `desc` is not of type
// BPF_MAP_TYPE_PERF_EVENT_ARRAY.
BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc)
    : BPFTableBase<int, int>(desc), epfd_(-1) {
  const bool is_perf_array = (desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY);
  if (!is_perf_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a perf buffer");
  }
}

// Opens a perf reader for `cpu`, stores its fd in the map under key `cpu`,
// and registers the fd with the epoll instance `epfd_`.
//
// `cb` / `lost_cb` / `cb_cookie` / `page_cnt` are forwarded unchanged to
// bpf_open_perf_buffer().
//
// Returns code -1 when a reader is already recorded for `cpu`, when the
// reader cannot be constructed, when the map update fails, or when the fd
// cannot be added to epoll. On the last two failures everything done so far
// is rolled back: the reader is freed and, if the map entry had already been
// written, it is removed again. Returns code 0 on success, after recording
// the reader in cpu_readers_.
StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb,
                                       perf_reader_lost_cb lost_cb, int cpu,
                                       void* cb_cookie, int page_cnt) {
  if (cpu_readers_.find(cpu) != cpu_readers_.end())
    return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu);

  auto reader = static_cast<perf_reader*>(
      bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt));
  if (reader == nullptr)
    return StatusTuple(-1, "Unable to construct perf reader");

  int reader_fd = perf_reader_fd(reader);
  if (!update(&cpu, &reader_fd)) {
    // Capture errno before cleanup: the calls made while freeing the reader
    // may overwrite it, and the message must describe the update failure.
    const int saved_errno = errno;
    perf_reader_free(static_cast<void*>(reader));
    return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu,
                       std::strerror(saved_errno));
  }

  struct epoll_event event = {};
  event.events = EPOLLIN;
  event.data.ptr = static_cast<void*>(reader);
  if (epoll_ctl(epfd_, EPOLL_CTL_ADD, reader_fd, &event) != 0) {
    const int saved_errno = errno;
    // Undo the map update so the table does not keep an entry for a reader
    // that is about to be destroyed. Best effort: the result is ignored.
    remove(&cpu);
    perf_reader_free(static_cast<void*>(reader));
    return StatusTuple(-1, "Unable to add perf_reader FD to epoll: %s",
                       std::strerror(saved_errno));
  }

  cpu_readers_[cpu] = reader;
  return StatusTuple(0);
}

// Opens a perf buffer on every CPU returned by get_online_cpus().
//
// Fails with code -1 if readers or an epoll descriptor from an earlier open
// are still present, or if the epoll instance cannot be created. If opening
// any CPU fails, everything opened so far is closed via close_all_cpu(); the
// status returned is the close error if closing failed, otherwise the
// original open error. Returns code 0 when all CPUs were opened.
StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
                                        perf_reader_lost_cb lost_cb,
                                        void* cb_cookie, int page_cnt) {
  if (cpu_readers_.size() != 0 || epfd_ != -1)
    return StatusTuple(-1, "Previously opened perf buffer not cleaned");

  std::vector<int> cpus = get_online_cpus();

  // Create the epoll instance first and verify it: without this check a
  // failure would only surface later as a confusing epoll_ctl error for the
  // first CPU.
  epfd_ = epoll_create1(EPOLL_CLOEXEC);
  if (epfd_ < 0) {
    const int saved_errno = errno;
    epfd_ = -1;
    return StatusTuple(-1, "Unable to create epoll instance: %s",
                       std::strerror(saved_errno));
  }
  ep_events_.reset(new epoll_event[cpus.size()]);

  for (int i : cpus) {
    auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt);
    if (res.code() != 0) {
      TRY2(close_all_cpu());
      return res;
    }
  }
  return StatusTuple(0);
}

// Closes the perf buffer opened on `cpu`, if any.
//
// Returns code 0 when nothing is recorded for `cpu` or when the reader was
// freed and its map entry removed; returns code -1 when removing the map
// entry fails.
//
// The reader is dropped from cpu_readers_ as soon as it has been freed, even
// if the map removal then fails. Leaving the freed pointer in the map would
// let a later close (for example from the destructor) free it a second time.
StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) {
  auto it = cpu_readers_.find(cpu);
  if (it == cpu_readers_.end())
    return StatusTuple(0);

  perf_reader_free(static_cast<void*>(it->second));
  cpu_readers_.erase(it);

  if (!remove(&cpu))
    return StatusTuple(-1, "Unable to close perf buffer on CPU %d", cpu);
  return StatusTuple(0);
}

// Closes the epoll descriptor (if one is open) and then every perf buffer
// recorded in cpu_readers_.
//
// Failures do not stop the teardown: each one is appended to a combined
// message. Returns code -1 with that message if anything failed, otherwise
// code 0.
StatusTuple BPFPerfBuffer::close_all_cpu() {
  bool failed = false;
  std::string report;

  if (epfd_ >= 0) {
    int rc = close(epfd_);
    epfd_ = -1;
    ep_events_.reset();
    if (rc != 0) {
      failed = true;
      report += std::string(std::strerror(errno)) + "\n";
    }
  }

  // Collect the CPU ids up front; the loop below must not iterate over
  // cpu_readers_ while close_on_cpu() is erasing from it.
  std::vector<int> cpu_ids;
  for (const auto& entry : cpu_readers_)
    cpu_ids.push_back(entry.first);

  for (int cpu : cpu_ids) {
    auto status = close_on_cpu(cpu);
    if (status.code() == 0)
      continue;
    report += "Failed to close CPU" + std::to_string(cpu) + " perf buffer: ";
    report += status.msg() + "\n";
    failed = true;
  }

  if (!failed)
    return StatusTuple(0);
  return StatusTuple(-1, report);
}

// Waits up to `timeout_ms` milliseconds on the epoll descriptor and lets each
// reader that became ready consume its events via perf_reader_event_read().
//
// Returns -1 when no epoll descriptor is open; otherwise returns whatever
// epoll_wait() returned.
int BPFPerfBuffer::poll(int timeout_ms) {
  if (epfd_ < 0)
    return -1;

  epoll_event* ready = ep_events_.get();
  int nready = epoll_wait(epfd_, ready, cpu_readers_.size(), timeout_ms);

  int idx = 0;
  while (idx < nready) {
    perf_reader_event_read(static_cast<perf_reader*>(ready[idx].data.ptr));
    idx++;
  }
  return nready;
}

// Closes everything via close_all_cpu(); a failure is reported on std::cerr
// because a destructor has no way to return it.
BPFPerfBuffer::~BPFPerfBuffer() {
  auto status = close_all_cpu();
  if (status.code() == 0)
    return;
  std::cerr << "Failed to close all perf buffer on destruction: "
            << status.msg() << std::endl;
}

// Wraps a perf event array map.
//
// Throws std::invalid_argument when `desc` is not of type
// BPF_MAP_TYPE_PERF_EVENT_ARRAY.
BPFPerfEventArray::BPFPerfEventArray(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_perf_array = (desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY);
  if (!is_perf_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a perf event array");
  }
}

// Opens the perf event (`type`, `config`) on every CPU returned by
// get_online_cpus().
//
// Fails with code -1 if events from an earlier open are still recorded. When
// opening a CPU fails, everything opened so far is closed with
// close_all_cpu() before an error status is returned. Returns code 0 when
// all CPUs were opened.
StatusTuple BPFPerfEventArray::open_all_cpu(uint32_t type, uint64_t config) {
  if (!cpu_fds_.empty())
    return StatusTuple(-1, "Previously opened perf event not cleaned");

  std::vector<int> online = get_online_cpus();
  for (int cpu : online) {
    auto status = open_on_cpu(cpu, type, config);
    if (status.code() == 0)
      continue;
    TRY2(close_all_cpu());
    return status;
  }
  return StatusTuple(0);
}

// Closes the perf event recorded for every CPU in cpu_fds_.
//
// Failures do not stop the teardown: each one is appended to a combined
// message. Returns code -1 with that message if anything failed, otherwise
// code 0.
StatusTuple BPFPerfEventArray::close_all_cpu() {
  bool failed = false;
  std::string report;

  // Collect the CPU ids up front; the loop below must not iterate over
  // cpu_fds_ while close_on_cpu() is erasing from it.
  std::vector<int> cpu_ids;
  for (const auto& entry : cpu_fds_)
    cpu_ids.push_back(entry.first);

  for (int cpu : cpu_ids) {
    auto status = close_on_cpu(cpu);
    if (status.code() == 0)
      continue;
    report += "Failed to close CPU" + std::to_string(cpu) + " perf event: ";
    report += status.msg() + "\n";
    failed = true;
  }

  if (!failed)
    return StatusTuple(0);
  return StatusTuple(-1, report);
}

// Opens the perf event (`type`, `config`) on `cpu` and stores its fd in the
// map under key `cpu`.
//
// Returns code -1 when an event is already recorded for `cpu`, when
// bpf_open_perf_event() fails, or when the map update fails (in which case
// the freshly opened fd is closed again). Returns code 0 on success, after
// recording the fd in cpu_fds_.
StatusTuple BPFPerfEventArray::open_on_cpu(int cpu, uint32_t type,
                                           uint64_t config) {
  if (cpu_fds_.find(cpu) != cpu_fds_.end())
    return StatusTuple(-1, "Perf event already open on CPU %d", cpu);
  int fd = bpf_open_perf_event(type, config, -1, cpu);
  if (fd < 0) {
    return StatusTuple(-1, "Error constructing perf event %" PRIu32 ":%" PRIu64,
                       type, config);
  }
  if (!update(&cpu, &fd)) {
    // Capture errno before cleanup: closing the fd may overwrite it, and the
    // message must describe the update failure.
    const int saved_errno = errno;
    bpf_close_perf_event_fd(fd);
    return StatusTuple(-1, "Unable to open perf event on CPU %d: %s", cpu,
                       std::strerror(saved_errno));
  }
  cpu_fds_[cpu] = fd;
  return StatusTuple(0);
}

// Closes the perf event fd recorded for `cpu`, if there is one, and forgets
// it. Always returns code 0.
StatusTuple BPFPerfEventArray::close_on_cpu(int cpu) {
  auto entry = cpu_fds_.find(cpu);
  if (entry != cpu_fds_.end()) {
    bpf_close_perf_event_fd(entry->second);
    cpu_fds_.erase(entry);
  }
  return StatusTuple(0);
}

// Closes every open perf event via close_all_cpu(); a failure is reported on
// std::cerr because a destructor has no way to return it. The message names
// perf events (not perf buffers) so it can be told apart from the one
// emitted by BPFPerfBuffer.
BPFPerfEventArray::~BPFPerfEventArray() {
  auto res = close_all_cpu();
  if (res.code() != 0) {
    std::cerr << "Failed to close all perf events on destruction: " << res.msg()
              << std::endl;
  }
}

// Wraps a program array map.
//
// Throws std::invalid_argument when `desc` is not of type
// BPF_MAP_TYPE_PROG_ARRAY.
BPFProgTable::BPFProgTable(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_prog_array = (desc.type == BPF_MAP_TYPE_PROG_ARRAY);
  if (!is_prog_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a prog table");
  }
}

// Stores `prog_fd` at slot `index`.
// Returns code 0 on success, or code -1 with the errno text when update()
// fails.
StatusTuple BPFProgTable::update_value(const int& index, const int& prog_fd) {
  // update() takes non-const pointers, hence the const_casts.
  int* key = const_cast<int*>(&index);
  int* leaf = const_cast<int*>(&prog_fd);
  if (this->update(key, leaf))
    return StatusTuple(0);
  return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
}

// Removes the entry at slot `index`.
// Returns code 0 on success, or code -1 with the errno text when remove()
// fails.
StatusTuple BPFProgTable::remove_value(const int& index) {
  // remove() takes a non-const pointer, hence the const_cast.
  int* key = const_cast<int*>(&index);
  if (this->remove(key))
    return StatusTuple(0);
  return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
}

// Wraps a cgroup array map.
//
// Throws std::invalid_argument when `desc` is not of type
// BPF_MAP_TYPE_CGROUP_ARRAY.
BPFCgroupArray::BPFCgroupArray(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_cgroup_array = (desc.type == BPF_MAP_TYPE_CGROUP_ARRAY);
  if (!is_cgroup_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a cgroup array");
  }
}

// Stores the cgroup file descriptor `cgroup2_fd` at slot `index`.
// Returns code 0 on success, or code -1 with the errno text when update()
// fails.
StatusTuple BPFCgroupArray::update_value(const int& index,
                                         const int& cgroup2_fd) {
  // update() takes non-const pointers, hence the const_casts.
  int* key = const_cast<int*>(&index);
  int* leaf = const_cast<int*>(&cgroup2_fd);
  if (this->update(key, leaf))
    return StatusTuple(0);
  return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
}

// Opens `cgroup2_path` read-only (with O_CLOEXEC) and stores the resulting
// descriptor at slot `index` through the fd-based overload.
//
// Returns code -1 when the path cannot be opened, the overload's status when
// the update fails, and code 0 otherwise. The descriptor is held in a
// FileDesc local for the duration of the call.
StatusTuple BPFCgroupArray::update_value(const int& index,
                                         const std::string& cgroup2_path) {
  FileDesc cgroup_file(::open(cgroup2_path.c_str(), O_RDONLY | O_CLOEXEC));
  const int raw_fd = (int)cgroup_file;
  if (raw_fd < 0)
    return StatusTuple(-1, "Unable to open %s", cgroup2_path.c_str());

  TRY2(update_value(index, raw_fd));
  return StatusTuple(0);
}

// Removes the entry at slot `index`.
// Returns code 0 on success, or code -1 with the errno text when remove()
// fails.
StatusTuple BPFCgroupArray::remove_value(const int& index) {
  // remove() takes a non-const pointer, hence the const_cast.
  int* key = const_cast<int*>(&index);
  if (this->remove(key))
    return StatusTuple(0);
  return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
}

// Wraps a devmap table.
//
// Throws std::invalid_argument when `desc` is not of type
// BPF_MAP_TYPE_DEVMAP.
BPFDevmapTable::BPFDevmapTable(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_devmap = (desc.type == BPF_MAP_TYPE_DEVMAP);
  if (!is_devmap) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a devmap table");
  }
}

// Stores `value` at slot `index`.
// Returns code 0 on success, or code -1 with the errno text when update()
// fails.
StatusTuple BPFDevmapTable::update_value(const int& index,
                                         const int& value) {
  // update() takes non-const pointers, hence the const_casts.
  int* key = const_cast<int*>(&index);
  int* leaf = const_cast<int*>(&value);
  if (this->update(key, leaf))
    return StatusTuple(0);
  return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
}

// Reads the entry at slot `index` into `value`.
// Returns code 0 on success, or code -1 with the errno text when lookup()
// fails.
StatusTuple BPFDevmapTable::get_value(const int& index,
                                      int& value) {
  // lookup() takes a non-const key pointer, hence the const_cast.
  int* key = const_cast<int*>(&index);
  if (this->lookup(key, &value))
    return StatusTuple(0);
  return StatusTuple(-1, "Error getting value: %s", std::strerror(errno));
}

// Removes the entry at slot `index`.
// Returns code 0 on success, or code -1 with the errno text when remove()
// fails.
StatusTuple BPFDevmapTable::remove_value(const int& index) {
  // remove() takes a non-const pointer, hence the const_cast.
  int* key = const_cast<int*>(&index);
  if (this->remove(key))
    return StatusTuple(0);
  return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
}

}  // namespace ebpf