/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>
#include <malloc.h>
#include <private/bionic_malloc.h>
#include <private/bionic_malloc_dispatch.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/system_properties.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <atomic>
#include <tuple>

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/no_destructor.h"
#include "perfetto/base/unix_socket.h"
#include "perfetto/base/utils.h"
#include "src/profiling/memory/client.h"
#include "src/profiling/memory/proc_utils.h"
#include "src/profiling/memory/scoped_spinlock.h"
#include "src/profiling/memory/unhooked_allocator.h"
#include "src/profiling/memory/wire_protocol.h"

using perfetto::profiling::ScopedSpinlock;
using perfetto::profiling::UnhookedAllocator;

// This is so we can make an so that we can swap out with the existing
// libc_malloc_hooks.so
#ifndef HEAPPROFD_PREFIX
#define HEAPPROFD_PREFIX heapprofd
#endif

#define HEAPPROFD_ADD_PREFIX(name) \
  PERFETTO_BUILDFLAG_CAT(HEAPPROFD_PREFIX, name)

#pragma GCC visibility push(default)
extern "C" {

bool HEAPPROFD_ADD_PREFIX(_initialize)(const MallocDispatch* malloc_dispatch,
                                       bool* zygote_child,
                                       const char* options);
void HEAPPROFD_ADD_PREFIX(_finalize)();
void HEAPPROFD_ADD_PREFIX(_dump_heap)(const char* file_name);
void HEAPPROFD_ADD_PREFIX(_get_malloc_leak_info)(uint8_t** info,
                                                 size_t* overall_size,
                                                 size_t* info_size,
                                                 size_t* total_memory,
                                                 size_t* backtrace_size);
bool HEAPPROFD_ADD_PREFIX(_write_malloc_leak_info)(FILE* fp);
ssize_t HEAPPROFD_ADD_PREFIX(_malloc_backtrace)(void* pointer,
                                                uintptr_t* frames,
                                                size_t frame_count);
void HEAPPROFD_ADD_PREFIX(_free_malloc_leak_info)(uint8_t* info);
size_t HEAPPROFD_ADD_PREFIX(_malloc_usable_size)(void* pointer);
void* HEAPPROFD_ADD_PREFIX(_malloc)(size_t size);
void HEAPPROFD_ADD_PREFIX(_free)(void* pointer);
void* HEAPPROFD_ADD_PREFIX(_aligned_alloc)(size_t alignment, size_t size);
void* HEAPPROFD_ADD_PREFIX(_memalign)(size_t alignment, size_t bytes);
void* HEAPPROFD_ADD_PREFIX(_realloc)(void* pointer, size_t bytes);
void* HEAPPROFD_ADD_PREFIX(_calloc)(size_t nmemb, size_t bytes);
struct mallinfo HEAPPROFD_ADD_PREFIX(_mallinfo)();
int HEAPPROFD_ADD_PREFIX(_mallopt)(int param, int value);
int HEAPPROFD_ADD_PREFIX(_malloc_info)(int options, FILE* fp);
int HEAPPROFD_ADD_PREFIX(_posix_memalign)(void** memptr,
                                          size_t alignment,
                                          size_t size);
int HEAPPROFD_ADD_PREFIX(_iterate)(uintptr_t base,
                                   size_t size,
                                   void (*callback)(uintptr_t base,
                                                    size_t size,
                                                    void* arg),
                                   void* arg);
void HEAPPROFD_ADD_PREFIX(_malloc_disable)();
void HEAPPROFD_ADD_PREFIX(_malloc_enable)();

#if defined(HAVE_DEPRECATED_MALLOC_FUNCS)
void* HEAPPROFD_ADD_PREFIX(_pvalloc)(size_t bytes);
void* HEAPPROFD_ADD_PREFIX(_valloc)(size_t size);
#endif
}
#pragma GCC visibility pop

namespace {

// The real malloc function pointers we get in initialize. Set once in the first
// initialize invocation, and never changed afterwards. Because bionic does a
// release write after initialization and an acquire read to retrieve the hooked
// malloc functions, we can use relaxed memory mode for both writing and
// reading.
std::atomic<const MallocDispatch*> g_dispatch{nullptr};

// Holds the active profiling client. Is empty at the start, or after we've
// started shutting down a profiling session. Hook invocations take shared_ptr
// copies (ensuring that the client stays alive until no longer needed), and do
// nothing if this master pointer is empty.
//
// This shared_ptr itself is protected by g_client_lock. Note that shared_ptr
// handles are not thread-safe by themselves:
// https://en.cppreference.com/w/cpp/memory/shared_ptr/atomic
//
// To avoid on-destruction re-entrancy issues, this shared_ptr needs to be
// constructed with an allocator that uses the unhooked malloc & free functions.
// See UnhookedAllocator.
//
// NoDestructor<> wrapper is used to avoid destructing the shared_ptr at program
// exit. The rationale is:
// * Avoiding the atexit destructor racing against other threads that are
//   possibly running within the hooks.
// * Making sure that atexit handlers running after this global's destructor
//   can still safely enter the hooks.
perfetto::base::NoDestructor<std::shared_ptr<perfetto::profiling::Client>>
    g_client;

// Protects g_client, and serves as an external lock for sampling decisions (see
// perfetto::profiling::Sampler).
//
// We rely on this atomic's destuction being a nop, as it is possible for the
// hooks to attempt to acquire the spinlock after its destructor should have run
// (technically a use-after-destruct scenario).
std::atomic<bool> g_client_lock{false};

constexpr char kHeapprofdBinPath[] = "/system/bin/heapprofd";

const MallocDispatch* GetDispatch() {
  return g_dispatch.load(std::memory_order_relaxed);
}

int CloneWithoutSigchld() {
  return clone(nullptr, nullptr, 0, nullptr);
}

int ForklikeClone() {
  return clone(nullptr, nullptr, SIGCHLD, nullptr);
}

// Like daemon(), but using clone to avoid invoking pthread_atfork(3) handlers.
int Daemonize() {
  switch (ForklikeClone()) {
    case -1:
      PERFETTO_PLOG("Daemonize.clone");
      return -1;
      break;
    case 0:
      break;
    default:
      _exit(0);
      break;
  }
  if (setsid() == -1) {
    PERFETTO_PLOG("Daemonize.setsid");
    return -1;
  }
  // best effort chdir & fd close
  chdir("/");
  int fd = open("/dev/null", O_RDWR, 0);
  if (fd != -1) {
    dup2(fd, STDIN_FILENO);
    dup2(fd, STDOUT_FILENO);
    dup2(fd, STDERR_FILENO);
    if (fd > STDERR_FILENO)
      close(fd);
  }
  return 0;
}

// Called only if |g_client_lock| acquisition fails, which shouldn't happen
// unless we're in a completely unexpected state (which we won't know how to
// recover from). Tries to abort (SIGABRT) the whole process to serve as an
// explicit indication of a bug.
//
// Doesn't use PERFETTO_FATAL as that is a single attempt to self-signal (in
// practice - SIGTRAP), while abort() tries to make sure the process has
// exited one way or another.
__attribute__((noreturn, noinline)) void AbortOnSpinlockTimeout() {
  PERFETTO_ELOG(
      "Timed out on the spinlock - something is horribly wrong. "
      "Aborting whole process.");
  abort();
}

std::string ReadSystemProperty(const char* key) {
  std::string prop_value;
  const prop_info* prop = __system_property_find(key);
  if (!prop) {
    return prop_value;  // empty
  }
  __system_property_read_callback(
      prop,
      [](void* cookie, const char* name, const char* value, uint32_t) {
        std::string* prop_value = reinterpret_cast<std::string*>(cookie);
        *prop_value = value;
      },
      &prop_value);
  return prop_value;
}

bool ShouldForkPrivateDaemon() {
  std::string build_type = ReadSystemProperty("ro.build.type");
  if (build_type.empty()) {
    PERFETTO_ELOG(
        "Cannot determine platform build type, proceeding in fork mode "
        "profiling.");
    return true;
  }

  // On development builds, we support both modes of profiling, depending on a
  // system property.
  if (build_type == "userdebug" || build_type == "eng") {
    // Note: if renaming the property, also update system_property.cc
    std::string mode = ReadSystemProperty("heapprofd.userdebug.mode");
    return mode == "fork";
  }

  // User/other builds - always fork private profiler.
  return true;
}

std::shared_ptr<perfetto::profiling::Client> CreateClientForCentralDaemon(
    UnhookedAllocator<perfetto::profiling::Client> unhooked_allocator) {
  PERFETTO_DLOG("Constructing client for central daemon.");
  using perfetto::profiling::Client;

  perfetto::base::Optional<perfetto::base::UnixSocketRaw> sock =
      Client::ConnectToHeapprofd(perfetto::profiling::kHeapprofdSocketFile);
  if (!sock)
    return nullptr;
  return Client::CreateAndHandshake(std::move(sock.value()),
                                    unhooked_allocator);
}

std::shared_ptr<perfetto::profiling::Client> CreateClientAndPrivateDaemon(
    UnhookedAllocator<perfetto::profiling::Client> unhooked_allocator) {
  PERFETTO_DLOG("Setting up fork mode profiling.");
  perfetto::base::UnixSocketRaw parent_sock;
  perfetto::base::UnixSocketRaw child_sock;
  std::tie(parent_sock, child_sock) = perfetto::base::UnixSocketRaw::CreatePair(
      perfetto::base::SockType::kStream);

  if (!parent_sock || !child_sock) {
    PERFETTO_PLOG("Failed to create socketpair.");
    return nullptr;
  }

  child_sock.RetainOnExec();

  // Record own pid and cmdline, to pass down to the forked heapprofd.
  pid_t target_pid = getpid();
  std::string target_cmdline;
  if (!perfetto::profiling::GetCmdlineForPID(target_pid, &target_cmdline)) {
    target_cmdline = "failed-to-read-cmdline";
    PERFETTO_ELOG(
        "Failed to read own cmdline, proceeding as this might be a by-pid "
        "profiling request (which will still work).");
  }

  // Prepare arguments for heapprofd.
  std::string pid_arg =
      std::string("--exclusive-for-pid=") + std::to_string(target_pid);
  std::string cmd_arg =
      std::string("--exclusive-for-cmdline=") + target_cmdline;
  std::string fd_arg =
      std::string("--inherit-socket-fd=") + std::to_string(child_sock.fd());
  const char* argv[] = {kHeapprofdBinPath, pid_arg.c_str(), cmd_arg.c_str(),
                        fd_arg.c_str(), nullptr};

  // Use fork-like clone to avoid invoking the host's pthread_atfork(3)
  // handlers. Also avoid sending the current process a SIGCHILD to further
  // reduce our interference.
  pid_t clone_pid = CloneWithoutSigchld();
  if (clone_pid == -1) {
    PERFETTO_PLOG("Failed to clone.");
    return nullptr;
  }
  if (clone_pid == 0) {  // child
    // Daemonize clones again, terminating the calling thread (i.e. the direct
    // child of the original process). So the rest of this codepath will be
    // executed in a new reparented process.
    if (Daemonize() == -1) {
      PERFETTO_PLOG("Daemonization failed.");
      _exit(1);
    }
    execv(kHeapprofdBinPath, const_cast<char**>(argv));
    PERFETTO_PLOG("Failed to execute private heapprofd.");
    _exit(1);
  }  // else - parent continuing the client setup

  child_sock.ReleaseFd().reset();  // close child socket's fd
  if (!parent_sock.SetTxTimeout(perfetto::profiling::kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set socket transmit timeout.");
    return nullptr;
  }

  if (!parent_sock.SetRxTimeout(perfetto::profiling::kClientSockTimeoutMs)) {
    PERFETTO_PLOG("Failed to set socket receive timeout.");
    return nullptr;
  }

  // Wait on the immediate child to exit (allow for ECHILD in the unlikely case
  // we're in a process that has made its children unwaitable).
  int unused = 0;
  if (PERFETTO_EINTR(waitpid(clone_pid, &unused, __WCLONE)) == -1 &&
      errno != ECHILD) {
    PERFETTO_PLOG("Failed to waitpid on immediate child.");
    return nullptr;
  }

  return perfetto::profiling::Client::CreateAndHandshake(std::move(parent_sock),
                                                         unhooked_allocator);
}

// Note: android_mallopt(M_RESET_HOOKS) is mutually exclusive with
// heapprofd_initialize. Concurrent calls get discarded, which might be our
// unpatching attempt if there is a concurrent re-initialization running due to
// a new signal.
//
// Note: g_client can be reset by heapprofd_initialize without calling this
// function.
void ShutdownLazy() {
  ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Try);
  if (PERFETTO_UNLIKELY(!s.locked()))
    AbortOnSpinlockTimeout();

  if (!g_client.ref())  // other invocation already initiated shutdown
    return;

  // Clear primary shared pointer, such that later hook invocations become nops.
  g_client.ref().reset();

  if (!android_mallopt(M_RESET_HOOKS, nullptr, 0))
    PERFETTO_PLOG("Unpatching heapprofd hooks failed.");
}

}  // namespace

// Setup for the rest of profiling. The first time profiling is triggered in a
// process, this is called after this client library is dlopened, but before the
// rest of the hooks are patched in. However, as we support multiple profiling
// sessions within a process' lifetime, this function can also be legitimately
// called any number of times afterwards (note: bionic guarantees that at most
// one initialize call is active at a time).
//
// Note: if profiling is triggered at runtime, this runs on a dedicated pthread
// (which is safe to block). If profiling is triggered at startup, then this
// code runs synchronously.
bool HEAPPROFD_ADD_PREFIX(_initialize)(const MallocDispatch* malloc_dispatch,
                                       bool*,
                                       const char*) {
  using ::perfetto::profiling::Client;

  // Table of pointers to backing implementation.
  g_dispatch.store(malloc_dispatch, std::memory_order_relaxed);

  // TODO(fmayer): Check other destructions of client and make a decision
  // whether we want to ban heap objects in the client or not.
  std::shared_ptr<Client> old_client;
  {
    ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Try);
    if (PERFETTO_UNLIKELY(!s.locked()))
      AbortOnSpinlockTimeout();

    if (g_client.ref()) {
      PERFETTO_LOG("Rejecting concurrent profiling initialization.");
      return true;  // success as we're in a valid state
    }
    old_client = g_client.ref();
    g_client.ref().reset();
  }

  old_client.reset();

  // The dispatch table never changes, so let the custom allocator retain the
  // function pointers directly.
  UnhookedAllocator<Client> unhooked_allocator(malloc_dispatch->malloc,
                                               malloc_dispatch->free);

  // These factory functions use heap objects, so we need to run them without
  // the spinlock held.
  std::shared_ptr<Client> client =
      ShouldForkPrivateDaemon()
          ? CreateClientAndPrivateDaemon(unhooked_allocator)
          : CreateClientForCentralDaemon(unhooked_allocator);

  if (!client) {
    PERFETTO_LOG("heapprofd_client not initialized, not installing hooks.");
    return false;
  }
  PERFETTO_LOG("heapprofd_client initialized.");
  {
    ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Try);
    if (PERFETTO_UNLIKELY(!s.locked()))
      AbortOnSpinlockTimeout();

    // This cannot have been set in the meantime. There are never two concurrent
    // calls to this function, as Bionic uses atomics to guard against that.
    PERFETTO_DCHECK(g_client.ref() == nullptr);
    g_client.ref() = std::move(client);
  }
  return true;
}

void HEAPPROFD_ADD_PREFIX(_finalize)() {
  // At the time of writing, invoked only as an atexit handler. We don't have
  // any specific action to take, and cleanup can be left to the OS.
}

// Decides whether an allocation with the given address and size needs to be
// sampled, and if so, records it. Performs the necessary synchronization (holds
// |g_client_lock| spinlock) while accessing the shared sampler, and obtaining a
// profiling client handle (shared_ptr).
//
// If the allocation is to be sampled, the recording is done without holding
// |g_client_lock|. The client handle is guaranteed to not be invalidated while
// the allocation is being recorded.
//
// If the attempt to record the allocation fails, initiates lazy shutdown of the
// client & hooks.
static void MaybeSampleAllocation(size_t size, void* addr) {
  size_t sampled_alloc_sz = 0;
  std::shared_ptr<perfetto::profiling::Client> client;
  {
    ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Try);
    if (PERFETTO_UNLIKELY(!s.locked()))
      AbortOnSpinlockTimeout();

    if (!g_client.ref())  // no active client (most likely shutting down)
      return;

    sampled_alloc_sz = g_client.ref()->GetSampleSizeLocked(size);
    if (sampled_alloc_sz == 0)  // not sampling
      return;

    client = g_client.ref();  // owning copy
  }                           // unlock

  if (!client->RecordMalloc(size, sampled_alloc_sz,
                            reinterpret_cast<uint64_t>(addr))) {
    ShutdownLazy();
  }
}

void* HEAPPROFD_ADD_PREFIX(_malloc)(size_t size) {
  const MallocDispatch* dispatch = GetDispatch();
  void* addr = dispatch->malloc(size);
  MaybeSampleAllocation(size, addr);
  return addr;
}

void* HEAPPROFD_ADD_PREFIX(_calloc)(size_t nmemb, size_t size) {
  const MallocDispatch* dispatch = GetDispatch();
  void* addr = dispatch->calloc(nmemb, size);
  MaybeSampleAllocation(size, addr);
  return addr;
}

void* HEAPPROFD_ADD_PREFIX(_aligned_alloc)(size_t alignment, size_t size) {
  const MallocDispatch* dispatch = GetDispatch();
  void* addr = dispatch->aligned_alloc(alignment, size);
  MaybeSampleAllocation(size, addr);
  return addr;
}

void* HEAPPROFD_ADD_PREFIX(_memalign)(size_t alignment, size_t size) {
  const MallocDispatch* dispatch = GetDispatch();
  void* addr = dispatch->memalign(alignment, size);
  MaybeSampleAllocation(size, addr);
  return addr;
}

int HEAPPROFD_ADD_PREFIX(_posix_memalign)(void** memptr,
                                          size_t alignment,
                                          size_t size) {
  const MallocDispatch* dispatch = GetDispatch();
  int res = dispatch->posix_memalign(memptr, alignment, size);
  if (res != 0)
    return res;

  MaybeSampleAllocation(size, *memptr);
  return 0;
}

// Note: we record the free before calling the backing implementation to make
// sure that the address is not reused before we've processed the deallocation
// (which includes assigning a sequence id to it).
void HEAPPROFD_ADD_PREFIX(_free)(void* pointer) {
  const MallocDispatch* dispatch = GetDispatch();
  std::shared_ptr<perfetto::profiling::Client> client;
  {
    ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Try);
    if (PERFETTO_UNLIKELY(!s.locked()))
      AbortOnSpinlockTimeout();

    client = g_client.ref();  // owning copy (or empty)
  }

  if (client) {
    if (!client->RecordFree(reinterpret_cast<uint64_t>(pointer)))
      ShutdownLazy();
  }
  return dispatch->free(pointer);
}

// Approach to recording realloc: under the initial lock, get a safe copy of the
// client, and make the sampling decision in advance. Then record the
// deallocation, call the real realloc, and finally record the sample if one is
// necessary.
//
// As with the free, we record the deallocation before calling the backing
// implementation to make sure the address is still exclusive while we're
// processing it.
void* HEAPPROFD_ADD_PREFIX(_realloc)(void* pointer, size_t size) {
  const MallocDispatch* dispatch = GetDispatch();

  size_t sampled_alloc_sz = 0;
  std::shared_ptr<perfetto::profiling::Client> client;
  {
    ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Try);
    if (PERFETTO_UNLIKELY(!s.locked()))
      AbortOnSpinlockTimeout();

    // If there is no active client, we still want to reach the backing realloc,
    // so keep going.
    if (g_client.ref()) {
      client = g_client.ref();  // owning copy
      sampled_alloc_sz = g_client.ref()->GetSampleSizeLocked(size);
    }
  }  // unlock

  if (client && pointer) {
    if (!client->RecordFree(reinterpret_cast<uint64_t>(pointer)))
      ShutdownLazy();
  }
  void* addr = dispatch->realloc(pointer, size);

  if (size == 0 || sampled_alloc_sz == 0)
    return addr;

  if (!client->RecordMalloc(size, sampled_alloc_sz,
                            reinterpret_cast<uint64_t>(addr))) {
    ShutdownLazy();
  }
  return addr;
}

void HEAPPROFD_ADD_PREFIX(_dump_heap)(const char*) {}

void HEAPPROFD_ADD_PREFIX(
    _get_malloc_leak_info)(uint8_t**, size_t*, size_t*, size_t*, size_t*) {}

bool HEAPPROFD_ADD_PREFIX(_write_malloc_leak_info)(FILE*) {
  return false;
}

ssize_t HEAPPROFD_ADD_PREFIX(_malloc_backtrace)(void*, uintptr_t*, size_t) {
  return -1;
}

void HEAPPROFD_ADD_PREFIX(_free_malloc_leak_info)(uint8_t*) {}

size_t HEAPPROFD_ADD_PREFIX(_malloc_usable_size)(void* pointer) {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->malloc_usable_size(pointer);
}

struct mallinfo HEAPPROFD_ADD_PREFIX(_mallinfo)() {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->mallinfo();
}

int HEAPPROFD_ADD_PREFIX(_mallopt)(int param, int value) {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->mallopt(param, value);
}

int HEAPPROFD_ADD_PREFIX(_malloc_info)(int options, FILE* fp) {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->malloc_info(options, fp);
}

int HEAPPROFD_ADD_PREFIX(_iterate)(uintptr_t,
                                   size_t,
                                   void (*)(uintptr_t base,
                                            size_t size,
                                            void* arg),
                                   void*) {
  return 0;
}

void HEAPPROFD_ADD_PREFIX(_malloc_disable)() {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->malloc_disable();
}

void HEAPPROFD_ADD_PREFIX(_malloc_enable)() {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->malloc_enable();
}

#if defined(HAVE_DEPRECATED_MALLOC_FUNCS)
void* HEAPPROFD_ADD_PREFIX(_pvalloc)(size_t size) {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->pvalloc(size);
}

void* HEAPPROFD_ADD_PREFIX(_valloc)(size_t size) {
  const MallocDispatch* dispatch = GetDispatch();
  return dispatch->valloc(size);
}

#endif