// Copyright 2009 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package gosym implements access to the Go symbol
// and line number tables embedded in Go binaries generated
// by the gc compilers.
package gosym

// The table format is a variant of the format used in Plan 9's a.out
// format, documented at http://plan9.bell-labs.com/magic/man2html/6/a.out.
// The best reference for the differences between the Plan 9 format
// and the Go format is the runtime source, specifically ../../runtime/symtab.c.

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"strconv"
	"strings"
)

/*
 * Symbols
 */

// A Sym represents a single symbol table entry.
type Sym struct {
	Value  uint64
	Type   byte
	Name   string
	GoType uint64
	// If this symbol is a function symbol, the corresponding Func
	Func *Func
}

// Static reports whether this symbol is static (not visible outside its file).
func (s *Sym) Static() bool { return s.Type >= 'a' }

// PackageName returns the package part of the symbol name,
// or the empty string if there is none.
func (s *Sym) PackageName() string {
	if i := strings.Index(s.Name, "."); i != -1 {
		return s.Name[0:i]
	}
	return ""
}

// ReceiverName returns the receiver type name of this symbol,
// or the empty string if there is none.
func (s *Sym) ReceiverName() string {
	l := strings.Index(s.Name, ".")
	r := strings.LastIndex(s.Name, ".")
	if l == -1 || r == -1 || l == r {
		return ""
	}
	return s.Name[l+1 : r]
}

// BaseName returns the symbol name without the package or receiver name.
func (s *Sym) BaseName() string {
	if i := strings.LastIndex(s.Name, "."); i != -1 {
		return s.Name[i+1:]
	}
	return s.Name
}

// A Func collects information about a single function.
type Func struct {
	Entry uint64
	*Sym
	End       uint64
	Params    []*Sym
	Locals    []*Sym
	FrameSize int
	LineTable *LineTable
	Obj       *Obj
}

// An Obj represents a collection of functions in a symbol table.
//
// The exact method of division of a binary into separate Objs is an internal detail
// of the symbol table format.
//
// In early versions of Go each source file became a different Obj.
//
// In Go 1 and Go 1.1, each package produced one Obj for all Go sources
// and one Obj per C source file.
//
// In Go 1.2, there is a single Obj for the entire program.
type Obj struct {
	// Funcs is a list of functions in the Obj.
	Funcs []Func

	// In Go 1.1 and earlier, Paths is a list of symbols corresponding
	// to the source file names that produced the Obj.
	// In Go 1.2, Paths is nil.
	// Use the keys of Table.Files to obtain a list of source files.
	Paths []Sym // meta
}

/*
 * Symbol tables
 */

// Table represents a Go symbol table.  It stores all of the
// symbols decoded from the program and provides methods to translate
// between symbols, names, and addresses.
type Table struct {
	Syms  []Sym
	Funcs []Func
	Files map[string]*Obj // nil for Go 1.2 and later binaries
	Objs  []Obj           // nil for Go 1.2 and later binaries

	go12line *LineTable // Go 1.2 line number table
}

type sym struct {
	value  uint64
	gotype uint64
	typ    byte
	name   []byte
}

var (
	littleEndianSymtab    = []byte{0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00}
	bigEndianSymtab       = []byte{0xFF, 0xFF, 0xFF, 0xFD, 0x00, 0x00, 0x00}
	oldLittleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00}
)

func walksymtab(data []byte, fn func(sym) error) error {
	if len(data) == 0 { // missing symtab is okay
		return nil
	}
	var order binary.ByteOrder = binary.BigEndian
	newTable := false
	switch {
	case bytes.HasPrefix(data, oldLittleEndianSymtab):
		// Same as Go 1.0, but little endian.
		// Format was used during interim development between Go 1.0 and Go 1.1.
		// Should not be widespread, but easy to support.
		data = data[6:]
		order = binary.LittleEndian
	case bytes.HasPrefix(data, bigEndianSymtab):
		newTable = true
	case bytes.HasPrefix(data, littleEndianSymtab):
		newTable = true
		order = binary.LittleEndian
	}
	var ptrsz int
	if newTable {
		if len(data) < 8 {
			return &DecodingError{len(data), "unexpected EOF", nil}
		}
		ptrsz = int(data[7])
		if ptrsz != 4 && ptrsz != 8 {
			return &DecodingError{7, "invalid pointer size", ptrsz}
		}
		data = data[8:]
	}
	var s sym
	p := data
	for len(p) >= 4 {
		var typ byte
		if newTable {
			// Symbol type, value, Go type.
			typ = p[0] & 0x3F
			wideValue := p[0]&0x40 != 0
			goType := p[0]&0x80 != 0
			if typ < 26 {
				typ += 'A'
			} else {
				typ += 'a' - 26
			}
			s.typ = typ
			p = p[1:]
			if wideValue {
				if len(p) < ptrsz {
					return &DecodingError{len(data), "unexpected EOF", nil}
				}
				// fixed-width value
				if ptrsz == 8 {
					s.value = order.Uint64(p[0:8])
					p = p[8:]
				} else {
					s.value = uint64(order.Uint32(p[0:4]))
					p = p[4:]
				}
			} else {
				// varint value
				s.value = 0
				shift := uint(0)
				for len(p) > 0 && p[0]&0x80 != 0 {
					s.value |= uint64(p[0]&0x7F) << shift
					shift += 7
					p = p[1:]
				}
				if len(p) == 0 {
					return &DecodingError{len(data), "unexpected EOF", nil}
				}
				s.value |= uint64(p[0]) << shift
				p = p[1:]
			}
			if goType {
				if len(p) < ptrsz {
					return &DecodingError{len(data), "unexpected EOF", nil}
				}
				// fixed-width go type
				if ptrsz == 8 {
					s.gotype = order.Uint64(p[0:8])
					p = p[8:]
				} else {
					s.gotype = uint64(order.Uint32(p[0:4]))
					p = p[4:]
				}
			}
		} else {
			// Value, symbol type.
			s.value = uint64(order.Uint32(p[0:4]))
			if len(p) < 5 {
				return &DecodingError{len(data), "unexpected EOF", nil}
			}
			typ = p[4]
			if typ&0x80 == 0 {
				return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
			}
			typ &^= 0x80
			s.typ = typ
			p = p[5:]
		}

		// Name.
		var i int
		var nnul int
		for i = 0; i < len(p); i++ {
			if p[i] == 0 {
				nnul = 1
				break
			}
		}
		switch typ {
		case 'z', 'Z':
			p = p[i+nnul:]
			for i = 0; i+2 <= len(p); i += 2 {
				if p[i] == 0 && p[i+1] == 0 {
					nnul = 2
					break
				}
			}
		}
		if len(p) < i+nnul {
			return &DecodingError{len(data), "unexpected EOF", nil}
		}
		s.name = p[0:i]
		i += nnul
		p = p[i:]

		if !newTable {
			if len(p) < 4 {
				return &DecodingError{len(data), "unexpected EOF", nil}
			}
			// Go type.
			s.gotype = uint64(order.Uint32(p[:4]))
			p = p[4:]
		}
		fn(s)
	}
	return nil
}

// NewTable decodes the Go symbol table in data,
// returning an in-memory representation.
func NewTable(symtab []byte, pcln *LineTable) (*Table, error) {
	var n int
	err := walksymtab(symtab, func(s sym) error {
		n++
		return nil
	})
	if err != nil {
		return nil, err
	}

	var t Table
	if pcln.isGo12() {
		t.go12line = pcln
	}
	fname := make(map[uint16]string)
	t.Syms = make([]Sym, 0, n)
	nf := 0
	nz := 0
	lasttyp := uint8(0)
	err = walksymtab(symtab, func(s sym) error {
		n := len(t.Syms)
		t.Syms = t.Syms[0 : n+1]
		ts := &t.Syms[n]
		ts.Type = s.typ
		ts.Value = uint64(s.value)
		ts.GoType = uint64(s.gotype)
		switch s.typ {
		default:
			// rewrite name to use . instead of · (c2 b7)
			w := 0
			b := s.name
			for i := 0; i < len(b); i++ {
				if b[i] == 0xc2 && i+1 < len(b) && b[i+1] == 0xb7 {
					i++
					b[i] = '.'
				}
				b[w] = b[i]
				w++
			}
			ts.Name = string(s.name[0:w])
		case 'z', 'Z':
			if lasttyp != 'z' && lasttyp != 'Z' {
				nz++
			}
			for i := 0; i < len(s.name); i += 2 {
				eltIdx := binary.BigEndian.Uint16(s.name[i : i+2])
				elt, ok := fname[eltIdx]
				if !ok {
					return &DecodingError{-1, "bad filename code", eltIdx}
				}
				if n := len(ts.Name); n > 0 && ts.Name[n-1] != '/' {
					ts.Name += "/"
				}
				ts.Name += elt
			}
		}
		switch s.typ {
		case 'T', 't', 'L', 'l':
			nf++
		case 'f':
			fname[uint16(s.value)] = ts.Name
		}
		lasttyp = s.typ
		return nil
	})
	if err != nil {
		return nil, err
	}

	t.Funcs = make([]Func, 0, nf)
	t.Files = make(map[string]*Obj)

	var obj *Obj
	if t.go12line != nil {
		// Put all functions into one Obj.
		t.Objs = make([]Obj, 1)
		obj = &t.Objs[0]
		t.go12line.go12MapFiles(t.Files, obj)
	} else {
		t.Objs = make([]Obj, 0, nz)
	}

	// Count text symbols and attach frame sizes, parameters, and
	// locals to them.  Also, find object file boundaries.
	lastf := 0
	for i := 0; i < len(t.Syms); i++ {
		sym := &t.Syms[i]
		switch sym.Type {
		case 'Z', 'z': // path symbol
			if t.go12line != nil {
				// Go 1.2 binaries have the file information elsewhere. Ignore.
				break
			}
			// Finish the current object
			if obj != nil {
				obj.Funcs = t.Funcs[lastf:]
			}
			lastf = len(t.Funcs)

			// Start new object
			n := len(t.Objs)
			t.Objs = t.Objs[0 : n+1]
			obj = &t.Objs[n]

			// Count & copy path symbols
			var end int
			for end = i + 1; end < len(t.Syms); end++ {
				if c := t.Syms[end].Type; c != 'Z' && c != 'z' {
					break
				}
			}
			obj.Paths = t.Syms[i:end]
			i = end - 1 // loop will i++

			// Record file names
			depth := 0
			for j := range obj.Paths {
				s := &obj.Paths[j]
				if s.Name == "" {
					depth--
				} else {
					if depth == 0 {
						t.Files[s.Name] = obj
					}
					depth++
				}
			}

		case 'T', 't', 'L', 'l': // text symbol
			if n := len(t.Funcs); n > 0 {
				t.Funcs[n-1].End = sym.Value
			}
			if sym.Name == "runtime.etext" || sym.Name == "etext" {
				continue
			}

			// Count parameter and local (auto) syms
			var np, na int
			var end int
		countloop:
			for end = i + 1; end < len(t.Syms); end++ {
				switch t.Syms[end].Type {
				case 'T', 't', 'L', 'l', 'Z', 'z':
					break countloop
				case 'p':
					np++
				case 'a':
					na++
				}
			}

			// Fill in the function symbol
			n := len(t.Funcs)
			t.Funcs = t.Funcs[0 : n+1]
			fn := &t.Funcs[n]
			sym.Func = fn
			fn.Params = make([]*Sym, 0, np)
			fn.Locals = make([]*Sym, 0, na)
			fn.Sym = sym
			fn.Entry = sym.Value
			fn.Obj = obj
			if t.go12line != nil {
				// All functions share the same line table.
				// It knows how to narrow down to a specific
				// function quickly.
				fn.LineTable = t.go12line
			} else if pcln != nil {
				fn.LineTable = pcln.slice(fn.Entry)
				pcln = fn.LineTable
			}
			for j := i; j < end; j++ {
				s := &t.Syms[j]
				switch s.Type {
				case 'm':
					fn.FrameSize = int(s.Value)
				case 'p':
					n := len(fn.Params)
					fn.Params = fn.Params[0 : n+1]
					fn.Params[n] = s
				case 'a':
					n := len(fn.Locals)
					fn.Locals = fn.Locals[0 : n+1]
					fn.Locals[n] = s
				}
			}
			i = end - 1 // loop will i++
		}
	}

	if t.go12line != nil && nf == 0 {
		t.Funcs = t.go12line.go12Funcs()
	}
	if obj != nil {
		obj.Funcs = t.Funcs[lastf:]
	}
	return &t, nil
}

// PCToFunc returns the function containing the program counter pc,
// or nil if there is no such function.
func (t *Table) PCToFunc(pc uint64) *Func {
	funcs := t.Funcs
	for len(funcs) > 0 {
		m := len(funcs) / 2
		fn := &funcs[m]
		switch {
		case pc < fn.Entry:
			funcs = funcs[0:m]
		case fn.Entry <= pc && pc < fn.End:
			return fn
		default:
			funcs = funcs[m+1:]
		}
	}
	return nil
}

// PCToLine looks up line number information for a program counter.
// If there is no information, it returns fn == nil.
func (t *Table) PCToLine(pc uint64) (file string, line int, fn *Func) {
	if fn = t.PCToFunc(pc); fn == nil {
		return
	}
	if t.go12line != nil {
		file = t.go12line.go12PCToFile(pc)
		line = t.go12line.go12PCToLine(pc)
	} else {
		file, line = fn.Obj.lineFromAline(fn.LineTable.PCToLine(pc))
	}
	return
}

// LineToPC looks up the first program counter on the given line in
// the named file.  It returns UnknownPathError or UnknownLineError if
// there is an error looking up this line.
func (t *Table) LineToPC(file string, line int) (pc uint64, fn *Func, err error) {
	obj, ok := t.Files[file]
	if !ok {
		return 0, nil, UnknownFileError(file)
	}

	if t.go12line != nil {
		pc := t.go12line.go12LineToPC(file, line)
		if pc == 0 {
			return 0, nil, &UnknownLineError{file, line}
		}
		return pc, t.PCToFunc(pc), nil
	}

	abs, err := obj.alineFromLine(file, line)
	if err != nil {
		return
	}
	for i := range obj.Funcs {
		f := &obj.Funcs[i]
		pc := f.LineTable.LineToPC(abs, f.End)
		if pc != 0 {
			return pc, f, nil
		}
	}
	return 0, nil, &UnknownLineError{file, line}
}

// LookupSym returns the text, data, or bss symbol with the given name,
// or nil if no such symbol is found.
func (t *Table) LookupSym(name string) *Sym {
	// TODO(austin) Maybe make a map
	for i := range t.Syms {
		s := &t.Syms[i]
		switch s.Type {
		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
			if s.Name == name {
				return s
			}
		}
	}
	return nil
}

// LookupFunc returns the text, data, or bss symbol with the given name,
// or nil if no such symbol is found.
func (t *Table) LookupFunc(name string) *Func {
	for i := range t.Funcs {
		f := &t.Funcs[i]
		if f.Sym.Name == name {
			return f
		}
	}
	return nil
}

// SymByAddr returns the text, data, or bss symbol starting at the given address.
func (t *Table) SymByAddr(addr uint64) *Sym {
	for i := range t.Syms {
		s := &t.Syms[i]
		switch s.Type {
		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
			if s.Value == addr {
				return s
			}
		}
	}
	return nil
}

/*
 * Object files
 */

// This is legacy code for Go 1.1 and earlier, which used the
// Plan 9 format for pc-line tables. This code was never quite
// correct. It's probably very close, and it's usually correct, but
// we never quite found all the corner cases.
//
// Go 1.2 and later use a simpler format, documented at golang.org/s/go12symtab.

func (o *Obj) lineFromAline(aline int) (string, int) {
	type stackEnt struct {
		path   string
		start  int
		offset int
		prev   *stackEnt
	}

	noPath := &stackEnt{"", 0, 0, nil}
	tos := noPath

pathloop:
	for _, s := range o.Paths {
		val := int(s.Value)
		switch {
		case val > aline:
			break pathloop

		case val == 1:
			// Start a new stack
			tos = &stackEnt{s.Name, val, 0, noPath}

		case s.Name == "":
			// Pop
			if tos == noPath {
				return "<malformed symbol table>", 0
			}
			tos.prev.offset += val - tos.start
			tos = tos.prev

		default:
			// Push
			tos = &stackEnt{s.Name, val, 0, tos}
		}
	}

	if tos == noPath {
		return "", 0
	}
	return tos.path, aline - tos.start - tos.offset + 1
}

func (o *Obj) alineFromLine(path string, line int) (int, error) {
	if line < 1 {
		return 0, &UnknownLineError{path, line}
	}

	for i, s := range o.Paths {
		// Find this path
		if s.Name != path {
			continue
		}

		// Find this line at this stack level
		depth := 0
		var incstart int
		line += int(s.Value)
	pathloop:
		for _, s := range o.Paths[i:] {
			val := int(s.Value)
			switch {
			case depth == 1 && val >= line:
				return line - 1, nil

			case s.Name == "":
				depth--
				if depth == 0 {
					break pathloop
				} else if depth == 1 {
					line += val - incstart
				}

			default:
				if depth == 1 {
					incstart = val
				}
				depth++
			}
		}
		return 0, &UnknownLineError{path, line}
	}
	return 0, UnknownFileError(path)
}

/*
 * Errors
 */

// UnknownFileError represents a failure to find the specific file in
// the symbol table.
type UnknownFileError string

func (e UnknownFileError) Error() string { return "unknown file: " + string(e) }

// UnknownLineError represents a failure to map a line to a program
// counter, either because the line is beyond the bounds of the file
// or because there is no code on the given line.
type UnknownLineError struct {
	File string
	Line int
}

func (e *UnknownLineError) Error() string {
	return "no code at " + e.File + ":" + strconv.Itoa(e.Line)
}

// DecodingError represents an error during the decoding of
// the symbol table.
type DecodingError struct {
	off int
	msg string
	val interface{}
}

func (e *DecodingError) Error() string {
	msg := e.msg
	if e.val != nil {
		msg += fmt.Sprintf(" '%v'", e.val)
	}
	msg += fmt.Sprintf(" at byte %#x", e.off)
	return msg
}