// Copyright 2015 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package zip import ( "bytes" "compress/flate" "errors" "fmt" "hash/crc32" "io" "io/ioutil" "log" "os" "path/filepath" "runtime/pprof" "runtime/trace" "sort" "strings" "sync" "time" "unicode" "github.com/google/blueprint/pathtools" "android/soong/jar" "android/soong/third_party/zip" ) // Block size used during parallel compression of a single file. const parallelBlockSize = 1 * 1024 * 1024 // 1MB // Minimum file size to use parallel compression. It requires more // flate.Writer allocations, since we can't change the dictionary // during Reset const minParallelFileSize = parallelBlockSize * 6 // Size of the ZIP compression window (32KB) const windowSize = 32 * 1024 type nopCloser struct { io.Writer } func (nopCloser) Close() error { return nil } type byteReaderCloser struct { *bytes.Reader io.Closer } type pathMapping struct { dest, src string zipMethod uint16 } type uniqueSet map[string]bool func (u *uniqueSet) String() string { return `""` } func (u *uniqueSet) Set(s string) error { if _, found := (*u)[s]; found { return fmt.Errorf("File %q was specified twice as a file to not deflate", s) } else { (*u)[s] = true } return nil } type FileArg struct { PathPrefixInZip, SourcePrefixToStrip string SourceFiles []string GlobDir string } type FileArgs []FileArg type ZipWriter struct { time time.Time createdFiles map[string]string createdDirs map[string]string directories bool errors chan error writeOps chan chan *zipEntry cpuRateLimiter *CPURateLimiter memoryRateLimiter *MemoryRateLimiter compressorPool sync.Pool compLevel int } type zipEntry struct { fh *zip.FileHeader // List of delayed io.Reader futureReaders chan chan io.Reader // Only used for passing into the MemoryRateLimiter to ensure we // release as much memory as much as we request allocatedSize int64 } type ZipArgs struct { FileArgs FileArgs OutputFilePath string CpuProfileFilePath string TraceFilePath string EmulateJar bool AddDirectoryEntriesToZip bool CompressionLevel int ManifestSourcePath string NumParallelJobs int NonDeflatedFiles map[string]bool WriteIfChanged bool } const NOQUOTE = '\x00' func ReadRespFile(bytes []byte) []string { var args []string var arg []rune isEscaping := false quotingStart := NOQUOTE for _, c := range string(bytes) { switch { case isEscaping: if quotingStart == '"' { if !(c == '"' || c == '\\') { // '\"' or '\\' will be escaped under double quoting. arg = append(arg, '\\') } } arg = append(arg, c) isEscaping = false case c == '\\' && quotingStart != '\'': isEscaping = true case quotingStart == NOQUOTE && (c == '\'' || c == '"'): quotingStart = c case quotingStart != NOQUOTE && c == quotingStart: quotingStart = NOQUOTE case quotingStart == NOQUOTE && unicode.IsSpace(c): // Current character is a space outside quotes if len(arg) != 0 { args = append(args, string(arg)) } arg = arg[:0] default: arg = append(arg, c) } } if len(arg) != 0 { args = append(args, string(arg)) } return args } func Run(args ZipArgs) (err error) { if args.CpuProfileFilePath != "" { f, err := os.Create(args.CpuProfileFilePath) if err != nil { fmt.Fprintln(os.Stderr, err.Error()) os.Exit(1) } defer f.Close() pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } if args.TraceFilePath != "" { f, err := os.Create(args.TraceFilePath) if err != nil { fmt.Fprintln(os.Stderr, err.Error()) os.Exit(1) } defer f.Close() err = trace.Start(f) if err != nil { fmt.Fprintln(os.Stderr, err.Error()) os.Exit(1) } defer trace.Stop() } if args.OutputFilePath == "" { return fmt.Errorf("output file path must be nonempty") } if args.EmulateJar { args.AddDirectoryEntriesToZip = true } w := &ZipWriter{ time: jar.DefaultTime, createdDirs: make(map[string]string), createdFiles: make(map[string]string), directories: args.AddDirectoryEntriesToZip, compLevel: args.CompressionLevel, } pathMappings := []pathMapping{} for _, fa := range args.FileArgs { srcs := fa.SourceFiles if fa.GlobDir != "" { srcs = append(srcs, recursiveGlobFiles(fa.GlobDir)...) } for _, src := range srcs { if err := fillPathPairs(fa.PathPrefixInZip, fa.SourcePrefixToStrip, src, &pathMappings, args.NonDeflatedFiles); err != nil { log.Fatal(err) } } } buf := &bytes.Buffer{} var out io.Writer = buf if !args.WriteIfChanged { f, err := os.Create(args.OutputFilePath) if err != nil { return err } defer f.Close() defer func() { if err != nil { os.Remove(args.OutputFilePath) } }() out = f } err = w.write(out, pathMappings, args.ManifestSourcePath, args.EmulateJar, args.NumParallelJobs) if err != nil { return err } if args.WriteIfChanged { err := pathtools.WriteFileIfChanged(args.OutputFilePath, buf.Bytes(), 0666) if err != nil { return err } } return nil } func fillPathPairs(prefix, rel, src string, pathMappings *[]pathMapping, nonDeflatedFiles map[string]bool) error { src = strings.TrimSpace(src) if src == "" { return nil } src = filepath.Clean(src) dest, err := filepath.Rel(rel, src) if err != nil { return err } dest = filepath.Join(prefix, dest) zipMethod := zip.Deflate if _, found := nonDeflatedFiles[dest]; found { zipMethod = zip.Store } *pathMappings = append(*pathMappings, pathMapping{dest: dest, src: src, zipMethod: zipMethod}) return nil } func jarSort(mappings []pathMapping) { less := func(i int, j int) (smaller bool) { return jar.EntryNamesLess(mappings[i].dest, mappings[j].dest) } sort.SliceStable(mappings, less) } type readerSeekerCloser interface { io.Reader io.ReaderAt io.Closer io.Seeker } func (z *ZipWriter) write(f io.Writer, pathMappings []pathMapping, manifest string, emulateJar bool, parallelJobs int) error { z.errors = make(chan error) defer close(z.errors) // This channel size can be essentially unlimited -- it's used as a fifo // queue decouple the CPU and IO loads. Directories don't require any // compression time, but still cost some IO. Similar with small files that // can be very fast to compress. Some files that are more difficult to // compress won't take a corresponding longer time writing out. // // The optimum size here depends on your CPU and IO characteristics, and // the the layout of your zip file. 1000 was chosen mostly at random as // something that worked reasonably well for a test file. // // The RateLimit object will put the upper bounds on the number of // parallel compressions and outstanding buffers. z.writeOps = make(chan chan *zipEntry, 1000) z.cpuRateLimiter = NewCPURateLimiter(int64(parallelJobs)) z.memoryRateLimiter = NewMemoryRateLimiter(0) defer func() { z.cpuRateLimiter.Stop() z.memoryRateLimiter.Stop() }() if manifest != "" && !emulateJar { return errors.New("must specify --jar when specifying a manifest via -m") } if emulateJar { // manifest may be empty, in which case addManifest will fill in a default pathMappings = append(pathMappings, pathMapping{jar.ManifestFile, manifest, zip.Deflate}) jarSort(pathMappings) } go func() { var err error defer close(z.writeOps) for _, ele := range pathMappings { if emulateJar && ele.dest == jar.ManifestFile { err = z.addManifest(ele.dest, ele.src, ele.zipMethod) } else { err = z.addFile(ele.dest, ele.src, ele.zipMethod, emulateJar) } if err != nil { z.errors <- err return } } }() zipw := zip.NewWriter(f) var currentWriteOpChan chan *zipEntry var currentWriter io.WriteCloser var currentReaders chan chan io.Reader var currentReader chan io.Reader var done bool for !done { var writeOpsChan chan chan *zipEntry var writeOpChan chan *zipEntry var readersChan chan chan io.Reader if currentReader != nil { // Only read and process errors } else if currentReaders != nil { readersChan = currentReaders } else if currentWriteOpChan != nil { writeOpChan = currentWriteOpChan } else { writeOpsChan = z.writeOps } select { case writeOp, ok := <-writeOpsChan: if !ok { done = true } currentWriteOpChan = writeOp case op := <-writeOpChan: currentWriteOpChan = nil var err error if op.fh.Method == zip.Deflate { currentWriter, err = zipw.CreateCompressedHeader(op.fh) } else { var zw io.Writer op.fh.CompressedSize64 = op.fh.UncompressedSize64 zw, err = zipw.CreateHeaderAndroid(op.fh) currentWriter = nopCloser{zw} } if err != nil { return err } currentReaders = op.futureReaders if op.futureReaders == nil { currentWriter.Close() currentWriter = nil } z.memoryRateLimiter.Finish(op.allocatedSize) case futureReader, ok := <-readersChan: if !ok { // Done with reading currentWriter.Close() currentWriter = nil currentReaders = nil } currentReader = futureReader case reader := <-currentReader: _, err := io.Copy(currentWriter, reader) if err != nil { return err } currentReader = nil case err := <-z.errors: return err } } // One last chance to catch an error select { case err := <-z.errors: return err default: zipw.Close() return nil } } // imports (possibly with compression) <src> into the zip at sub-path <dest> func (z *ZipWriter) addFile(dest, src string, method uint16, emulateJar bool) error { var fileSize int64 var executable bool if s, err := os.Lstat(src); err != nil { return err } else if s.IsDir() { if z.directories { return z.writeDirectory(dest, src, emulateJar) } return nil } else { if err := z.writeDirectory(filepath.Dir(dest), src, emulateJar); err != nil { return err } if prev, exists := z.createdDirs[dest]; exists { return fmt.Errorf("destination %q is both a directory %q and a file %q", dest, prev, src) } if prev, exists := z.createdFiles[dest]; exists { return fmt.Errorf("destination %q has two files %q and %q", dest, prev, src) } z.createdFiles[dest] = src if s.Mode()&os.ModeSymlink != 0 { return z.writeSymlink(dest, src) } else if !s.Mode().IsRegular() { return fmt.Errorf("%s is not a file, directory, or symlink", src) } fileSize = s.Size() executable = s.Mode()&0100 != 0 } r, err := os.Open(src) if err != nil { return err } header := &zip.FileHeader{ Name: dest, Method: method, UncompressedSize64: uint64(fileSize), } if executable { header.SetMode(0700) } return z.writeFileContents(header, r) } func (z *ZipWriter) addManifest(dest string, src string, method uint16) error { if prev, exists := z.createdDirs[dest]; exists { return fmt.Errorf("destination %q is both a directory %q and a file %q", dest, prev, src) } if prev, exists := z.createdFiles[dest]; exists { return fmt.Errorf("destination %q has two files %q and %q", dest, prev, src) } if err := z.writeDirectory(filepath.Dir(dest), src, true); err != nil { return err } fh, buf, err := jar.ManifestFileContents(src) if err != nil { return err } reader := &byteReaderCloser{bytes.NewReader(buf), ioutil.NopCloser(nil)} return z.writeFileContents(fh, reader) } func (z *ZipWriter) writeFileContents(header *zip.FileHeader, r readerSeekerCloser) (err error) { header.SetModTime(z.time) compressChan := make(chan *zipEntry, 1) z.writeOps <- compressChan // Pre-fill a zipEntry, it will be sent in the compressChan once // we're sure about the Method and CRC. ze := &zipEntry{ fh: header, } ze.allocatedSize = int64(header.UncompressedSize64) z.cpuRateLimiter.Request() z.memoryRateLimiter.Request(ze.allocatedSize) fileSize := int64(header.UncompressedSize64) if fileSize == 0 { fileSize = int64(header.UncompressedSize) } if header.Method == zip.Deflate && fileSize >= minParallelFileSize { wg := new(sync.WaitGroup) // Allocate enough buffer to hold all readers. We'll limit // this based on actual buffer sizes in RateLimit. ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1) // Calculate the CRC in the background, since reading the entire // file could take a while. // // We could split this up into chunks as well, but it's faster // than the compression. Due to the Go Zip API, we also need to // know the result before we can begin writing the compressed // data out to the zipfile. wg.Add(1) go z.crcFile(r, ze, compressChan, wg) for start := int64(0); start < fileSize; start += parallelBlockSize { sr := io.NewSectionReader(r, start, parallelBlockSize) resultChan := make(chan io.Reader, 1) ze.futureReaders <- resultChan z.cpuRateLimiter.Request() last := !(start+parallelBlockSize < fileSize) var dict []byte if start >= windowSize { dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize)) if err != nil { return err } } wg.Add(1) go z.compressPartialFile(sr, dict, last, resultChan, wg) } close(ze.futureReaders) // Close the file handle after all readers are done go func(wg *sync.WaitGroup, closer io.Closer) { wg.Wait() closer.Close() }(wg, r) } else { go func() { z.compressWholeFile(ze, r, compressChan) r.Close() }() } return nil } func (z *ZipWriter) crcFile(r io.Reader, ze *zipEntry, resultChan chan *zipEntry, wg *sync.WaitGroup) { defer wg.Done() defer z.cpuRateLimiter.Finish() crc := crc32.NewIEEE() _, err := io.Copy(crc, r) if err != nil { z.errors <- err return } ze.fh.CRC32 = crc.Sum32() resultChan <- ze close(resultChan) } func (z *ZipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, resultChan chan io.Reader, wg *sync.WaitGroup) { defer wg.Done() result, err := z.compressBlock(r, dict, last) if err != nil { z.errors <- err return } z.cpuRateLimiter.Finish() resultChan <- result } func (z *ZipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) { buf := new(bytes.Buffer) var fw *flate.Writer var err error if len(dict) > 0 { // There's no way to Reset a Writer with a new dictionary, so // don't use the Pool fw, err = flate.NewWriterDict(buf, z.compLevel, dict) } else { var ok bool if fw, ok = z.compressorPool.Get().(*flate.Writer); ok { fw.Reset(buf) } else { fw, err = flate.NewWriter(buf, z.compLevel) } defer z.compressorPool.Put(fw) } if err != nil { return nil, err } _, err = io.Copy(fw, r) if err != nil { return nil, err } if last { fw.Close() } else { fw.Flush() } return buf, nil } func (z *ZipWriter) compressWholeFile(ze *zipEntry, r io.ReadSeeker, compressChan chan *zipEntry) { crc := crc32.NewIEEE() _, err := io.Copy(crc, r) if err != nil { z.errors <- err return } ze.fh.CRC32 = crc.Sum32() _, err = r.Seek(0, 0) if err != nil { z.errors <- err return } readFile := func(reader io.ReadSeeker) ([]byte, error) { _, err := reader.Seek(0, 0) if err != nil { return nil, err } buf, err := ioutil.ReadAll(reader) if err != nil { return nil, err } return buf, nil } ze.futureReaders = make(chan chan io.Reader, 1) futureReader := make(chan io.Reader, 1) ze.futureReaders <- futureReader close(ze.futureReaders) if ze.fh.Method == zip.Deflate { compressed, err := z.compressBlock(r, nil, true) if err != nil { z.errors <- err return } if uint64(compressed.Len()) < ze.fh.UncompressedSize64 { futureReader <- compressed } else { buf, err := readFile(r) if err != nil { z.errors <- err return } ze.fh.Method = zip.Store futureReader <- bytes.NewReader(buf) } } else { buf, err := readFile(r) if err != nil { z.errors <- err return } ze.fh.Method = zip.Store futureReader <- bytes.NewReader(buf) } z.cpuRateLimiter.Finish() close(futureReader) compressChan <- ze close(compressChan) } // writeDirectory annotates that dir is a directory created for the src file or directory, and adds // the directory entry to the zip file if directories are enabled. func (z *ZipWriter) writeDirectory(dir string, src string, emulateJar bool) error { // clean the input dir = filepath.Clean(dir) // discover any uncreated directories in the path zipDirs := []string{} for dir != "" && dir != "." { if _, exists := z.createdDirs[dir]; exists { break } if prev, exists := z.createdFiles[dir]; exists { return fmt.Errorf("destination %q is both a directory %q and a file %q", dir, src, prev) } z.createdDirs[dir] = src // parent directories precede their children zipDirs = append([]string{dir}, zipDirs...) dir = filepath.Dir(dir) } if z.directories { // make a directory entry for each uncreated directory for _, cleanDir := range zipDirs { var dirHeader *zip.FileHeader if emulateJar && cleanDir+"/" == jar.MetaDir { dirHeader = jar.MetaDirFileHeader() } else { dirHeader = &zip.FileHeader{ Name: cleanDir + "/", } dirHeader.SetMode(0700 | os.ModeDir) } dirHeader.SetModTime(z.time) ze := make(chan *zipEntry, 1) ze <- &zipEntry{ fh: dirHeader, } close(ze) z.writeOps <- ze } } return nil } func (z *ZipWriter) writeSymlink(rel, file string) error { fileHeader := &zip.FileHeader{ Name: rel, } fileHeader.SetModTime(z.time) fileHeader.SetMode(0700 | os.ModeSymlink) dest, err := os.Readlink(file) if err != nil { return err } ze := make(chan *zipEntry, 1) futureReaders := make(chan chan io.Reader, 1) futureReader := make(chan io.Reader, 1) futureReaders <- futureReader close(futureReaders) futureReader <- bytes.NewBufferString(dest) close(futureReader) ze <- &zipEntry{ fh: fileHeader, futureReaders: futureReaders, } close(ze) z.writeOps <- ze return nil } func recursiveGlobFiles(path string) []string { var files []string filepath.Walk(path, func(path string, info os.FileInfo, err error) error { if !info.IsDir() { files = append(files, path) } return nil }) return files }