#!/usr/bin/env python
"""Cleans output from other scripts to eliminate duplicates.

When frequently sampling data, we see that records occasionally will contain
the same timestamp (due to perf recording twice in the same second).

This removes all of the duplicate timestamps for every record. Order with
respect to timestamps is not preserved. Also, the assumption is that the log
file is a csv with the first value in each row being the time in seconds from a
standard time.

"""

import argparse

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('filename', help='path to the CSV log file to clean')
args = parser.parse_args()

# Map each timestamp to the last line seen with that timestamp.
records = {}

with open(args.filename) as my_file:
    for line in my_file:
        if not line.strip():
            continue  # skip blank lines
        new_time = int(line.split(',')[0])
        records[new_time] = line

# One line per unique timestamp; results are appended to clean2.csv.
with open('clean2.csv', 'a') as output_file:
    for line in records.values():
        output_file.write(line)
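
# Example invocation (the script name is hypothetical; substitute whatever this
# file is saved as):
#
#   python clean_duplicates.py samples.csv
#
# The de-duplicated rows are appended to clean2.csv in the current directory.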