git » msnlib » next » tree

[next] / utils / hmerge

#!/usr/bin/env python

"""
Merger for msnlib logfiles.

It takes two logfiles as arguments, and prints out the merge between them,
sorting using the time.

Quite useful when you have used msn in two different places and want to unify
the logs.


Note that this does not sort by absolute time (clocks do go backwards now and
then, as we all know =). Instead the two files are merged by comparing the
timestamps of their current records, one record at a time.

Alberto Bertogli (albertito@blitiri.com.ar), 02/Jun/2003
"""


import sys
import time


def get_records(fd):
	"""Read a logfile and return its records as a list of (timestamp, text).

	A record starts on a line of the form 'DD/Mon/YYYY HH:MM:SS ...'; any
	following lines that begin with a tab are continuation lines and are
	kept as part of the same record.

	fd is an open file-like object providing readline().

	Each returned tuple holds the record time as given by time.mktime()
	(local time, seconds since the epoch) and the full, unmodified record
	text including its newlines. An empty file yields an empty list.

	A line that does not start with a valid date raises IndexError or
	ValueError, depending on how it is malformed.
	"""
	records = []
	rec = fd.readline()
	while rec:
		# gather the continuation lines (the ones beginning with \t)
		l = fd.readline()
		while l and l[0] == '\t':
			rec += l
			l = fd.readline()

		# the first two space-separated fields are the date and the time
		ls = rec.split(' ', 2)
		# strip so that neither a trailing newline (record with nothing
		# after the time) nor the lack of a trailing space can make the
		# parse fail; whitespace in a strptime format must match at least
		# one character in the pure-Python implementation
		raw_date = (ls[0] + ' ' + ls[1]).strip()
		date = time.strptime(raw_date, '%d/%b/%Y %H:%M:%S')
		records.append((time.mktime(date), rec))

		# the line that ended this record begins the next one
		rec = l
	return records

def panic(s):
	"""Report the error message s and terminate with exit status 1.

	The message goes to stderr because stdout carries the merged log; this
	keeps diagnostics out of the output when it is redirected to a file.
	Raises SystemExit (through sys.exit).
	"""
	sys.stderr.write('%s\n' % (s,))
	sys.exit(1)

# open both logfiles; a missing argument or an unreadable file gets the
# usage message (other exceptions, like KeyboardInterrupt, are not hidden)
try:
	fd1 = open(sys.argv[1])
	fd2 = open(sys.argv[2])
except (IndexError, IOError):
	panic("Use: hmerge file1 file2")

# read everything up front and release the files right away
try:
	rec1 = get_records(fd1)
	rec2 = get_records(fd2)
finally:
	fd1.close()
	fd2.close()

if not rec1: panic("Error: file 1 doesn't have any records")
if not rec2: panic("Error: file 2 doesn't have any records")


def emit(text):
	# write one record, making sure it ends its own line so that a file
	# lacking the final newline doesn't get glued to the next record
	sys.stdout.write(text)
	if not text.endswith('\n'):
		sys.stdout.write('\n')


len1 = len(rec1)
len2 = len(rec2)

point1 = 0
point2 = 0

# while both lists have records left, compare the current ones and print
# the earlier; on a tie print both, the one from file 1 first
while point1 < len1 and point2 < len2:
	r1 = rec1[point1]
	r2 = rec2[point2]

	if r1[0] <= r2[0]:
		emit(r1[1])
		point1 += 1
	if r1[0] >= r2[0]:
		emit(r2[1])
		point2 += 1

# one of the lists is exhausted; print whatever remains of the other
for r1 in rec1[point1:]:
	emit(r1[1])
for r2 in rec2[point2:]:
	emit(r2[1])