0byt3m1n1
Path:
/
data
/
9
/
logs
/
[
Home
]
File: LogSorter.java
/* * Web server log sorting utility. * Copyright (C) 2001 Stephen Ostermiller * http://ostermiller.org/contact.pl?regarding=webalizer * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * See COPYING.TXT for details. */ import java.util.*; import java.text.*; import java.io.*; /** * Sorts web logs that are in combined log format. * <p> * Log files are assumed to be semi-sorted. If * too many log entries are out of order, an error * message will be printed and you should try again * with a larger look ahead pool size. * <p> * If P is the look ahead pool size, N is the number of log entries * and F is the number of files, this program will run * in order N*F*Log(P) time. (Notice that it is not optimized * for a large number of files and that it will run faster * with a smaller look ahead pool.) * <p> * <h3>Usage:</h3> * <p>This program either reads form standard input (with no arguments) * or from each of the files specified as arguments. Output always * is written to standard output, errors to standard error. If you * wish to sort logs and remove duplicate entries or if you want to * bump up the look ahead pool size, simply recompile the program with * those options.</p> * <h3>Examples:</h3> * <h4>Sorting a singe log file:</h4> * <code>java LogSorter access.log > sorted_access.log</code> * <h4>Merging log files from several servers:</h4> * <code>java LogSorter server1_access.log server2_access.log server3_access.log > sorted_access.log</code> * <h4>Merging rotated log files:</h4> * <code>cat access1999.log access2000.log access2001.log | java LogSorter > sorted_access.log</code><br> * (concatinating all the log files and sending them to standard input has * effect of running this program with one log file rather than F files.) * <h4>Sending the output directly to Webalizer</h4> * <code>java LogSorter access.log | webalizer</code> * <h4>Sending the output to gzip:</h4> * <code>java LogSorter access.log | gzip > sorted_access.log.gz</code> * <h4>Sorting gzipped log files:</h4> * <code>gunzip -c access.log.gz | java LogSorter | gzip > sorted_access.log.gz</code> */ public class LogSorter { private static int LOOKAHEAD_POOL_SIZE = 8192; private static boolean REMOVE_DUPLICATES = false; private BufferedReader[] logs; private LogEntry[] firstEntries; private int lookAhead; private static SimpleDateFormat logDateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss"); TreeSet pool = new TreeSet(); private LogEntry lastReturned; public LogSorter(InputStream[] logs){ this (logs, LOOKAHEAD_POOL_SIZE); } public LogSorter(InputStream[] logs, int lookAhead){ this.logs = new BufferedReader[logs.length]; for (int i=0; i<logs.length; i++){ this.logs[i] = new BufferedReader(new InputStreamReader(logs[i])); } this.lookAhead = lookAhead; firstEntries = new LogEntry[logs.length]; for (int i=0; i<logs.length; i++){ fillFirstEntry(i); } fillPool(); } public void fillFirstEntry(int i){ boolean found = false; while (!found){ try { String entry = this.logs[i].readLine(); if (entry == null){ firstEntries[i] = null; } else { firstEntries[i] = new LogEntry(entry); } found = true; } catch (IOException x){ System.err.println(x); firstEntries[i] = null; found = true; } catch (IllegalArgumentException x){ System.err.println(x); firstEntries[i] = null; } } } public String next(){ LogEntry ret; try { ret = (LogEntry)pool.first(); pool.remove(ret); fillPool(); if (lastReturned != null){ if (ret.compareTo(lastReturned) < 0){ System.err.println( "Logs may be out of order, a date of " + lastReturned.date + " was already printed but a date of " + ret.date + " was just found. Try increasing the lookAhead pool size. (" + pool.size() + ")" ); } else { lastReturned = ret; } } else { lastReturned = ret; } return ret.entry; } catch (NoSuchElementException x){ return null; } } private LogEntry getLeastFirstEntry(){ int least = -1; for (int i=0; i<firstEntries.length; i++){ if (firstEntries[i] != null){ if (least==-1 || firstEntries[i].compareTo(firstEntries[least]) < 0){ least = i; } } } if (least == -1){ return null; } LogEntry ret = firstEntries[least]; fillFirstEntry(least); return ret; } public static void main(String[] args){ try { InputStream[] logs; if(args.length > 0){ logs = new InputStream[args.length]; for (int i=0; i<logs.length; i++){ logs[i] = new FileInputStream(args[i]); } } else { logs = new InputStream[]{ System.in }; } LogSorter sorter = new LogSorter(logs); String s; while ((s = sorter.next()) != null){ System.out.println(s); } } catch (IOException x){ System.err.println(x); } } private void fillPool(){ LogEntry entry; while (pool.size() < lookAhead && (entry = getLeastFirstEntry()) != null){ if (!pool.add(entry)){ System.err.println("Duplicate log entry: " + entry); } } } private static long counter = 0; class LogEntry implements Comparable{ private long count; public LogEntry (String s){ entry = s; int startParse = s.indexOf("["); if (startParse == -1){ throw new IllegalArgumentException("No date found in log entry: " + s); } ParsePosition p = new ParsePosition(startParse + 1); date = logDateFormat.parse(s, p); if (date == null){ throw new IllegalArgumentException("No date found in log entry: " + s); } count = LogSorter.counter++; } public String entry; public Date date; public String toString(){ return entry; } public int compareTo(Object o){ LogEntry l = (LogEntry)o; int result = this.date.compareTo(l.date); if (result == 0){ result = this.entry.compareTo(l.entry); } if (result == 0 && !REMOVE_DUPLICATES){ result = (int)(count - l.count); } return result; } } }