diff --git a/continuousintegration/test/preprocess b/continuousintegration/test/preprocess
new file mode 100755
index 0000000000000000000000000000000000000000..607639d4910e2923448cc2b515d4ac1bc12963a6
--- /dev/null
+++ b/continuousintegration/test/preprocess
@@ -0,0 +1,101 @@
+#!/bin/sh
+#
+# Preprocess logfiles for more efficient data extraction
+
+# Colours: ANSI escape sequences, listed as normal/bright pairs per hue.
+# The single quotes keep "\033" as literal text; it only becomes an ESC
+# character when the string is printed with escape interpretation.
+NC='\033[0m'   # No colour (reset)
+RED='\033[0;31m'     LT_RED='\033[1;31m'
+GREEN='\033[0;32m'   LT_GREEN='\033[1;32m'
+ORANGE='\033[0;33m'  YELLOW='\033[1;33m'
+BLUE='\033[0;34m'    LT_BLUE='\033[1;34m'
+
+echoc()
+{
+  COLOUR=$1
+  shift
+  echo -ne "${COLOUR}$*${NC}"
+}
+
+process_frontend_log()
+{
+  echoc $LT_BLUE "Extracting archive log messages from cta-frontend..."
+
+  FE_HOSTNAME=$(head -1 cta-frontend.log | awk '{ print $4 }')
+  DATE_OFFSET=$(head -1 cta-frontend.log | awk '{ gsub("^Apr ", "201804"); print $1 }')
+
+  awk -vFE_HOSTNAME="$FE_HOSTNAME" -vDATE_OFFSET="$DATE_OFFSET" '
+    function processLine(event, logline) {
+      gsub("^Apr ", "201804", logline)
+      gsub("archiveFileId", "fileId", logline)
+      gsub(FE_HOSTNAME " .* fileId=\"", "", logline)
+      gsub("\".*$", "", logline)
+      split(logline,logarray)
+      DAYS=logarray[1]-DATE_OFFSET
+      HOURS=substr(logarray[2],1,2)
+      MINS=substr(logarray[2],4,2)
+      SECS=substr(logarray[2],7,2)
+      timesecs=((DAYS*24 + HOURS)*60 + MINS)*60 + SECS
+      print logarray[3],event,timesecs
+    }
+
+    /CREATE/ { processLine("CRE", $0) }
+    /CLOSEW/ { processLine("CLW", $0) }
+    /PREPARE/ { processLine("PRE", $0) }
+  ' cta-frontend.log >cta-frontend.log.$$
+
+  echoc $LT_BLUE "done.\nSorting..."
+  sort -n cta-frontend.log.$$ >cta-frontend-events.log
+  echoc $LT_BLUE "done.\n"
+  rm -f cta-frontend.log.$$
+}
+
+process_taped_log()
+{
+  echoc $LT_BLUE "Extracting archive log messages from cta-taped..."
+
+  TAPED_HOSTNAME=$(head -1 cta-taped.log | awk '{ print $4 }')
+  DATE_OFFSET=$(head -1 cta-taped.log | awk '{ gsub("^Apr ", "201804"); print $1 }')
+
+  awk -vTAPED_HOSTNAME="$TAPED_HOSTNAME" -vDATE_OFFSET="$DATE_OFFSET" '
+    function processLine(event, logline) {
+      gsub("^Apr ", "201804", logline)
+      gsub(TAPED_HOSTNAME " .* fileId=\"", "", logline)
+      gsub("\".*$", "", logline)
+      split(logline,logarray)
+      DAYS=logarray[1]-DATE_OFFSET
+      HOURS=substr(logarray[2],1,2)
+      MINS=substr(logarray[2],4,2)
+      SECS=substr(logarray[2],7,2)
+      timesecs=((DAYS*24 + HOURS)*60 + MINS)*60 + SECS
+      print logarray[3],event,timesecs
+    }
+
+    /In ArchiveMount::getNextJobBatch\(\): popped one job/    { processLine("A_POP", $0) }
+    /Created tasks for migrating a file/                      { processLine("A_TSS", $0) }
+    /Opened disk file for read/                               { processLine("A_OPN", $0) }
+    /File successfully read from disk/                        { processLine("A_RED", $0) }
+    /File successfully transmitted to drive/                  { processLine("A_DNE", $0) }
+    /Reported to the client a full file archival/             { processLine("A_RPT", $0) }
+    /In RetrieveMount::getNextJobBatch\(\): popped one job/   { processLine("R_POP", $0) }
+    /Recall task created/                                     { processLine("R_TSK", $0) }
+    /Created tasks for recalling a file/                      { processLine("R_RCL", $0) }
+    /Successfully positioned for reading/                     { processLine("R_POS", $0) }
+    /File successfully read from tape/                        { processLine("R_RED", $0) }
+    /Opened disk file for writing/                            { processLine("R_OPN", $0) }
+    /File successfully transfered to disk/                    { processLine("R_TRF", $0) }
+  ' cta-taped.log >cta-taped.log.$$
+
+  echoc $LT_BLUE "done.\nSorting..."
+  sort -n cta-taped.log.$$ >cta-taped-events.log
+  echoc $LT_BLUE "done.\n"
+  rm -f cta-taped.log.$$
+}
+
+
+#process_frontend_log
+#process_taped_log
+