From eff3895abdc605b8b7578db596cca33df13eae70 Mon Sep 17 00:00:00 2001
From: Michael Davis <michael.davis@cern.ch>
Date: Thu, 3 May 2018 16:31:50 +0200
Subject: [PATCH] [log-analysis] Get EOS directory from artefacts

---
 .../log_analysis/CI/analyse_logs.sh           |  26 ----
 .../log_analysis/CI/get_times.sh              | 124 ++++++++++--------
 .../log_analysis/CI/postprocess.sh            | 106 ++++++++++++++-
 .../log_analysis/CI/preprocess.sh             |   4 +-
 .../log_analysis/CI/process_log_events.sh     |  31 +++++
 5 files changed, 200 insertions(+), 91 deletions(-)
 delete mode 100755 continuousintegration/log_analysis/CI/analyse_logs.sh
 create mode 100755 continuousintegration/log_analysis/CI/process_log_events.sh

diff --git a/continuousintegration/log_analysis/CI/analyse_logs.sh b/continuousintegration/log_analysis/CI/analyse_logs.sh
deleted file mode 100755
index 9204849dd8..0000000000
--- a/continuousintegration/log_analysis/CI/analyse_logs.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/sh
-#
-# analyse_logs.sh
-#
-# Extract timing information for CTA events from CI logs. Create artifacts for analysis and
-# visualisation.
-
-# Parameters, should be determined from the artifacts
-SCRIPTDIR=${HOME}/CTA/continuousintegration/log_analysis/CI
-ARTIFACT_HOME=${1-/tmp/pod_logs/archiveretrieve-367256gitec0e2007-a7ki}
-TEST_RUN=${2-378e3d54-f89f-4e5d-83ef-e4c78d60b16c}
-
-# Location of the logfiles we want to analyse
-MGM_LOG=${ARTIFACT_HOME}/ctaeos/eos/mgm/xrdlog.mgm
-FRONTEND_LOG=${ARTIFACT_HOME}/ctafrontend/cta/cta-frontend.log
-TAPED_LOG=${ARTIFACT_HOME}/tpsrv01/taped/cta/cta-taped.log
-
-# Create temporary working directory
-TMPDIR=/tmp/analyse_logs
-rm -rf ${TMPDIR}
-mkdir ${TMPDIR}
-cd ${TMPDIR}
-
-# Extract the events of interest from the logfiles
-${SCRIPTDIR}/preprocess.sh ${TEST_RUN} ${MGM_LOG} ${FRONTEND_LOG} ${TAPED_LOG}
-
diff --git a/continuousintegration/log_analysis/CI/get_times.sh b/continuousintegration/log_analysis/CI/get_times.sh
index cf795d82a9..a9d64d865c 100755
--- a/continuousintegration/log_analysis/CI/get_times.sh
+++ b/continuousintegration/log_analysis/CI/get_times.sh
@@ -1,22 +1,33 @@
-#!/bin/sh
+#!/bin/bash
+
+# Colours
+NC='\033[0m' # No colour
+RED='\033[0;31m'
+LT_RED='\033[1;31m'
+GREEN='\033[0;32m'
+LT_GREEN='\033[1;32m'
+ORANGE='\033[0;33m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+LT_BLUE='\033[1;34m'
 
 error()
 {
-  echo "$*" >&2
+  echo -e "${RED}$*${NC}" >&2
   exit 1
 }
 
-get_filename()
+echoc()
 {
-  awk -vFILENAME=${1} -vFILENAME_PREFIX=${FILENAME_PREFIX} 'BEGIN {
-    FILENAME="00000000" FILENAME
-    print FILENAME_PREFIX substr(FILENAME, length(FILENAME)-7)
-  }'
+  COLOUR=$1
+  shift
+  echo -ne "${COLOUR}$*${NC}" >&2
 }
 
 get_ids()
 {
-  IDS=$(grep -m1 "^$1 " filenameToLogIds.txt)
+  IDS=$(sed "$1q;d" filenameToLogIds.txt)
+  FILENAME=$(echo $IDS | cut -d' ' -f1)
   FILE_ID=$(echo $IDS | cut -d' ' -f2)
   ARCHIVE_ID=$(echo $IDS | cut -d' ' -f3)
 }
@@ -31,9 +42,9 @@ check_results()
 
 get_mgm_times()
 {
-  TMP_RESULTS=$(grep -m6 "^$1 " xrdmgm-events.log)
+  TMP_RESULTS=$(grep -m7 "^$1 " xrdmgm-events.log)
 
-  echo -e "${TMP_RESULTS}" | check_results 6
+  echo -e "${TMP_RESULTS}" | check_results 7
   [ $? -ne 0 ] && TMP_RESULTS=$(grep "^$1 " xrdmgm-events.log)
 
   TIME_MGM_CREATE=$(echo -e "${TMP_RESULTS}" | grep -m1 "^$1 CRE" | cut -d' ' -f3)
@@ -42,7 +53,7 @@ get_mgm_times()
   TIME_MGM_ARCHIVED=$(echo -e "${TMP_RESULTS}" | grep -m1 "^$1 ARC" | cut -d' ' -f3)
   TIME_MGM_PREPARE=$(echo -e "${TMP_RESULTS}" | grep -m1 "^$1 PRE" | cut -d' ' -f3)
   TIME_MGM_RCOMMIT=$(echo -e "${TMP_RESULTS}" | grep -m1 "^$1 RCO" | cut -d' ' -f3)
-  #TIME_MGM_RETRIEVED=$(echo -e "${TMP_RESULTS}" | grep -m1 "^$1 RWC" | cut -d' ' -f3)
+  TIME_MGM_RETRIEVED=$(echo -e "${TMP_RESULTS}" | grep -m1 "^$1 RWC" | cut -d' ' -f3)
 }
 
 get_frontend_times()
@@ -90,66 +101,67 @@ get_offset_time()
 get_times()
 {
   get_ids $1
-  get_mgm_times $1
-  get_frontend_times $ARCHIVE_ID
-  get_taped_times $ARCHIVE_ID
-
-  echo -n "$ARCHIVE_ID $FILE_ID "
-
-# For debugging
-#  echo $TIME_MGM_CREATE $TIME_FE_CREATE $TIME_MGM_ACOMMIT $TIME_MGM_CLOSEW $TIME_FE_CLOSEW $TIME_TS_ARCHIVE_POP \
-#       $TIME_TS_ARCHIVE_TASKS $TIME_TS_ARCHIVE_OPEN $TIME_TS_ARCHIVE_READ $TIME_TS_ARCHIVE_DONE $TIME_TS_ARCHIVE_REPORT \
-#       $TIME_MGM_ARCHIVED
-#  echo $TIME_MGM_PREPARE $TIME_FE_PREPARE $TIME_TS_RETRIEVE_POP $TIME_TS_RETRIEVE_TASK $TIME_TS_RETRIEVE_TASKS \
-#       $TIME_TS_RETRIEVE_POSITION $TIME_TS_RETRIEVE_READ $TIME_TS_RETRIEVE_OPEN $TIME_TS_RETRIEVE_DONE \
-#       $TIME_MGM_RCOMMIT $TIME_MGM_RETRIEVED
-
-  # MGM events have second precision. CTA events have microsecond precision
-
-  echo -n "$(get_offset_time $TIME_MGM_CREATE $TIME_FE_CREATE) "
-  echo -n "$(get_offset_time $TIME_MGM_CREATE $TIME_MGM_ACOMMIT) "
-  echo -n "$(get_offset_time $TIME_MGM_ACOMMIT $TIME_MGM_CLOSEW) "
-  echo -n "$(get_offset_time $TIME_FE_CREATE $TIME_FE_CLOSEW) "
-  echo -n "$(get_offset_time $TIME_FE_CLOSEW $TIME_TS_ARCHIVE_POP) "
-  echo -n "$(get_offset_time $TIME_TS_ARCHIVE_POP $TIME_TS_ARCHIVE_TASKS) "
-  echo -n "$(get_offset_time $TIME_TS_ARCHIVE_TASKS $TIME_TS_ARCHIVE_OPEN) "
-  echo -n "$(get_offset_time $TIME_TS_ARCHIVE_OPEN $TIME_TS_ARCHIVE_READ) "
-  echo -n "$(get_offset_time $TIME_TS_ARCHIVE_READ $TIME_TS_ARCHIVE_DONE) "
-  echo -n "$(get_offset_time $TIME_TS_ARCHIVE_DONE $TIME_TS_ARCHIVE_REPORT) "
-  echo -n "$(get_offset_time $TIME_TS_ARCHIVE_REPORT $TIME_MGM_ARCHIVED) "
-
-  echo -n "$(get_offset_time $TIME_MGM_PREPARE $TIME_FE_PREPARE) "
-  echo -n "$(get_offset_time $TIME_FE_PREPARE $TIME_TS_RETRIEVE_POP) "
-  echo -n "$(get_offset_time $TIME_TS_RETRIEVE_POP $TIME_TS_RETRIEVE_TASK) "
-  echo -n "$(get_offset_time $TIME_TS_RETRIEVE_TASK $TIME_TS_RETRIEVE_TASKS) "
-  echo -n "$(get_offset_time $TIME_TS_RETRIEVE_TASKS $TIME_TS_RETRIEVE_POSITION) "
-  echo -n "$(get_offset_time $TIME_TS_RETRIEVE_POSITION $TIME_TS_RETRIEVE_READ) "
-  echo -n "$(get_offset_time $TIME_TS_RETRIEVE_READ $TIME_TS_RETRIEVE_OPEN) "
-  echo -n "$(get_offset_time $TIME_TS_RETRIEVE_OPEN $TIME_TS_RETRIEVE_DONE) "
-  echo    "$(get_offset_time $TIME_TS_RETRIEVE_DONE $TIME_MGM_RCOMMIT);"
-#  echo -n "$(get_offset_time $TIME_TS_RETRIEVE_DONE $TIME_MGM_RCOMMIT) "
-#  echo    "$(get_offset_time $TIME_MGM_RCOMMIT $TIME_MGM_RETRIEVED);"
+  get_mgm_times ${FILENAME}
+  get_frontend_times ${ARCHIVE_ID}
+  get_taped_times ${ARCHIVE_ID}
+
+  echo -n "${ARCHIVE_ID},"
+  echo -n "${FILE_ID},"
+
+  # Note: MGM events have second precision. CTA events have microsecond precision
+
+  echo -n "${TIME_MGM_CREATE},"
+  echo -n "${TIME_FE_CREATE},"
+  echo -n "${TIME_MGM_ACOMMIT},"
+  echo -n "${TIME_MGM_CLOSEW},"
+  echo -n "${TIME_FE_CLOSEW},"
+  echo -n "${TIME_TS_ARCHIVE_POP},"
+  echo -n "${TIME_TS_ARCHIVE_TASKS},"
+  echo -n "${TIME_TS_ARCHIVE_OPEN},"
+  echo -n "${TIME_TS_ARCHIVE_READ},"
+  echo -n "${TIME_TS_ARCHIVE_DONE},"
+  echo -n "${TIME_TS_ARCHIVE_REPORT},"
+  echo -n "${TIME_MGM_ARCHIVED},"
+
+  echo -n "${TIME_MGM_PREPARE},"
+  echo -n "${TIME_FE_PREPARE},"
+  echo -n "${TIME_TS_RETRIEVE_POP},"
+  echo -n "${TIME_TS_RETRIEVE_TASK},"
+  echo -n "${TIME_TS_RETRIEVE_TASKS},"
+  echo -n "${TIME_TS_RETRIEVE_POSITION},"
+  echo -n "${TIME_TS_RETRIEVE_READ},"
+  echo -n "${TIME_TS_RETRIEVE_OPEN},"
+  echo -n "${TIME_TS_RETRIEVE_DONE},"
+  echo -n "${TIME_MGM_RCOMMIT},"
+  echo    "${TIME_MGM_RETRIEVED}"
 }
 
-FILE_NUM_START=${1:-0}
-FILE_NUM_END=${2:-9099999}
-
-FILENAME_PREFIX=
+FILE_NUM_START=1
+FILE_NUM_END=$(wc -l filenameToLogIds.txt | cut -d' ' -f1)
 
 [ -r filenameToLogIds.txt ] || error "Can't open filenameToLogIds.txt"
 [ -r xrdmgm-events.log ] || error "Can't open xrdmgm-events.log"
 [ -r cta-frontend-events.log ] || error "Can't open cta-frontend-events.log"
 [ -r cta-taped-events.log ] || error "Can't open cta-taped-events.log"
 
-echo "cta_times = ["
+echoc $LT_BLUE "Creating CSV from events..."
+
+# Print CSV header
+echo -n "Archive File ID,Disk File ID,MGM CREATE,FE CREATE,MGM commit,MGM CLOSEW,FE CLOSEW,"
+echo -n "TS Archive Pop,TS Archive Tasks,TS Archive Open,TS Archive Read,TS Archive Done,"
+echo -n "TS Archive Report,MGM ARCHIVED,MGM PREPARE,FE PREPARE,TS Retrieve Pop,TS Retrieve Task,"
+echo -n "TS Retrieve Tasks,TS Retrieve Position,TS Retrieve Read,TS Retrieve Open,TS Retrieve Done,"
+echo    "MGM commit,MGM RETRIEVED"
 
+# Print CSV data
 FILE_NUM=${FILE_NUM_START}
 while :
 do
+  [ "$FILE_NUM" == "$((${FILE_NUM}/100))00" ] && echoc $LT_BLUE "${FILE_NUM}..."
   get_times ${FILE_NUM}
   FILE_NUM=$((${FILE_NUM}+1))
   [ $FILE_NUM -le $FILE_NUM_END ] || break
 done
 
-echo "];"
+echoc $LT_BLUE "done.\n"
 
diff --git a/continuousintegration/log_analysis/CI/postprocess.sh b/continuousintegration/log_analysis/CI/postprocess.sh
index 21db63c8a2..a720344365 100755
--- a/continuousintegration/log_analysis/CI/postprocess.sh
+++ b/continuousintegration/log_analysis/CI/postprocess.sh
@@ -1,10 +1,104 @@
-#!/bin/sh
+#!/bin/bash
+#
+# Preprocess logfiles for more efficient data extraction
 
-process()
+# Colours
+NC='\033[0m' # No colour
+RED='\033[0;31m'
+LT_RED='\033[1;31m'
+GREEN='\033[0;32m'
+LT_GREEN='\033[1;32m'
+ORANGE='\033[0;33m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+LT_BLUE='\033[1;34m'
+
+error()
+{
+  echo -e "${RED}$*${NC}" >&2
+  exit 1
+}
+
+echoc()
 {
-  echo "log_data = ["
-  awk '/[^ ];$/ && NF == 21 { print $0; }' $1
-  echo "];"
+  COLOUR=$1
+  shift
+  echo -ne "${COLOUR}$*${NC}" >&2
+}
+
+check_file_exists()
+{
+  [ -f ${1} ] || error "${1} does not exist or is not a regular file"
+  [ -r ${1} ] || error "Can't open ${1} for reading"
+  [ -s ${1} ] || error "${1} is zero-length"
+}
+
+convert_abs_rel()
+{
+  awk 'BEGIN { FS=","; SKIPHEADER=1; }
+    function rel(arg1, arg2) { return arg2-arg1; }
+    SKIPHEADER==1 {
+      printf "Archive File ID,Disk File ID,FE CREATE,MGM commit,MGM CLOSEW,TS Archive Pop,TS Archive Tasks,"
+      printf "TS Archive Open,TS Archive Read,TS Archive Done,TS Archive Report,MGM ARCHIVED,FE PREPARE,"
+      printf "TS Retrieve Pop,TS Retrieve Task,TS Retrieve Tasks,TS Retrieve Position,TS Retrieve Read,"
+      printf "TS Retrieve Open,TS Retrieve Done,MGM commit,MGM RETRIEVED\n"
+      SKIPHEADER=0; next;
+    }
+    {
+      ARCHIVE_ID=$1
+      FILE_ID=$2
+      TIME_MGM_CREATE=$3
+      TIME_FE_CREATE=$4
+      TIME_MGM_ACOMMIT=$5
+      TIME_MGM_CLOSEW=$6
+      TIME_FE_CLOSEW=$7
+      TIME_TS_ARCHIVE_POP=$8
+      TIME_TS_ARCHIVE_TASKS=$9
+      TIME_TS_ARCHIVE_OPEN=$10
+      TIME_TS_ARCHIVE_READ=$11
+      TIME_TS_ARCHIVE_DONE=$12
+      TIME_TS_ARCHIVE_REPORT=$13
+      TIME_MGM_ARCHIVED=$14
+      TIME_MGM_PREPARE=$15
+      TIME_FE_PREPARE=$16
+      TIME_TS_RETRIEVE_POP=$17
+      TIME_TS_RETRIEVE_TASK=$18
+      TIME_TS_RETRIEVE_TASKS=$19
+      TIME_TS_RETRIEVE_POSITION=$20
+      TIME_TS_RETRIEVE_READ=$21
+      TIME_TS_RETRIEVE_OPEN=$22
+      TIME_TS_RETRIEVE_DONE=$23
+      TIME_MGM_RCOMMIT=$24
+      TIME_MGM_RETRIEVED=$25
+
+      printf "%d,%d,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f\n",
+        ARCHIVE_ID,FILE_ID,
+        rel(TIME_MGM_CREATE,TIME_FE_CREATE),
+        rel(TIME_MGM_CREATE,TIME_MGM_ACOMMIT),
+        rel(TIME_MGM_ACOMMIT,TIME_MGM_CLOSEW),
+        rel(TIME_MGM_CLOSEW,TIME_FE_CLOSEW),
+        rel(TIME_FE_CLOSEW,TIME_TS_ARCHIVE_POP),
+        rel(TIME_TS_ARCHIVE_POP,TIME_TS_ARCHIVE_TASKS),
+        rel(TIME_TS_ARCHIVE_TASKS,TIME_TS_ARCHIVE_OPEN),
+        rel(TIME_TS_ARCHIVE_OPEN,TIME_TS_ARCHIVE_READ),
+        rel(TIME_TS_ARCHIVE_READ,TIME_TS_ARCHIVE_DONE),
+        rel(TIME_TS_ARCHIVE_DONE,TIME_TS_ARCHIVE_REPORT),
+        rel(TIME_TS_ARCHIVE_REPORT,TIME_MGM_ARCHIVED),
+        rel(TIME_MGM_PREPARE,TIME_FE_PREPARE),
+        rel(TIME_FE_PREPARE,TIME_TS_RETRIEVE_POP),
+        rel(TIME_TS_RETRIEVE_POP,TIME_TS_RETRIEVE_TASK),
+        rel(TIME_TS_RETRIEVE_TASK,TIME_TS_RETRIEVE_TASKS),
+        rel(TIME_TS_RETRIEVE_TASKS,TIME_TS_RETRIEVE_POSITION),
+        rel(TIME_TS_RETRIEVE_POSITION,TIME_TS_RETRIEVE_READ),
+        rel(TIME_TS_RETRIEVE_READ,TIME_TS_RETRIEVE_OPEN),
+        rel(TIME_TS_RETRIEVE_OPEN,TIME_TS_RETRIEVE_DONE),
+        rel(TIME_TS_RETRIEVE_DONE,TIME_MGM_RCOMMIT),
+        rel(TIME_MGM_RCOMMIT,TIME_MGM_RETRIEVED)
+    }' $1
 }
 
-process log_analysis.m >log_analysis_post.m
+[ $# -eq 1 ] || error "Usage: postprocess <file.csv>"
+check_file_exists $1
+echoc ${LT_BLUE} "Converting from absolute to relative times..."
+convert_abs_rel $1
+echoc ${LT_BLUE} "done.\n"
diff --git a/continuousintegration/log_analysis/CI/preprocess.sh b/continuousintegration/log_analysis/CI/preprocess.sh
index dbeff4c37c..b8b4dc8dc5 100755
--- a/continuousintegration/log_analysis/CI/preprocess.sh
+++ b/continuousintegration/log_analysis/CI/preprocess.sh
@@ -70,7 +70,7 @@ process_frontend_log()
 {
   echoc $LT_BLUE "Extracting archive log messages from cta-frontend..."
 
-  awk -vDATE_OFFSET="$DATE_OFFSET" -vTEST_RUN=${TEST_RUN} '
+  awk -vDATE_OFFSET="$DATE_OFFSET" '
     function processLine(event, logline) {
       sub(" .* fileId=\"", " ", logline)
       sub("\".*$", "", logline)
@@ -88,8 +88,6 @@ process_frontend_log()
       print logarray[3],event,timesecs "." MSECS
     }
 
-    $0 !~ TEST_RUN { next }
-
     /CREATE/ { processLine("CRE", $0) }
     /CLOSEW/ { processLine("CLW", $0) }
     /PREPARE/ { processLine("PRE", $0) }
diff --git a/continuousintegration/log_analysis/CI/process_log_events.sh b/continuousintegration/log_analysis/CI/process_log_events.sh
new file mode 100755
index 0000000000..d6fea76a07
--- /dev/null
+++ b/continuousintegration/log_analysis/CI/process_log_events.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+#
+# process_log_events.sh
+#
+# Extract timing information for CTA events from CI logs. Create artifacts for analysis and
+# visualisation.
+
+CTA_HOME=${HOME}/CTA/continuousintegration
+SCRIPT_HOME=${CTA_HOME}/log_analysis/CI
+
+# Location of the latest set of artefacts
+ARTEFACTS_HOME=/tmp/pod_logs
+ARTEFACT_HOME=${ARTEFACTS_HOME}/$(ls -t ${ARTEFACTS_HOME} | head -1)
+
+# Location of the logfiles we want to analyse
+MGM_LOG=${ARTEFACT_HOME}/ctaeos/eos/mgm/xrdlog.mgm
+FRONTEND_LOG=${ARTEFACT_HOME}/ctafrontend/cta/cta-frontend.log
+TAPED_LOG=${ARTEFACT_HOME}/tpsrv01/taped/cta/cta-taped.log
+
+# The path of the run we want to analyse
+EOS_PATH=$(grep "Creating test dir in eos:" ${ARTEFACT_HOME}/systests.sh.log | cut -d' ' -f6)
+
+# Create temporary working directory
+TMPDIR=/tmp/$(basename ${0}).$$
+mkdir -p ${TMPDIR}
+cd ${TMPDIR}
+
+# Extract the events of interest from the logfiles
+${SCRIPT_HOME}/preprocess.sh ${EOS_PATH} ${MGM_LOG} ${FRONTEND_LOG} ${TAPED_LOG}
+${SCRIPT_HOME}/get_times.sh >cta_times_abs.csv
+${SCRIPT_HOME}/postprocess.sh cta_times_abs.csv >cta_times_rel.csv
-- 
GitLab