Commit ea78eecd authored by Eric Cano's avatar Eric Cano
Browse files

Merge branch 'atlas_week_2019_12'

parents b97037d2 21b6636f
*.aux
*.bbl
*.log
*.nav
*.out
*.snm
*.synctex.gz
*.toc
not_needed/
*.blg
cern-logo.pdf
\documentclass[aspectratio=149]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[super]{nth}
\usepackage{adjustbox}
\usepackage[binary-units=true, per-mode=symbol]{siunitx}
\usetheme{cern}
% ADC meetings: Data carousel | (discussion)
% Thursday, 5 December 40/S2-D01 - Salle Dirac
% https://indico.cern.ch/event/823341/#day-2019-12-05
%09:00 → 10:30
%ADC meetings: Data carousel | (discussion)¶ 40/S2-D01 - Salle Dirac
%Conveners: Alexei Klimentov (Brookhaven National Laboratory (US)), David Cameron (University of Oslo (NO)), David Michael South (Deutsches Elektronen-Synchrotron (DE)), Johannes Elmsheuser (Brookhaven National Laboratory (US)), Mario Lassnig (CERN), Xin Zhao (Brookhaven National Laboratory (US))
%
% 09:00
% Materials : Tier-1s presentations (October-November splinter meetings), FTS, Rucio, ProdSys2¶ 1m
%
%09:01
%Meeting goals¶ 10m
%
%can we agree on the way forward to cut the following two gaps in the meeting
%
%Gap 1 : between the throughput out of the tape system itself and the throughput delivered to users (rucio in this case)
%--This is to tackle the issues along the staging chain, dCache, FTS, Rucio, PS2, etc, to minimize performance penalties to the original tape throughput
%Gap 2 : between the nominal tape throughput and the current throughput out of tape
%--This is about “smart writing”, by bigger files and/or better organizing files among tapes, to reach higher tape reading efficiency
%Speakers: Alexei Klimentov (Brookhaven National Laboratory (US)), Mario Lassnig (CERN), Xin Zhao (Brookhaven National Laboratory (US))
%09:11
%Summary and highlights of Oct-Nov discussions with Tier 1s and CERN¶ 20m
%Speakers: Alexei Klimentov (Brookhaven National Laboratory (US)), Mario Lassnig (CERN), Xin Zhao (Brookhaven National Laboratory (US))
%09:31
%ATLAS Sites (topical discussion)¶ 59m
%
% Planning / notification / feedback /discussions
% What concerns sites have with tape system throughput (writing/reading) ?
% -- Active tape drives usage vs drives lifetime (data staging scenario)
% -- What are the bottlenecks at sites, any planned improvements ?
% -- What help do they need from ADC, dCache, FTS ? (e.g. sites want the staged files to be transferred out of disk buffer as soon as possible, avoid too many staging retries ...)
% Site staging profile
% -- Possible extension ?
% What can sites do w.r.t. “Smart writing” ?
% -- new features in tape system ?
% -- external users (eg. ADC/dCache) to create bigger files and/or better organize the order of files when writing to tape ?
% What are the special concerns for sites supporting multiple-VO ?
% -- Do we understand requirements and intended usage of tape resources of other VOs ?
% monitoring tools enough for sites? any suggestions on improving monitoring ?
%
%11:00 → 12:30
%ADC meetings: Data carousel (II)¶ 40/S2-D01 - Salle Dirac
%Conveners: Alexei Klimentov (Brookhaven National Laboratory (US)), David Cameron (University of Oslo (NO)), David Michael South (Deutsches Elektronen-Synchrotron (DE)), Johannes Elmsheuser (Brookhaven National Laboratory (US)), Mario Lassnig (CERN), Xin Zhao (Brookhaven National Laboratory (US))
%
% 11:00
% dCache global view and development roadmap¶ 25m
% Speaker: Mr Tigran Mkrtchyan (DESY)
% 11:25
% Topical discussion¶ 1h
% 7 out of 10 T1s run dCache as tape frontend
% Can dCache provide common solution to improve scalability of dCache HSM interface, something like ENDIT does for TSM sites, that will benefit all dCache sites ?
% Can dCache group files by datasets in writing to tapes, which will benefit all dCache T1s ?
% other tape frontend (CTA, StoRM)
% -- plans ?
% alternative protocol for tape access ?
% -- SRM vs QoS APIs ?
% Based on CERN beamer theme by Jerome Belleman https://github.com/jeromebelleman/beamer-cern
% The optional `\author` command defines the author and is displayed in the slide produced by the `\titlepage` command.
\author{Eric Cano on behalf of the CTA team}
% The optional `\title` command defines the title and is displayed in the slide produced by the `\titlepage` command.
\title{CTA Smart writing}
% The optional `\subtitle` command will add a smaller title below the main one, and will not be displayed in any of the slides' footer.
\subtitle{Conceptual proposition}
% The optional `\date` command will display a custom free text date on the all of the slides' footer. If omitted today's date will be used.
\date{ADC meetings: Data carousel, 5 Dec 2019}
\setcounter{tocdepth}{1}
\begin{document}
\frontcover
% The optional `\titlepage` command will create a slide with the presentation's title, subtitle and author.
\frame{\titlepage}
% The optional `\tableofcontents` command will automatically create a table of contents based pm the sections.
\frame{\tableofcontents}
\section{The problem}
\begin{frame}{What are we trying to solve?}
\begin{itemize}
\item Datasets are always read whole
\item Tape systems not dataset-aware during write
\begin{itemize}
\item Files scattered over tapes $\Rightarrow$ more read mounts
\item Files interleaved with others within tape
$\Rightarrow$ drive spends time positioning on reads
\end{itemize}
\item Making files bigger will impact tape performance
\begin{itemize}
\item Tape drive typically faster than file system (\SI{360}{\mega\byte\per\second} today, up to \SI{1}{\giga\byte\per\second} in the roadmaps)
\item Tape server memory should hold several files to allow streaming them in parallel
\item Typical tape server memory size: \SI{60}{\giga\byte}
\item Upper bound for efficient file size: \SI{10}{\giga\byte}
\end{itemize}
\end{itemize}
\end{frame}
\section{Files tagging by dataset name}
\begin{frame}{Files tagging by dataset name}
\begin{itemize}
\item Per-file property
\item Type = string
\begin{itemize}
\item Can we define a length cap?
\end{itemize}
\item Only rely on comparison (no ordering, ranking...)
\end{itemize}
\end{frame}
\section{User interface}
\begin{frame}{User interface}
\begin{itemize}
\item On write, per file tagging
\begin{itemize}
\item Has to go through Rucio/FTS/EOS/CTA
\end{itemize}
\item Back tagging of existing files (several scenarios)
\begin{itemize}
\item Executed as a one-off, we could have rule based update script
\item More general: provide get/set operation per file and leave it to the user
\end{itemize}
\end{itemize}
\end{frame}
\section{Tape system optimization}
\begin{frame}{Tape system optimizations}
\adjustbox{minipage=1.18\textwidth, scale=0.85}{
\begin{itemize}
\item Write optimization
\begin{itemize}
\item Divide archive queue in per-dataset sub-queue
\item Make write mounts stick to a dataset (until it is drained)
\begin{itemize}
\item $\Rightarrow$ Contiguous files, zero positioning on read
\end{itemize}
\item Possibly cap the per-dataset parallel writes
\begin{itemize}
\item $\Rightarrow$ Soft-limiting the spreading over tapes
\end{itemize}
\end{itemize}
\item Repack/defrag
\begin{itemize}
\item Repack can then write in an optimized manner (defrag)
\item Repack input (which files to read) could be dataset driven instead of tape driven
\begin{itemize}
\item If extra read mount cost bearable
\item Will have to take into account tape level constraints as well
\item Make sure we empty old tapes and not re-repack a target tape
\item Will it be worth the complexity?
\end{itemize}
\end{itemize}
\end{itemize}
}
\end{frame}
\section{Possible bonus features}
\begin{frame}{Possible bonus features}
\begin{itemize}
\item Multi-level tagging, allowing to better choose the {\em{}next} dataset in a mount
\item Retrieve by dataset (implies big changes in whole data transfer chain, and possibly hairy error handling)
\end{itemize}
\end{frame}
\section{Timing and parallel developments}
\begin{frame}{Timing and parallel developments}
\begin{itemize}
\item This concept would take quite some time to implement (many projects involved)
\item In the shorter term, we plan to rely on the existing
\begin{itemize}
\item Temporal collocation
\item Software RAO will be implemented for LTO drives
\end{itemize}
\end{itemize}
\end{frame}
\section{Conclusion}
\begin{frame}{Conclusions}
\begin{itemize}
\item Changes from outside the tape system (bigger files) will push us to a non-optimal working point
\item With proper hints tape system can optimize read access, knowing that access is done by full dataset
\end{itemize}
\end{frame}
\backcover
\end{document}
% Jerome Belleman <Jerome.Belleman@cern.ch>, March 2015
\mode<presentation>
\definecolor{cern}{RGB}{0,83,161}
\setbeamercolor{structure}{fg=cern}
\setbeamercolor{footline}{use=structure,
fg=structure.fg!50!white,
bg=structure.fg}
\mode
<all>
% Jerome Belleman <Jerome.Belleman@cern.ch>, March 2015
\mode<presentation>
% Slightly smaller
\AtBeginDocument{\fontsize{15}{18}\selectfont}
% Slightly smaller
\setbeamerfont{title}{size*={22}{25pt}}
% Slightly smaller
\setbeamerfont*{frametitle}{parent=title}
% Slightly smaller
\setbeamerfont*{section title}{parent=title}
% Slightly larger
\setbeamerfont{author}{size*={8}{9.5pt}}
% Slightly larger
\setbeamerfont{title in head/foot}{size*={7}{8pt}}
% About the same size
\setbeamerfont{item}{size*={8}{9.5pt}}
% About the same size
\setbeamerfont{subitem}{size*={7}{8pt}}
% About the same size
\setbeamerfont{subitem}{size*={6}{7pt}}
% Slightly smaller
\setbeamerfont{itemize/enumerate subbody}{size*={12}{14pt}}
% Slightly smaller
\setbeamerfont{itemize/enumerate subsubbody}{size*={11}{13.6pt}}
\mode
<all>
% Jerome Belleman <Jerome.Belleman@cern.ch>, March 2015
\mode<presentation>
\defbeamertemplate*{title page}{cern}
{
\vspace{42pt}
\begin{beamercolorbox}[leftskip=\titlelf]{title}
\usebeamerfont{title}\inserttitle
\ifx\insertsubtitle\@empty
\else
\par\usebeamerfont{subtitle}\insertsubtitle
\fi
\end{beamercolorbox}
\vspace{2pt}
\begin{beamercolorbox}[leftskip=\titlelf]{author}
\usebeamerfont{author}\insertauthor
\end{beamercolorbox}
}
\defbeamertemplate*{section page}{cern}{
\vfill
\begin{beamercolorbox}[leftskip=\titlelf]{section title}
\usebeamerfont{section title}\insertsection
\end{beamercolorbox}
\vfill
}
\defbeamertemplate*{itemize item}{cern}{%
\raise2pt\hbox{\textbullet}%
}
\defbeamertemplate*{itemize subitem}{cern}{%
\raise2pt\hbox{\textbullet}%
}
\defbeamertemplate*{itemize subsubitem}{cern}{%
\raise2pt\hbox{\textbullet}%
}
\mode
<all>
% Jerome Belleman <Jerome.Belleman@cern.ch>, March 2015
\usepackage{calc}
\mode<presentation>
% Lengths: trust Inkscape to measure them, PowerPoint if far too approximate
\newlength{\footlinedp}
\setlength{\footlinedp}{7.127mm * \ratio{128mm}{254mm}}
\newlength{\footlineht}
\setlength{\footlineht}{19.454mm * \ratio{128mm}{254mm} - \footlinedp}
\newlength{\footlinesk}
\setlength{\footlinesk}{3.338mm * \ratio{128mm}{254mm}}
\newlength{\logowd}
\setlength{\logowd}{14.046mm * \ratio{128mm}{254mm}}
\pgfdeclareimage[width=\logowd]{smalllogo}{cern-logo}
\newlength{\logolw}
\setlength{\logolw}{\footlinedp - 2.392mm * \ratio{128mm}{254mm}}
\newlength{\footlinetextwd}
\setlength{\footlinetextwd}{160pt}
\newlength{\numberwd}
\setlength{\numberwd}{20pt}
\defbeamertemplate*{footline}{cern}
{
\ifbeamer@cern@cover
\else
\begin{beamercolorbox}[wd=\paperwidth,dp=\footlinedp,ht=\footlineht,
leftskip=\footlinesk,
rightskip=\footlinesk]{footline}%
\usebeamerfont{title in head/foot}%
\lower\logolw\hbox{\pgfuseimage{smalllogo}}%
\hfill\makebox[\footlinetextwd][c]{\insertshortdate}%
\makebox[\footlinetextwd][c]{\insertshorttitle}%
\makebox[\numberwd][r]{\insertframenumber}%
\end{beamercolorbox}%
\fi
}
\defbeamertemplate*{frametitle}{cern}
{
\vspace{16pt}
% Set depth=0pt to avoid body position from depending on whether title
% text has any depth or not.
\begin{beamercolorbox}[dp=0pt,leftskip=\titlelf]{title}
\usebeamerfont{frametitle}%
\insertframetitle
\end{beamercolorbox}
}
\defbeamertemplate*{navigation symbols}{cern}{}
\defbeamertemplate*{frametitle continuation}{cern}{
(\insertcontinuationcountroman)
}
\addtobeamertemplate{footline}{\hfill\usebeamertemplate***{navigation symbols}%
\hspace*{0.2cm}\par\vskip 10pt}{}
\mode
<all>
% Jerome Belleman <Jerome.Belleman@cern.ch>, March 2015
% The size and position of graphical elements was rigorously computed/measured.
% That of text not so much as I kept the default Beamer font, so I just
% overlaid the official template to reach a result that's perceivably close.
\mode<presentation>
\newif\ifbeamer@cern@cover
\beamer@cern@coverfalse
\newlength{\titlelf}
\setlength{\titlelf}{-9pt}
\usefonttheme{cern}
\usecolortheme{cern}
\useinnertheme{cern}
\useoutertheme{cern}
\pgfdeclareimage[width=100pt]{frontlogo}{cern-logo}
\pgfdeclareimage[width=62pt]{backlogo}{cern-logo}
\newcommand{\frontcover}{
\beamer@cern@covertrue
{
\setbeamercolor{background canvas}{bg=cern}
\frame{
\vspace{49pt}
\centerline{\pgfuseimage{frontlogo}}
}
}
\beamer@cern@coverfalse
}
\newcommand{\backcover}{
\beamer@cern@covertrue
{
\setbeamercolor{background canvas}{bg=cern}
\frame{
\vspace{105pt}
\centerline{\pgfuseimage{backlogo}}
\vspace{76pt}
\centerline{%
\scriptsize\color{cern!50!white}\href{http://home.cern}{home.cern}
}
}
}
\beamer@cern@coverfalse
}
\mode
<all>
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment