%ADASS_PROCEEDINGS_FORM%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% SAMPLE1.TEX -- ADASS XII (2002) ASP Conference Proceedings sample
% paper with minimal markup. Based on the sample from ADASS XI (01).
%
% This is a simple example.  If you want to see a more comprehensive
% sample paper,  take a look at sample2.tex.
%
% Much of the input will be enclosed by braces (i.e., { }).  The
% percent sign, "%", denotes the start of a comment; text after it
% will be ignored by LaTeX.  You might also notice in some of the
% examples below the use of "\ " after a period; this prevents LaTeX
% from interpreting the period as the end of a sentence and putting
% extra space after it.   
% 
% You should check your paper by processing it with LaTeX.  For
% details about how to run LaTeX as well as how to print out the User
% Guide, consult the README file.  
%
% If you do not have access to the LaTeX software or a laser printer
% at your site, you can still prepare your paper following the
% instructions in the User Guide.  In such cases, the editors will
% process the file and make any necessary editorial adjustments.
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
\documentclass[11pt,twoside]{article}  % Leave intact
\usepackage{adassconf}

% If you have the old LaTeX 2.09, and not the current LaTeX2e, comment
% out the \documentclass and \usepackage lines above and uncomment
% the following:

%\documentstyle[11pt,twoside,adassconf]{article}

\begin{document}   % Leave intact

%-----------------------------------------------------------------------
%			    Paper ID Code
%-----------------------------------------------------------------------
% Enter the proper paper identification code.  The ID code for your
% paper is the session number associated with your presentation as
% published in the official conference proceedings.  You can
% find this number by locating your abstract in the printed proceedings
% that you received at the meeting or on-line at the conference web
% site; the ID code is the letter/number sequence preceding the title
% of your presentation.
%
% This will not appear in your paper; however, it allows different
% papers in the proceedings to cross-reference each other.  Note that
% you should only have one \paperID, and it should not include a
% trailing period.
%

\paperID{O10-2}
%%%% ID=O10-2

%-----------------------------------------------------------------------
%		            Paper Title 
%-----------------------------------------------------------------------
% Enter the title of the paper.
%
% EXAMPLE: \title{A Breakthrough in Astronomical Software Development}
%
% If your title is so long as to fill the page header when you print it,
% then please supply a short form as a \titlemark.
%
% EXAMPLE:
%  \title{Rapid Development for Distributed Computing, with Implications
%         for the Virtual Observatory}
%  \titlemark{Rapid Development for Distributed Computing}
%

\title{The XMM-Newton SAS - Distributed Development and
Maintenance of a Large Science Analysis System: A Critical Analysis}
\titlemark{The XMM-Newton SAS}

%-----------------------------------------------------------------------
%		          Authors of Paper
%-----------------------------------------------------------------------
% Enter the authors followed by their affiliations.  The \author and
% \affil commands may appear multiple times as necessary.  List each
% author by giving the first name or initials first followed by the
% last name.  Authors with the same affiliations should be grouped
% together.
%
% Try to limit the front matter to no more than three \author
% commands.  Group authors with the same affiliations.  Too many
% \author commands fills the first page of the paper with little
% actual text.

\author{Carlos Gabriel\altaffilmark{1}, John Hoar\altaffilmark{1}, 
Aitor Ibarra\altaffilmark{1},  
Uwe Lammers\altaffilmark{2}, Eduardo Ojero\altaffilmark{1}, 
Richard Saxton\altaffilmark{1}, Giuseppe Vacanti\altaffilmark{2}}
\affil{XMM-Newton Science Operations Centre, Science Operations and Data Systems
Division of ESA, European Space Agency}
\altaffiltext{1}{VILSPA, Villafranca del Castillo, P.O.Box 50727,
28080 Madrid, Spain}
\altaffiltext{2}{ESTEC, Keplerlaan 1, 2200 AG Noordwijk,
The Netherlands}
\author{Mike Denby, Duncan Fyfe, Julian Osborne}
\affil{Department of Physics and Astronomy, University of Leicester, Leicester,
LE1 7RH, UK}
%-----------------------------------------------------------------------
%			 Contact Information
%-----------------------------------------------------------------------
% This information will not appear in the paper but will be used by
% the editors in case you need to be contacted concerning your
% submission.  Enter your name as the contact along with your email
% address.

\contact{Carlos Gabriel}
\email{Carlos.Gabriel@esa.int}

%-----------------------------------------------------------------------
%		      Author Index Specification
%-----------------------------------------------------------------------
% Specify how each author name should appear in the author index.  The 
% \paindex{ } should be used to indicate the primary author, and the
% \aindex for all other co-authors.  You MUST use the following
% syntax: 
%
% SYNTAX:  \aindex{LASTNAME, F. M.}
% 
% where F is the first initial and M is the second initial (if
% used).  This guarantees that authors that appear in multiple papers
% will appear only once in the author index.  
%
% EXAMPLE: \paindex{Crabtree, D.}
%          \aindex{Manset, N.}
%          \aindex{Veillet, C.}
%
% NOTE: this information is also used to build the author list that
% appears in the table of contents.  Authors will be listed in the order
% of the \paindex and \aindex commands.
%

\paindex{Gabriel, C.}
\aindex{Denby, M.}     
\aindex{Fyfe, D. J.}     
\aindex{Hoar, J.}     
\aindex{Ibarra, A.}     
\aindex{Ojero, E.}     
\aindex{Osborne, J.}     
\aindex{Saxton, R. D.}     
\aindex{Jansen, F.}     
\aindex{Lammers, U.}     
\aindex{Vacanti, G.}     

%-----------------------------------------------------------------------
%                     Author list for page header
%-----------------------------------------------------------------------
% Please supply a list of author last names for the page header in
% one of these formats:
%
% EXAMPLES:
% \authormark{LASTNAME}
% \authormark{LASTNAME1 \& LASTNAME2}
% \authormark{LASTNAME1, LASTNAME2, ... \& LASTNAMEn}
% \authormark{LASTNAME et al.}
%
% Use the "et al." form in the case of seven or more authors, or if
% the preferred form is too long to fit in the header.

\authormark{Gabriel et al.}

%-----------------------------------------------------------------------
%			Subject Index keywords
%-----------------------------------------------------------------------
% Enter up to 6 keywords describing your paper.  These will NOT be
% printed as part of your paper; however, they will be used to
% generate the subject index for the proceedings.  There is no
% standard list; however, you can consult the indices for past ADASS
% proceedings (http://adass.org/adass/proceedings/).

\keywords{XMM: Newton, distributed: development, C++, Fortran 90/95, Perl, software: scientific analysis, multi: dimensional, data analysis, pipelines, interactive: analysis, X-ray}

%-----------------------------------------------------------------------
%			       Abstract
%-----------------------------------------------------------------------
% Type abstract in the space below.  Consult the User Guide and Latex
% Information file for a list of supported macros (e.g. for typesetting 
% special symbols). Do not leave a blank line between \begin{abstract} 
% and the start of your text.

\begin{abstract}          % Leave intact
The XMM-Newton Scientific Analysis System (SAS) is the software used for 
the reduction and calibration of data taken with the XMM-Newton satellite
instruments leading to almost 400 refereed 
scientific papers published in the last 2.5 years. Its maintenance, 
further development and distribution are under the responsibility of the 
XMM-Newton Science Operations Centre together with the Survey Science 
Centre, representing a collaborative 
effort of more than 30 scientific institutes.

Developed in C++, Fortran 90/95 and Perl, the SAS makes large use of 
open software packages such as {\it ds9} for image display 
(SAO-R\&D Software Suite), 
Grace, LHEASOFT and cfitsio (HEASARC project), pgplot, fftw and the  
non-commercial version of Qt (TrollTech).

The combination of supporting several versions of SAS for multiple 
platforms (including SunOS, DEC, many Linux flavours and MacOS) in a 
widely distributed development process which makes use of a suite of 
external packages and libraries presents substantial issues for the 
integrity of the SAS maintenance and development.
A further challenge comes from the necessity of maintaining the 
flexibility of a software package evolving together with progress made 
in instrument calibration and analysis refinement, whilst at the same 
time being the source of all official products of the XMM-Newton 
mission. To cope with this requirement, a sophisticated system for 
continuous integration and testing on several platforms of different 
branches has been put in place on top of a refined development model 
designed for this special S/W development case.

The SAS is considered now a mature system. We present
the different aspects of its development, maintenance
and distribution, extracting lessons learned for present and future
projects of this magnitude.

\end{abstract}

%-----------------------------------------------------------------------
%			      Main Body
%-----------------------------------------------------------------------
% Place the text for the main body of the paper here.  You should use
% the \section command to label the various sections; use of
% \subsection is optional.  Significant words in section titles should
% be capitalized.  Sections and subsections will be numbered
% automatically. 

\section{Introduction}

The XMM-Newton Scientific Analysis System (SAS) is the main tool for
offline processing of data obtained from the scientific instruments on
board XMM-Newton [Jansen et al.\ 2001]. These consist of X-ray
instruments performing imaging, spectroscopy and timing and an
optical/UV camera for imaging, timing and medium-resolution dispersive
spectroscopy.

The SAS runs both in interactive mode, including a complete GUI
system, and in scripting mode, in which input
parameters of the tasks are specified on the command line. 
The scripting capability is used to
create the XMM-Newton Pipeline (PPS) from a subset of the SAS, for
generation of the official scientific mission data products.

The SAS has been in continual development for around 6 years by a team
of up to 30 developers, distributed around the world. Its development
therefore presents the typical difficulties of distributed
development. The large use of external freeware libraries and the
objective of distributing the system for multiple platforms whilst
taking the most user-friendly approach possible accentuates the
demand for a flexible but controlled development with a continuous
integration process. Especially important in this context is that the
individual developer has rapid feedback on the integration of their task
into the system. Several of the techniques used in the development (e.g.
continuous integration and testing) are established paradigms in Extreme
Programming (XP) [see e.g.\ Beck and Fowler 2001]. 



\section{The XMM-Newton SAS general capabilities}

The main purpose of the SAS is the reduction of data from all
XMM-Newton scientific instruments to the level of calibrated event
lists, images, spectra, source lists and detector response matrices,
allowing the observer to perform astronomical analysis using those
products without the need of any special knowledge of instrumental
performance or calibration. At the same time the SAS provides the
observer with the means to repeat the reduction process after
improvements in calibration. A complete data analysis toolset is also
in place for optimizing the selection criteria in order to achieve the
best signal to noise tailored to the scientific case the observer is
interested in.

A modular architecture allows the SAS user to go step-by-step through
the entire chain of the data reduction. SAS tasks are highly
parameterised, such that almost all the different data handling steps
are configurable; in particular those steps which are considered
essential. In this way the effect of the presence / absence / value of
those steps is easily tested.

SAS ``task'' GUIs are created by a common graphical software mechanism 
based on Qt which allows easy access to all their own specific 
parameters. That method produces a consistent and easy-to-use  
look-and-feel as compared to having to use a different GUI for each ``task''.
All tasks can be run from the command line specifying all
mandatory task parameters.

The modular structure and highly parameterised nature of the SAS
allows detailed configuration of the data manipulation process; when
combined with the command line interface it provides a powerful and
efficient data reduction facility. It is very easy to compose data
reduction scripts since the output produced by each task is
potentially input to a task following logically in one of the possible
data reduction chains. This is taken to its logical conclusion in the
XMM-Newton Pipeline, a sophisticated processing system with the SAS at
the core, developed by the SSC [Fyfe et al.\ 2001].

The log file of any SAS data reduction session
(irrespective of whether tasks are run from GUI or command line)
contains information on all the tasks performed, including the
parameters used, marked in a way that they are easily recognized as
such by the reader. Use of this capability to record every single performed 
call is made by a special task,
which creates automatically from such a logfile a corresponding
executable script.

Access to general information on SAS, on-line documentation, binaries,
special reduction scripts and much more is provided through the
XMM-Newton webpages (http://xmm.vilspa.esa.es/).


\section{A fully distributed development}

A natural task distribution among the developers collaborating in the
project has been established since the beginning of development. A
central development group within ESA's SOC, composed of scientists and
software engineers, takes care of all the infrastructure tasks,
including the data and calibration access layers. A highly distributed
team, coordinated by the Survey Science Centre at the University of
Leicester\footnote{http://xmmssc-www.star.le.ac.uk/} 
and composed mainly of scientists working closely with the
different XMM-Newton instrument teams, is in charge of the
instruments' data processing tasks. A high level of communication is
needed in such a distributed environment, which is achieved through a
mailing list dedicated to development, as the forum for discussion,
exchange of ideas and communication of new developments. 

In addition a number of SAS Working Group meetings are organized every
year, bringing the developers together to communicate and report on the
status and future of each area of development.  Two separate
configuration control systems for Software Problem Report (SPR) and
Software Change Request (SCR) were established at the
XMM-Newton SOC and the SSC respectively, reflecting the ``local''
handling of the SAS tasks. These are visible to the entire project
and automatically send emails to the task developers and managers when
there is a change of report status.
A Configuration Control Board (SAS CCB),
composed of members of both the SOC and the SSC, handles general
strategy questions, change requests, release schedules, etc.

\section{The SAS development model}

In order to make possible the SAS development, with many distributed
developers working through a system of honour rather than subject to
authority-wielding management of their work by a central body, a special 
development
model had to be put in place. It is based on three central elements:
\begin{itemize}
\item the whole system is broken down into single packages,
\item the multiple dependencies among packages are taken care of by the build
system, 
\item a thorough unit- and system-level testing approach is
realized through continuous integration and testing on several
platforms.
\end{itemize}

\noindent
\vspace{0.2cm}
The cycle of integration is composed of the following steps:
\begin{itemize}
\item changed SAS task packages are uploaded to the 
SOC central repository where a new software ``manifest'' is issued daily,

\item builds based on the issued ``manifest'' are performed daily on different 
reference systems for specific platforms and operating systems, in the 
SOC and other selected sites,

\item the build reports containing information on the build setup, its results 
and tests are published automatically by the build process onto the 
development central web page (http://xmm.vilspa.esa.es/sasdev/integration),

\item within 24 hours developers can see the results of their uploaded 
changes to any SAS task code, into the complete structure of SAS for 
all the different platforms, operating systems and different build 
configurations. 
\end{itemize}

\noindent	%%% FO
A number of parallel building ``tracks'' are used in order to allow flexibility
during development (``development track'') as well as a highly configuration-controlled 
system, as needed during the period prior to an imminent release or for official pipeline 
development (``release track''). Snapshots of a certain development stage can 
be taken at any time. The contents of a snapshot are determined by the version numbers of the
packages recorded in a ``manifest'', which can be changed following simple rules governing 
packages within a ``manifest''.

Every major public SAS release is accompanied by a process of scientific
validation of the software. This consists in the automatic data reduction of a
pre-determined set of test data performed by the PPS, followed by a thorough
scientific interactive analysis. The aims of this exercise are to establish:
\begin{itemize}
\item which instrumental modes are fully supported by SAS,
\item which scientific products SAS can produce, and
\item the level of accuracy associated with those products.
\end{itemize}

\noindent	%%% FO
The outcome of the SAS scientific validation is summarized in a report,
made available through the SAS webpages.
  
The SAS is distributed in binary form for easy installation on a wide range of
platforms and operating systems. The officially supported systems vary slightly 
from version to version of the SAS, following the general evolution of the target 
environments, in particular in the Linux sector. The platforms used currently for 
SAS integration are: Solaris 2.6 and 2.8, Linux Red Hat 9.0 and SuSe 7.3, Mac OS 
Darwin 6.6 as well as DEC Tru64 OSF5.1. These platforms are planned to be 
supported officially by the next release (SAS 6.0) in early 2004.
 
\section{Lessons learned}

The concept behind the SAS development has proven to be generally
successful in many aspects, underlined by the fact that to date
almost all 400 refereed scientific papers on XMM-Newton data have been
possible also due to the SAS data reduction capabilities. There are
however also some aspects on the negative side, which have resulted in
larger manpower demands due to its development characteristics, such as

\begin{itemize}

\item the mix of programming languages (C++ and F90) used for coding SAS and 
the evolution of compiler support for these languages, which caused serious 
trouble on many occasions and avoidable work if only a single language had
been used. This gave a higher compiler dependency than would otherwise 
have been
possible, but was a constraint forced on us by the skills of the
pre-defined set of contributors;

\item code reviews were infrequent during development. 
This could have improved the level of overall code quality. Code re-use was 
identified early on as a means of keeping total costs
down. Lack of resources prevented wholesale code walkthroughs;

\item the development of common utilities is extremely difficult in geographically
distributed environments and can lead to code duplication. 
Given the ease with which the SAS
infrastructure allows new tasks to be quickly made, this has to be
considered a minor cost, to be set against the cost of determining the
exact function of external software components;

\item the strong dependency of the SAS on the evolution of external libraries
and operating systems. The consequences are relatively high demands on 
maintenance. 

\end{itemize}

\noindent
The most positive aspects of the development are the following:

\begin{itemize}

\item core infrastructure developed in a central place and a very
good work-split between software engineers and scientists;

\item successful delivery and integration concepts and procedures, including the
use of web and automatic emails to report the status of the daily builds, and of
the problems identified;

\item defined standard structure for all SAS packages, including
source, documentation, GUI parameter handling, test harnesses, dependencies,
version, changelog and distribution specification;

\item access and use of complex data (including calibration), through
layers of abstraction enabling users / SAS developers without
knowledge of underlying data structure intricacies or algorithms used
for derived data,

\item quick turnaround times due to the continuous integration and build
processes,

\item no imposition of any commercial S/W package on the end users,

\item incarnation of a SAS subset as official pipeline (PPS), used for
the derivation of the official products distributed to the observers
and populating the XMM-Newton archive. The resulting large exposure of the
software to the data led to rapid bug elimination.

\end{itemize}
  
    
%-----------------------------------------------------------------------
%			      References
%-----------------------------------------------------------------------
% List your references below within the reference environment
% (i.e. between the \begin{references} and \end{references} tags).
% Each new reference should begin with a \reference command which sets
% up the proper indentation.  Observe the following order when listing
% bibliographical information for each reference:  author name(s),
% publication year, journal name, volume, and page number for
% articles.  Note that many journal names are available as macros; see
% the User Guide for a listing "macro-ized" journals.   
%
% Note the following are some of the tricks that can be used:
%
%   o  \& is used to format an ampersand symbol (&).
%   o  \'e puts an acute accent over the letter e.  See the User Guide
%      for details on formatting special characters.  
%   o  "\ " after a period prevents LaTeX from interpreting the period 
%      as an end of a sentence.
%   o  \aj is a macro that expands to "Astron. J."  See the User Guide
%      for a full list of journal macros
%   o  \adassvii is a macro that expands to the full title, editor,
%      and publishing information for the ADASS VII conference
%      proceedings.  Such macros are defined for ADASS conferences I
%      through IX.
%   o  When referencing a paper in the current volume, use the
%      \adassix and \paperref macros.  The argument to \paperref is
%      the paper ID code for the paper you are referencing.  See the 
%      note in the "Paper ID Code" section above for details on how to 
%      determine the paper ID code for the paper you reference.  
%
\begin{references}
\reference Beck, K., Fowler, M., 2001, ``Planning Extreme Programming. XP'', 
Addison-Wesley
\reference Fyfe, D.J. et al., 2001, 
%``XMM-NEWTON SURVEY SCIENCE CENTRE 
%PIPELINE PROCESSING SYSTEM AND XMM-NEWTON SCIENCE ANALYSIS SYSTEM'' 
in Proceedings of the Conference ``New Visions of the X-ray Universe'', 
Noordwijk, in press.
\reference Jansen, F. et al., 2001, \aap, 365, L1--L6 
\end{references}

% Do not place any material after the references section

\end{document}  % Leave intact
