%ADASS_PROCEEDINGS_FORM%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
\documentclass[11pt,twoside]{article}  % Leave intact
\usepackage{adassconf}
\begin{document}   % Leave intact

%-----------------------------------------------------------------------
%			    Paper ID Code
%-----------------------------------------------------------------------
% Enter the proper paper identification code.  The ID code for your
% paper is the session number associated with your presentation as
% published in the official conference proceedings.  You can
% find this number by locating your abstract in the printed proceedings
% that you received at the meeting or on-line at the conference web
% site; the ID code is the letter/number sequence preceding the title
% of your presentation.
%
% This will not appear in your paper; however, it allows different
% papers in the proceedings to cross-reference each other.  Note that
% you should only have one \paperID, and it should not include a
% trailing period.
%

\paperID{P2-8}
%%%% ID=P2-8

%-----------------------------------------------------------------------
%		            Paper Title 
%-----------------------------------------------------------------------
% Enter the title of the paper.
%

\title{High Availability Architecture for the Chandra Data Archive}
\titlemark{High Availability Architecture}

%-----------------------------------------------------------------------
%		          Authors of Paper
%-----------------------------------------------------------------------
% Enter the authors followed by their affiliations.  The \author and
% \affil commands may appear multiple times as necessary.  List each
% author by giving the first name or initials first followed by the
% last name.  Authors with the same affiliations should be grouped
% together.
 
\author{P.\ Zografou, P.\ Harbo, K.\ McCusker, J.\ Moran,  A.\ Patz, \\ P.\ Ramadurai,  D.\ Van\ Stone}
\affil{Harvard-Smithsonian Center for Astrophysics, 60 Garden St., Cambridge, MA 02138}

%-----------------------------------------------------------------------
%			 Contact Information
%-----------------------------------------------------------------------
% This information will not appear in the paper but will be used by
% the editors in case you need to be contacted concerning your
% submission.  Enter your name as the contact along with your email
% address.

\contact{Panagoula Zografou}
\email{pz@head.cfa.harvard.edu}

%-----------------------------------------------------------------------
%		      Author Index Specification
%-----------------------------------------------------------------------
% Specify how each author name should appear in the author index.  The 
% \paindex{ } should be used to indicate the primary author, and the
% \aindex for all other co-authors.  You MUST use the following
% syntax: 
%
% SYNTAX:  \aindex{LASTNAME, F. M.}
% 
% where F is the first initial and M is the second initial (if
% used).  This guarantees that authors that appear in multiple papers
% will appear only once in the author index.  

\paindex{Zografou, P.}
\aindex{Harbo, P.}
\aindex{McCusker, K. J.}
\aindex{Moran, J.}
\aindex{Patz, A.}
\aindex{Ramadurai, P.}
\aindex{Van Stone@Van Stone, D.}

%-----------------------------------------------------------------------
%                     Author list for page header
%-----------------------------------------------------------------------
% Please supply a list of author last names for the page header in
% one of these formats:
%

\authormark{Zografou et al.}

%-----------------------------------------------------------------------
%			Subject Index keywords
%-----------------------------------------------------------------------
% Enter up to 6 keywords describing your paper.  These will NOT be
% printed as part of your paper; however, they will be used to
% generate the subject index for the proceedings.  There is no
% standard list; however, you can consult the indices for past ADASS
% proceedings (http://iraf.noao.edu/ADASS/adass.html). 

\keywords{archives, databases, replication, client, server}

%-----------------------------------------------------------------------
%			       Abstract
%-----------------------------------------------------------------------
% Type abstract in the space below.  Consult the User Guide and Latex
% Information file for a list of supported macros (e.g. for typesetting 
% special symbols). Do not leave a blank line between \begin{abstract} 
% and the start of your text.

\begin{abstract}          % Leave intact
The Chandra Data Archive is distributed at three physically remote locations, 
two of them in Cambridge, MA and a third in Leicester, UK. Each installation operates 
local hardware and a locally configured software release. The data are stored 
at a single location or in synchronized copies at multiple locations. The architecture 
enables processes to access the installation that is closest to the user or another 
installation if the first becomes overloaded or unavailable. 
This paper presents the archive architecture for the multiple installations.
We explain the mechanisms that synchronize the data and we analyze the differences 
in data holdings across sites. We discuss how the software release is configured 
to operate at each installation and how users are routed to an installation depending 
on their profile. Finally, we describe the load balancing and failover mechanisms 
built into the archive.

\end{abstract}

%-----------------------------------------------------------------------
%			      Main Body
%-----------------------------------------------------------------------
% Place the text for the main body of the paper here.  You should use
% the \section command to label the various sections; use of
% \subsection is optional.  Significant words in section titles should
% be capitalized.  Sections and subsections will be numbered
% automatically. 

\section{Introduction}

The Chandra Data Archive contains data from observations in the form of telemetry and data products. It also contains catalogs and operational data such as observing proposals, mission planning schedules and Chandra users' information. The archive serves both as a storage area for existing data and as an active data store interacting with daily Chandra X-ray Center (CXC) operations. The large number of archive users may generate a heavy load at peak times or may request large data transfers to remote locations over slow networks. Operational processes that access the archive may require continuous availability of the data regardless of potential heavy load or system downtime. In order to address these needs, the archive was designed to operate at multiple installations, each of which is configurable to the needs of different groups of users.

\section{Archive Installations}
\begin{figure}
    \plotone{P2-8_f1.eps} 
    \caption{Archive Installations} \label{P2-8_f1}
\end{figure}

A single archive installation consists of an RDBMS server that manages databases and one or more archive servers that manage files.
The server software is configurable at runtime to operate at a specific installation.
An installation can operate with all or a subset of the archive data holdings.
A number of archive installations can operate simultaneously at the same or at remote locations. They differ by the IP address or the port of the servers.

Archive installations are currently in production at three different locations (Figure~\ref{P2-8_f1}). The \textit{SAO} and \textit{OCC} installations are at the Center for Astrophysics (CfA) and at the Chandra Operations Control Center (OCC), within the High Energy Astrophysics Division (HEAD) network in Cambridge, MA. The \textit{LEDAS} installation is at the Leicester Database and Archive Service (LEDAS), in the UK.

\section{Replication Mechanism}
The replication is one-directional. Data are entered in the archive at a primary installation and are replicated to secondary installations. The primary installation is the one nearest to the data production site.

The Sybase ASE 12.5 SQL and replication servers are used for the storage and replication of relational data. The replication server monitors the activity of the SQL server at the primary installation and repeats recorded transactions at the subscribing secondary installations.

Data files entered in the primary archive server cause entry of metadata in the primary SQL server. When a row of metadata is replicated to a secondary SQL server it triggers a call to the secondary archive server to transfer the data file from the primary archive server (Figure~\ref{P2-8_f2}). The archive server is a Sybase 12.5 Open Server application and can receive RPC calls from the SQL server. It is also a Sybase 12.5 Open Client application, which allows it to connect to the primary archive server and retrieve files.


\begin{figure}
    \epsscale{0.6}
    \plotone{P2-8_f2.eps} 
    \caption{Replication Data Flow} \label{P2-8_f2}
\end{figure}


\section{Data Location}
Data are stored at single or multiple locations according to their type.

All data are stored at their primary installation where they are ingested in the archive by data production operations. For example, the \textit{OCC} installation, at the site where telemetry is received and processed, is the primary for telemetry and pipeline products; the \textit{SAO} installation, at the site where the User Support group is located, is the primary for proposals and user information.

Frequently accessed data are replicated to one or more installations in order to provide better network access to users and decouple them from CXC operations. In this category belong the high-level data products which are accessed by remote clients and are replicated to \textit{SAO}. The public subset is also replicated to \textit{LEDAS}.

Mission-critical data are replicated for continuous availability, in case the primary installation fails.
Mission Planning data can be accessed by operations at both the \textit{OCC} and \textit{SAO} installations.


A list of datatypes and their locations is available to clients connecting to the archive servers. The list points to two installations for each replicated datatype or a single one for non-replicated data. The installation names are parameters that are set for each client at runtime.

\section{Software Configuration}

All client, server and middle-tier components are bundled in the same archive software release.
All backend server installations need to run the same release version for components that affect
replicated data.


The software, including the backend and middle-tier servers and all their clients, is configured at runtime by a number of parameters. The server parameters determine the archive installation where the server runs. The client parameters determine the archive installations where the client connects. The configuration for all known archive installations is included with the release runtime environment. Each server or client process is automatically assigned a set of parameter values. The values are stored in environment variables and are determined by the user ID for the backend servers and by the IP address for clients. For middle-tier Java servers, the values are stored as properties when a release is installed. The same properties are also used to configure remote Java clients.

\section{Failover and Load Balancing}

Clients browse and retrieve data at their nearest available installation. If this is not available because of failure or heavy usage, they fail over to a different installation, if one exists for the requested data. A client may fail over at the beginning of a session or at any time during it, and fall back to the first installation if it becomes available again during the same session.

\section{Conclusion}

The \textit{OCC} and \textit{SAO} archive installations, both within the CfA subnet, have been operating successfully since the Chandra launch, serving end-users and operations. More recently, the \textit{LEDAS} installation was added to provide closer access for users in Europe. 

With the growing size of the public archive, there is interest in establishing more installations to reach users in Asia and other parts of the world. While the mirror archives fulfill all the initial requirements, they are costly to establish and operate. Furthermore, they include features like proprietary rights checking, user authentication and operations support which are not needed in a public data archive. A simplified approach for remote archive installations is currently under development. In this approach, a remote archive is a public FTP directory. Clients use the primary installation to browse the contents of the archive and to 
submit retrieval requests. The primary installation forwards the request to the FTP 
server installation nearest to the user. The first FTP archive installation already exists at the CfA. More FTP archives in Europe and Asia are planned for the near future.

\acknowledgments
This project is supported by the Chandra X-ray Center under NASA contract NAS8-39073.

%
%-----------------------------------------------------------------------
%			      References
%-----------------------------------------------------------------------
% Now comes the reference list.  Since we typed out the citations ourselves,
% the reference list is enclosed in a "references" environment.  Each
% new reference begins with a \reference command which sets up the proper
% indentation.  Typography that may be required in the reference list by
% the editorial staff must be included by the author.
%
% Observe the "standard" order for bibliographic material: author name(s),
% publication year, journal name, volume, and page number for articles.
% Some journal names are available as macros; see the package
% instructions for a listing of which ones have been "macro-ized".
%
% There is no need to engage in any other typographic manipulation.
%
% List your references below within the reference environment
% (i.e. between the \begin{references} and \end{references} tags).
% Each new reference should begin with a \reference command which sets
% up the proper indentation.  Observe the following order when listing
% bibliographical information for each reference:  author name(s),
% publication year, journal name, volume, and page number for
% articles.  Note that many journal names are available as macros; see
% the User Guide for a listing "macro-ized" journals.   
%
% Note the following are some of the tricks that can be used:
%
%   o  \& is used to format an ampersand symbol (&).
%   o  \'e and \`e put an acute accent and a grave accent, respectively,
%      over the letter e.  See the User Guide for details on
%      formatting special characters.
%   o  "\ " after a period prevents LaTeX from interpreting the period 
%      as an end of a sentence.
%   o  \aj is a macro that expands to "Astron. J."  See the User Guide
%      for a full list of journal macros
%   o  \adassviii is a macro that expands to the full title, editor,
%      and publishing information for the ADASS VIII conference
%      proceedings.  Such macros are defined for ADASS conferences I
%      through X.
%   o  When referencing a paper in the current volume, use the
%      \adassviii and \paperref macros.  The argument to \paperref is
%      the paper ID code for the paper you are referencing.  See the 
%      note in the "Paper ID Code" section above for details on how to 
%      determine the paper ID code for the paper you reference.  
%
\begin{references}

\reference Patz et al.\ 2003, \adassxii, 249 % Year corrected, FO
\reference Estes et al.\ 2000, \adassix , 457 % Year corrected, FO
\reference Zografou et al.\ 1998, \adassvii, 391 % Year corrected, FO

\end{references}

% Do not place any material after the references section

\end{document}  % Leave intact
