%ADASS_PROCEEDINGS_FORM%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% SAMPLE2.TEX -- ADASS XII (2002) ASP Conference Proceedings sample
% paper with complicated markup. Based on ADASS XI (01) version.
%
% This is a comprehensive example, meaning that we have made use of each
% of the capabilities of the LaTeX + the ASPCONF macro package that we think
% you may need to use.  If you want to see a "bare-bones" sample paper,
% take a look at sample1.tex.
%
% Much of the input will be enclosed by braces (i.e., { }).  The
% percent sign, "%", denotes the start of a comment; text after it
% will be ignored by LaTeX.  You might also notice in some of the
% examples below the use of "\ " after a period; this prevents LaTeX
% from interpreting the period as the end of a sentence and putting
% extra space after it.   
% 
% You should check your paper by processing it with LaTeX.  For
% details about how to run LaTeX as well as how to print out the User
% Guide, consult the README file.  
%
% If you do not have access to the LaTeX software or a laser printer
% at your site, you can still prepare your paper following the
% instructions in the User Guide.  In such cases, the editors will
% process the file and make any necessary editorial adjustments.
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
\documentclass[11pt,twoside]{article}  % Leave intact
\usepackage{adassconf}
%\def\plotone#1{Image: $#1$}

% If you have the old LaTeX 2.09, and not the current LaTeX2e, comment
% out the \documentclass and \usepackage lines above and uncomment
% the following:

%\documentstyle[11pt,twoside,adassconf]{article}

\begin{document}   % Leave intact

%-----------------------------------------------------------------------
%			    Paper ID Code
%-----------------------------------------------------------------------
% Enter the proper paper identification code.  The ID code for your
% paper is the session number associated with your presentation as
% published in the official conference proceedings.  You can
% find this number by locating your abstract in the printed proceedings
% that you received at the meeting or on-line at the conference web
% site; the ID code is the letter/number sequence preceding the title
% of your presentation.
%
% This will not appear in your paper; however, it allows different
% papers in the proceedings to cross-reference each other.  Note that
% you should only have one \paperID, and it should not include a
% trailing period.
%

\paperID{P1-13}
%%%% ID=P1-13

%-----------------------------------------------------------------------
%		            Paper Title 
%-----------------------------------------------------------------------
% Enter the title of the paper.
%
% EXAMPLE: \title{A Breakthrough in Astronomical Software Development}
%
% If your title is so long as to fill the page header when you print it,
% then please supply a short form as a \titlemark.
%
% EXAMPLE:
%  \title{Rapid Development for Distributed Computing, with Implications
%         for the Virtual Observatory}
%  \titlemark{Rapid Development for Distributed Computing}
%

\title{Clustering the large VizieR catalogues, the CoCat experience}
\titlemark{Large VizieR catalogues}

%-----------------------------------------------------------------------
%		          Authors of Paper
%-----------------------------------------------------------------------
% Enter the authors followed by their affiliations.  The \author and
% \affil commands may appear multiple times as necessary.  List each
% author by giving the first name or initials first followed by the
% last name.  Authors with the same affiliations should be grouped
% together. 
%
% Try to limit the front matter to no more than three \author
% commands.  Group authors with the same affiliations.  Too many
% \author commands fills the first page of the paper with little
% actual text.

\author{Fran\c cois Ochsenbein, S\'ebastien Derriere, 
   S\'ebastien Nicaisse, Andr\'e Schaaff}
\affil{Centre de Donn\'ees astronomiques de Strasbourg (CDS),
   Observatoire de Strasbourg, UMR 7550,
   11 rue de l'Universit\'e, 67000 Strasbourg, France
}

%\author{C.\ D.\ Biemesderfer\altaffilmark{3}}
%\affil{National Optical Astronomy Observatories, Tucson, AZ 85719}

% Notice that some of these authors have alternate affiliations, which
% are identified by the \altaffilmark after each name.  The actual alternate
% affiliation information is typeset in footnotes at the bottom of the
% first page, and the text itself is specified in \altaffiltext commands.
% There is a separate \altaffiltext for each alternate affiliation
% indicated above.

%\altaffiltext{1}{Visiting Astronomer, Cerro Tololo Inter-American Observatory. 
%CTIO is operated by AURA, Inc.\ under cooperative agreement with the National
%Science Foundation} 
%\altaffiltext{2}{Society of Fellows, Harvard University} 
%\altaffiltext{3}{Patron, Alonso's Bar and Grill}

%-----------------------------------------------------------------------
%			 Contact Information
%-----------------------------------------------------------------------
% This information will not appear in the paper but will be used by
% the editors in case you need to be contacted concerning your
% submission.  Enter your name as the contact along with your email
% address.

\contact{Fran\c cois Ochsenbein}
\email{francois@astro.u-strasbg.fr}

%-----------------------------------------------------------------------
%		      Author Index Specification
%-----------------------------------------------------------------------
% Specify how each author name should appear in the author index.  The 
% \paindex{ } should be used to indicate the primary author, and the
% \aindex for all other co-authors.  You MUST use the following
% syntax: 
%
% SYNTAX:  \aindex{LASTNAME, F. M.}
% 
% where F is the first initial and M is the second initial (if
% used).  This guarantees that authors that appear in multiple papers
% will appear only once in the author index.  

\paindex{Ochsenbein, F.}
\aindex{Schaaff, A.}
\aindex{Nicaisse, S.}
\aindex{Derriere, S.}

%-----------------------------------------------------------------------
%                     Author list for page header
%-----------------------------------------------------------------------
% Please supply a list of author last names for the page header, in
% one of these formats:
%
% EXAMPLES:
% \authormark{LASTNAME}
% \authormark{LASTNAME1 \& LASTNAME2}
% \authormark{LASTNAME1, LASTNAME2, ... \& LASTNAMEn}
% \authormark{LASTNAME et al.}
%
% Use the "et al." form in the case of seven or more authors, or if
% the preferred form is too long to fit in the header.

\authormark{Ochsenbein, Schaaff, Nicaisse, \& Derriere }

%-----------------------------------------------------------------------
%			Subject Index keywords
%-----------------------------------------------------------------------
% Enter up to 6 keywords describing your paper.  These will NOT be
% printed as part of your paper; however, they will be used to
% generate the subject index for the proceedings.  There is no
% standard list; however, you can consult the indices for past ADASS
% proceedings (http://iraf.noao.edu/ADASS/adass.html). 

\keywords{astronomy: large catalogues}

%-----------------------------------------------------------------------
%			       Abstract
%-----------------------------------------------------------------------
% Type abstract in the space below.  Consult the User Guide and Latex
% Information file for a list of supported macros (e.g. for typesetting 
% special symbols). Do not leave a blank line between \begin{abstract} 
% and the start of your text.

\begin{abstract}          % Leave intact
VizieR is a database containing about 4000 astronomical catalogues with
homogeneous descriptions. The major part of the catalogues is stored in a 
relational database but the large catalogues containing over 10 million rows are 
stored as compressed binary files and have dedicated query
programs for very fast 
access by celestial coordinates.
%; this method proved to be much more efficient 
%both in terms of speed and disk usage than storing the huge tables in 
%relational databases. 
The main goal of the CoCat (Co-processor Catalogue) project is 
to parallelize the VizieR large catalogue treatments (data extraction, 
cross-matching) in order to reduce the response time.
\end{abstract}

%-----------------------------------------------------------------------
%			      Main Body
%-----------------------------------------------------------------------
% Place the text for the main body of the paper here.  You should use
% the \section command to label the various sections; use of
% \subsection is optional.  Significant words in section titles should
% be capitalized.  Sections and subsections will be numbered
% automatically. 

\section{Introduction}
The \htmladdnormallink{VizieR catalogue service}{http://vizier.u-strasbg.fr/} 
   (Ochsenbein et al., 2000)
is currently implemented on a Sun 4-processor server. 
In recent years the competitiveness of PCs has dramatically increased,
with very high performance and ever decreasing costs,
%in terms of purchase but also of maintenance.
and in many circumstances, clusters
of Linux PCs are replacing the big standalone servers. 
In the VizieR case, the current load is high
and it was urgent to choose between a complete replacement and an
additional server.

\section{Organisation of the Catalogues}

VizieR catalogues are divided into two categories: {\em standard} and 
{\em large} catalogues, where  
large catalogues are defined, somewhat arbitrarily, as
having more than $10^7$ rows. Catalogues with up to a
few million records are managed by a standard relational DBMS, while 
each of the larger catalogues has a dedicated
query program which retrieves the records corresponding
to some circular or rectangular region around a position in the sky.
Some details about the methods used to store the large catalogues
and their performance, in terms of speed and disk usage,
are given in Derriere et al. (2000);
the current list of these large catalogues is given in Fig.~\ref{P1_13:fig2}.
It should be noted that both ``standard'' and ``large'' catalogues
share the same metadata descriptions --- the VizieR interface simply
translates the user's requests either into SQL queries, or into
some customized set of parameters interpreted by the dedicated query program.

\begin{figure}[h]
%\epsscale{.80}
%\plotone{P1-13_fig2.eps}
\begin{center}\small
\begin{tabular}{cc@{}rp{21em}r@{}c}
%\multicolumn{4}{c}{The following catalogues are stored on the cluster:}\\
\hline
       Acronym && Rows &  Title of the Catalogue 	& Size&\\ 
               && \multicolumn{1}{c}{$\times10^6$} & & Gbytes &\\ \hline
GSC-1.1 &$^\dag$&  25 & 
	The HST Guide Star Catalog, Version 1.1 (Lasker+ 1992)&0.3&\\
GSC-1.2 & &  25 & 
      	The HST Guide Star Catalog, Version 1.2 (Lasker+ 1996)&0.3&\\
GSC-ACT & &  25 & 
      	The HST Guide Star Catalog, Version GSC-ACT (Lasker+ 1996-99)&0.3&\\
USNO-A1.0&$^\dag$& 488 & 
	The PMM USNO-A1.0 Catalogue (Monet 1997)&3.3&\\
USNO-A2.0& & 526 & 
      	The USNO-A2.0 Catalogue (Monet+ 1998)&3.5&\\
USNO-B1.0& &1046 & 
      	The USNO-B1.0 Catalog (Monet+ 2003)&39.4&\\
GSC2.2   & & 456 & 
      	The Guide Star Catalog, Version 2.2 (STScI, 2001)&43.3&$^\ast$\\
APM-North& & 166 & 
      	The APM-North Catalogue (McMahon+, 2000)&10.1&\\
UCAC1    &$^\dag$&  27 & 
	The UCAC1 Catalogue (Zacharias+ 2000)&0.4&\\
UCAC2    & &  48 & 
      	The UCAC2 Catalogue (Zacharias+ 2003)&1.6&\\
2MASSIpsc &$^\dag$& 162 & 
	The 2MASS Catalog Intermediate Data Release (IPAC/UMass, 2000)&12.1&\\
2MASS-PSC & & 741 & 
      	The 2MASS All-Sky Catalog of Point Sources (Cutri+ 2003)&40.8&\\
DENIS-P &$^\dag$&  17 & 
	The DENIS database first release (Epchtein+, 1999)	&3.4&\\
DENIS-2 & & 195 & 
      	The DENIS database (DENIS Consortium, 2003)	&14.2&\\
\hline
\multicolumn{6}{l}{ $^\dag$ \quad obsolete version of the catalog}\\
\multicolumn{6}{l}{ $^\ast$ \quad no attempt was made to compress the GSC2.2 
   catalog}\\
\hline
\end{tabular}
\caption{The large catalogues in the cluster 
  {\em(version October 2003)}} \label{P1_13:fig2}
\end{center}
\end{figure}

\section{Which architecture?}

\begin{figure}[h]
\epsscale{.75}
\plotone{P1-13_fig1.eps}
\begin{center}
\caption{The CoCat cluster.} \label{P1_13:fig1}
\end{center}
\end{figure}

As the Sun server is becoming overloaded we decided to 
%keep it Sun server for normal catalogues request and to 
move the set of large catalogues to a Linux cluster 
(the {\em CoCat} cluster).
It then becomes easy to increase the computing power or 
%add a machine or to increase 
the storage capability at a very low cost;  %without spending too much money. 
it also represents a flexible solution for future evolution. 

A wide range of free or commercial clustering tools is available.
We started with %Our project is based on 
a new free clustering tool package, 
\htmladdnormallinkfoot{CLIC}{http://clic.mandrakesoft.com/} (Cluster LInux
pour le Calcul) which makes use of the MPI library 
(Message Passing Interface)
and is based on the Mandrake Linux 9.0 distribution. 
The CoCat
cluster involves one master node and five slave nodes (Fig.~\ref{P1_13:fig1}).


\section{The Dispatcher}

Tools like MPI are designed to run parallelized CPU-intensive tasks 
on a cluster, but in
the CoCat case it is necessary to dispatch a large number of 
queries (typically $10^5$--$10^6$ daily requests) and their results. 
%CoCat project is not only a massive computing project but it is also 
%designed for data access. 
The large catalogues being stored in a compact form, it was possible
as a first step to replicate the data (about 200\,Gbytes) on each node.
With the increasing number of increasingly larger catalogues
it will be necessary in the near future to distribute the data
over several nodes, and it will become mandatory to describe
on which engines which part of which catalogue can be accessed:
this role is devoted to the Dispatcher, running on the master
node, and illustrated in Fig.~\ref{P1_13:fig3}.
%configuration files for all nodes and to implement a real dispatching engine
%able to follow strategies depending of the cluster context (data distribution,
%technical problems, etc.). The Dispatcher is the CoCat architecture head and is
%running on the master node.

\begin{figure}
\epsscale{.80}
\plotone{P1-13_fig3.eps}
\begin{center}
\caption{CoCat global architecture} \label{P1_13:fig3}
\end{center}
\end{figure}


%\section{On-going works}

%\clearpage
\section{The first tests}
The first tests showed that the performance is not as high
as expected: the overhead of the MPI library is large compared 
to the time required by the actual execution of the
requests initiated by the Dispatcher.
The CLIC package, while easing the installation of the system
and the applications on the cluster nodes, requires
an identical hardware configuration of each node: this introduces
a severe lack of flexibility in the management and the evolution of the cluster.

We are currently testing new configurations for 
a more efficient Dispatcher, where each node is considered
as an independent resource and where the Dispatcher assigns the tasks
according to its knowledge of the current load on each node. 
Such a method seems to work well in the current situation where
all catalogues are present on each node, but in the
near future we will have to 
%manage very large catalogues like SuperCosmos and we have to 
take some important decisions about:
\begin{itemize}
\item	which strategy to adopt about splitting
	the very large catalogues and how to distribute catalogue subsets
	on the different cluster nodes
\item	whether it would be useful to dedicate one or several nodes 
	to specific tasks (e.g.\ cross-matching)
\item	whether it would still be useful to implement parallel processing
	(e.g.\ for cross-matching large catalogues) in the dispatcher.
\end{itemize}

%-----------------------------------------------------------------------
%			      References
%-----------------------------------------------------------------------
% Now comes the reference list.  Since we typed out the citations ourselves,
% the reference list is enclosed in a "references" environment.  Each
% new reference begins with a \reference command which sets up the proper
% indentation.  Typography that may be required in the reference list by
% the editorial staff must be included by the author.
%
% Observe the "standard" order for bibliographic material: author name(s),
% publication year, journal name, volume, and page number for articles.
% Some journal names are available as macros; see the package
% instructions for a listing of which ones have been "macro-ized".
%
% There is no need to engage in any other typographic manipulation.
%
% List your references below within the reference environment
% (i.e. between the \begin{references} and \end{references} tags).
% Each new reference should begin with a \reference command which sets
% up the proper indentation.  Observe the following order when listing
% bibliographical information for each reference:  author name(s),
% publication year, journal name, volume, and page number for
% articles.  Note that many journal names are available as macros; see
% the User Guide for a listing "macro-ized" journals.   
%
% Note the following are some of the tricks that can be used:
%
%   o  \& is used to format an ampersand symbol (&).
%   o  \'e and \`e put an acute accent and a grave accent, respectively,
%      over the letter e.  See the User Guide for details on
%      formatting special characters.
%   o  "\ " after a period prevents LaTeX from interpreting the period 
%      as an end of a sentence.
%   o  \aj is a macro that expands to "Astron. J."  See the User Guide
%      for a full list of journal macros
%   o  \adassviii is a macro that expands to the full title, editor,
%      and publishing information for the ADASS VIII conference
%      proceedings.  Such macros are defined for ADASS conferences I
%      through X.
%   o  When referencing a paper in the current volume, use the
%      \adassviii and \paperref macros.  The argument to \paperref is
%      the paper ID code for the paper you are referencing.  See the 
%      note in the "Paper ID Code" section above for details on how to 
%      determine the paper ID code for the paper you reference.  
%
\begin{references}
\reference Derriere, S., Ochsenbein, F., \& Egret, D. 
    2000, \adassix, 235
\reference Ochsenbein, F., Bauer, P., \& Marcout, J. 2000, \aaps, 143, 23 
\end{references}

% Do not place any material after the references section

\end{document}  % Leave intact
