%%% Modif FO
%APN3_PROCEEDINGS_FORM%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% TEMPLATE.TEX -- APN3 (2003) ASP Conference Proceedings template.
%
% Derived from ADASS VIII (98) ASP Conference Proceedings template
% Updated by N. Manset for ADASS IX (99), F. Primini for ADASS 2000,
% D.Bohlender for ADASS 2001, and H. Payne for ADASS XII and LaTeX2e.
%
% Use this template to create your proceedings paper in LaTeX format
% by following the instructions given below.  Much of the input will
% be enclosed by braces (i.e., { }).  The percent sign, "%", denotes
% the start of a comment; text after it will be ignored by LaTeX.
% You might also notice in some of the examples below the use of "\ "
% after a period; this prevents LaTeX from interpreting the period as
% the end of a sentence and putting extra space after it.
%
% You should check your paper by processing it with LaTeX.  For
% details about how to run LaTeX as well as how to print out the User
% Guide, consult the README file.  You should also consult the sample
% LaTeX papers, sample1.tex and sample2.tex, for examples of including
% figures, html links, special symbols, and other advanced features.
%
% If you do not have access to the LaTeX software or a laser printer
% at your site, you can still prepare your paper following the
% instructions in the User Guide.  In such cases, the editors will
% process the file and make any necessary editorial adjustments.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
\documentclass[11pt,twoside]{article}  % Leave intact
\usepackage{adassconf}

% If you have the old LaTeX 2.09, and not the current LaTeX2e, comment
% out the \documentclass and \usepackage lines above and uncomment
% the following:

%\documentstyle[11pt,twoside,adassconf]{article}

\begin{document}   % Leave intact

%-----------------------------------------------------------------------
%               Paper ID Code
%-----------------------------------------------------------------------
% Enter the proper paper identification code.  The ID code for your
% paper is the session number associated with your presentation as
% published in the official conference proceedings.  You can
% find this number locating your abstract in the printed proceedings
% that you received at the meeting or on-line at the conference web
% site; the ID code is the letter/number sequence proceeding the title
% of your presentation.
%
% This will not appear in your paper; however, it allows different
% papers in the proceedings to cross-reference each other.  Note that
% you should only have one \paperID, and it should not include a
% trailing period.
%
% EXAMPLE: \paperID{O4-1}
% EXAMPLE: \paperID{P7-7}
%

\paperID{P2-26}
%%%% ID=P2-26

%-----------------------------------------------------------------------
%                   Paper Title
%-----------------------------------------------------------------------
% Enter the title of the paper.
%
% EXAMPLE: \title{A Breakthrough in Astronomical Software Development}
%
% If your title is so long as to fill the page header when you print it,
% then please supply a short form as a \titlemark.
%
% EXAMPLE:
%  \title{Rapid Development for Distributed Computing, with Implications
%         for the Virtual Observatory}
%  \titlemark{Rapid Development for Distributed Computing}
%
\hyphenation{Sky-Node Sky-Nodes}
\title{Build Your Own SkyNode!}
%\titlemark{ }

%-----------------------------------------------------------------------
%                 Authors of Paper
%-----------------------------------------------------------------------
% Enter the authors followed by their affiliations.  The \author and
% \affil commands may appear multiple times as necessary (see example
% below).  List each author by giving the first name or initials first
% followed by the last name.  Authors with the same affiliations
% should grouped together.
%
% EXAMPLE: \author{Raymond Plante, Doug Roberts,
%                  R.\ M.\ Crutcher\altaffilmark{1}}
%          \affil{National Center for Supercomputing Applications,
%                 University of Illinois Urbana-Champaign, Urbana, IL
%                 61801}
%          \author{Tom Troland}
%          \affil{University of Kentucky}
%
%          \altaffiltext{1}{Astronomy Department, UIUC}
%
% In this example, the first three authors, "Plante", "Roberts", and
% "Crutcher" are affiliated with "NCSA".  "Crutcher" has an alternate
% affiliation with the "Astronomy Department".  The fourth author,
% "Troland", is affiliated with "University of Kentucky"

\author{Norbert Purger\altaffilmark{1},
Tam\'{a}s Budav\'{a}ri\altaffilmark{2}, Alexander S.
Szalay\altaffilmark{2}, Ani Thakar\altaffilmark{2} and Istv\'{a}n
Csabai\altaffilmark{1}}

\iffalse	%% Use a better style, FO
\altaffiltext{1}{Dept. of Physics of Complex Systems,
E\"{o}tv\"{o}s Lor\'{a}nd University, H-1117 Budapest, Hungary}
\altaffiltext{2}{Dept. of Physics \& Astronomy, Johns Hopkins
University, Baltimore, MD 21218, USA}
\else
\vspace*{2ex}   % Corrected style FO
\altaffiltext{1}{
   Dept. of Physics of Complex Systems,
   E\"{o}tv\"{o}s Lor\'{a}nd University, H-1117 Budapest, Hungary}
\altaffiltext{2}{
   Dept. of Physics \& Astronomy, Johns Hopkins
   University, Baltimore, MD 21218, USA}
\fi

%-----------------------------------------------------------------------
%            Contact Information
%-----------------------------------------------------------------------
% This information will not appear in the paper but will be used by
% the editors in case you need to be contacted concerning your
% submission.  Enter your name as the contact along with your email
% address.
%
% EXAMPLE:  \contact{Dennis Crabtree}
%           \email{crabtree@cfht.hawaii.edu}
%

\contact{Norbert Purger}
\email{npurger@complex.elte.hu}

%-----------------------------------------------------------------------
%             Author Index Specification
%-----------------------------------------------------------------------
% Specify how each author name should appear in the author index.  The
% \paindex{ } should be used to indicate the primary author, and the
% \aindex for all other co-authors.  You MUST use the following
% syntax:
%
% SYNTAX:  \aindex{Lastname, F. M.}
%
% where F is the first initial and M is the second initial (if
% used).  This guarantees that authors that appear in multiple papers
% will appear only once in the author index.
%
% EXAMPLE: \paindex{Crabtree, D.}
%          \aindex{Manset, N.}
%          \aindex{Veillet, C.}
%
% NOTE: this information is also used to build the author list that
% appears in the table of contents.  Authors will be listed in the order
% of the \paindex and \aindex commmands.
%

\paindex{Purger, N.} 
\aindex{Budav\'ari, T.} 
\aindex{Szalay, A. S.} 
\aindex{Thakar, A.} 
\aindex{Csabai, I.}

%-----------------------------------------------------------------------
%             Author list for page header
%-----------------------------------------------------------------------
% Please supply a list of author last names for the page header. in
% one of these formats:
%
% EXAMPLES:
% \authormark{Lastname}
% \authormark{Lastname1 \& Lastname2}
% \authormark{Lastname1, Lastname2, ... \& LastnameN}
% \authormark{Lastname et al.}
%
% Use the "et al." form in the case of seven or more authors, or if
% the preferred form is too long to fit in the header.

\authormark{Purger, Budav\'{a}ri, Szalay, Thakar \& Csabai}

%-----------------------------------------------------------------------
%           Subject Index keywords
%-----------------------------------------------------------------------
% Enter a comma separated list of up to 6 keywords describing your
% paper.  These will NOT be printed as part of your paper; however,
% they will be used to generate the subject index for the proceedings.
% There is no standard list; however, you can consult the indices
% for past proceedings (http://adass.org/adass/proceedings/).
%
% EXAMPLE:  \keywords{visualization, astronomy: radio, parallel
%                     computing, AIPS++, Galactic Center}
%
% In this example, the author noticed that "radio astronomy" appeared
% in the ADASS VII Index as "astronomy" being the major keyword and
% "radio" as the minor keyword.  The colon is used to introduce another
% level into the index.

\keywords{SkyQuery, SkyNode, Virtual Observatory, databases, HTM, 2MASS}

%-----------------------------------------------------------------------
%                  Abstract
%-----------------------------------------------------------------------
% Type abstract in the space below.  Consult the User Guide and Latex
% Information file for a list of supported macros (e.g. for typesetting
% special symbols). Do not leave a blank line between \begin{abstract}
% and the start of your text.

\begin{abstract}          % Leave intact
% Place the text of your abstract here - NO BLANK LINES
SkyQuery is an excellent VO prototype application that marries Web
Services technology with emerging VO standards to enable dynamic
cross-matching queries between different VO-enabled archives. The
archive data is stored in databases that are published online as
SkyNodes.
\par
As the available data from Sky Surveys and new
digital archives rapidly multiplies every year, more than 80
percent of the data will exist outside of large data centers at
any given moment, making it very important to have dynamic
cross-identification tools like SkyQuery.
\par
Loading an entire survey like 2MASS or SDSS into a database
involves making decisions about issues like data formats and
indices for tables. We describe the process of loading such a
large amount of data into a relational DBMS (SQL Server) and
generating a sky index using the Hierarchical Triangular Mesh
(HTM), which provides a really fast way to find objects. This can
be easily done even for a large survey like the 2MASS All-Sky Data
Release (150GB uncompressed, 471M objects) in as little as 2 days
including the required computation time for HTM.
\end{abstract}

%-----------------------------------------------------------------------
%                 Main Body
%-----------------------------------------------------------------------
% Place the text for the main body of the paper here.  You should use
% the \section command to label the various sections; use of
% \subsection is optional.  Significant words in section titles should
% be capitalized.  Sections and subsections will be numbered
% automatically.
%
% EXAMPLE:  \section{Introduction}
%           ...
%           \subsection{Our View of the World}
%           ...
%           \section{A New Approach}
%
% It is recommended that you look at the sample papers, sample1.tex
% and sample2.tex, for examples for formatting references, footnotes,
% figures, equations, html links, lists, and other special features.

\section{Building a SkyNode}

\subsection{Preparing The Data}

Sky Surveys usually provide data to the public in a form of ASCII
or FITS files and their brief descriptions. Using these
descriptions we are able to understand the structure of the files
and the data types included in them. If we would like to load this
data in a database, we'll have to create a representation of the
survey data types suitable for our database system. This means a
database schema and a parser to convert the files to our format.
\par
Most of the time we don't need to deal with lot's of different
tables, it is enough to make only one or two tables for all of our
data (see an example of a more complex schema in the next
section, Fig. \ref{P2-26:2mass}). This table (PhotoObj or SpecObj) will
hold all the data we can find in the public files (but with SQL
data types) completed with an identity primary key (ObjID) column.
The best way to make this ``create table" script is to write a
description after every column as a comment. By doing this, we'll
be able to generate important metadata to our tables and functions
later on.
\par
Our next step is the file conversion. In order to load our files
into a database, the clearest way is to convert them into a
character delimited (CSV) format using the new data types and our
symbol of a NULL value. Additionally, we must include our
coordinates in a float type J2000 format as well (ra, dec),
because the HTM coordinates will be calculated from these values.
If we couldn't find any sample code bundled with the public data,
we can still start thinking about using regular expressions with
our parser.

\begin{figure} [!h]{
   \begin{center}
   \epsscale{0.85}
   \plotone{P2-26_f1.eps}
   \caption{\label{P2-26:2df} Metadata of the 2dFGRS SkyNode at SkyQuery.net}
   \end{center}}
   \end{figure}

\subsection{Creating The Database}
SkyNodes are using HTM2 coordinates to index (htmID) and locate
survey objects. The HTM2 code that will calculate an htmID in the
database is realized by an extended stored procedure, which must
be installed on the server. Our next step is to create the
database that will hold all of our survey-related data
(130\%-150\% of the uncompressed data size). To avoid huge
transactional logs, this database should use Simple or Bulk-logged
recovery model. By now we are ready to create the structure of our
new SkyNode by executing our ``create table" script and the ones
that are required by every SkyNode (e.g. HTM2, neighbor and
matching functions).
\par
At this point we have a fully functional database without a row of
data. To fill it up, we can use ``bulk load" with the converted CSV
files, or advanced (graphical) tools (e.g. Data Transformation
Services in SQL Server). After this, the HTM coordinates will be
calculated and loaded into a separate table by using a function of
the installed extended stored procedure.
\par
To speed up the searches in our node, we should create additional
indices and define relationships between our tables (e.g. between
PhotoObj and HTM tables using ObjID)

\subsection{Publishing The Node}
If we would like to work with data from several different surveys,
it is really necessary to have some kind of description about the
actual data we are using. If we write comments to every new object
(tables, columns, functions) in the SQL scripts, we are able to
generate and load this important metadata into our database (Fig.
\ref{P2-26:2df}).

\par
In order to make a SkyNode, our last two steps are creating a user
to access the data and set up the web service using a modified
configuration file. Our user must have permissions to SELECT from
the tables, and EXEC the stored procedures. The SkyNode
configuration file must include the user name, password and a few
lines of description (name, location, ..).

\begin{figure} [!h]{
   \begin{center}
   \epsscale{1.0}
   \plotone{P2-26_f2.eps}
   \caption{\label{P2-26:2mass} Database schema for the 2MASS All-Sky Data Release}
   \end{center}}
   \end{figure}

\section{The 2MASS SkyNode}

    With it's more than 471 million objects the 2MASS All-Sky Data Release is a bit more challenging to handle.
    The Data Release includes 3 different type of ``catalog" (Point Source Catalog, Extended Source Catalog, ScanInfo),
    which makes the database schema more complex (Fig. \ref{P2-26:2mass}). Even if we have a more complex
    database, the biggest problem is still the time required to
    load the data and calculate the coordinate based indices.
    Using the new HTM2 code to create the htmID indices came up to be a surprisingly fast solution.
    After about 36 hours of parallel loading and computing time on 2 dual Xeon  processors,
    we had a 2MASS database in the SQL Server.

\section{New SkyNodes at SkyQuery.net}

    New, final data releases from great surveys made a good choice to extend the list of SkyNodes this year.
    After about one month of work, the following new nodes were set
    up at
    \htmladdnormallink{SkyQuery.net}{http://www.skyquery.net}:
    \begin{itemize}
    \item FIRST (2003 April 11 release)
    \item IRAS (point source catalog)
    \item 2dFGRS (APM Source and Spectra data )
    \item 2QZ (full catalog)
    \item NVSS (full catalog)
    \item PSCz (full catalog)
    \item 2MASS (All-Sky Data R., Point and Extended Source Catalog, ScanInfo)
    \end{itemize}

    \par
    To find more information about SkyNodes and SkyQuery, or to download
    prepared SQL scripts visit the following pages: \\
    \\
\makeURL{http://www.skyquery.net} \\
\makeURL{http://skyserver.elte.hu/skynode}

\acknowledgments
This work was supported by the National Science
Foundation's Information Technology Research Program under
Cooperative Agreement AST0122449 with The Johns Hopkins University
and by the Hungarian Scientific Research Fund OTKA T037548.


%-----------------------------------------------------------------------
%                 References
%-----------------------------------------------------------------------
% List your references below within the reference environment
% (i.e. between the \begin{references} and \end{references} tags).
% Each new reference should begin with a \reference command which sets
% up the proper indentation.  Observe the following order when listing
% bibliographical information for each reference:  author name(s),
% publication year, journal name, volume, and page number for
% articles.  Note that many journal names are available as macros; see
% the User Guide listing "macro-ized" journals.
%
% EXAMPLE:  \reference Hagiwara, K., \& Zeppenfeld, D.\  1986,
%                Nucl.Phys., 274, 1
%           \reference H\'enon, M.\  1961, Ann.d'Ap., 24, 369
%           \reference King, I.\ R.\  1966, \aj, 71, 276
%           \reference King, I.\ R.\  1975, in Dynamics of Stellar
%                Systems, ed.\ A.\ Hayli (Dordrecht: Reidel), 99
%           \reference Tody, D.\  1998, \adassvii, 146
%           \reference Zacharias, N.\ \& Zacharias, M.\ 2003,
%                \adassxii, \paperref{P7.6}
%
% Note the following tricks used in the example above:
%
%   o  \& is used to format an ampersand symbol (&).
%   o  \'e puts an accent agu over the letter e.  See the User Guide
%      and the sample files for details on formatting special
%      characters.
%   o  "\ " after a period prevents LaTeX from interpreting the period
%      as an end of a sentence.
%   o  \aj is a macro that expands to "Astron. J."  See the User Guide
%      for a full list of journal macros
%   o  \adassvii is a macro that expands to the full title, editor,
%      and publishing information for the ADASS VII conference
%      proceedings.  Such macros are defined for ADASS conferences I
%      through XI.
%   o  When referencing a paper in the current volume, use the
%      \adassxii and \paperref macros.  The argument to \paperref is
%      the paper ID code for the paper you are referencing.  See the
%      note in the "Paper ID Code" section above for details on how to
%      determine the paper ID code for the paper you reference.
%
\begin{references}

\reference Budav\'{a}ri, T., et al. \ 2003, \adassxii, \adassref{xii:O10-1}{31}
\reference Fekete, Gy., et al. \ 2004, \adassxiii, \paperref{P3-9}
\reference Malik, T., et al. \ 2002, CIDR'03, p.17, 
   {\it 'SkyQuery: A WebService Approach to Federate Databases' } 
\reference Thakar, A., et al. \ 2004, \adassxiii, \paperref{P1-6}

\end{references}

% Do not place any material after the references section

\end{document}  % Leave intact
