%APN3_PROCEEDINGS_FORM%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% TEMPLATE.TEX -- APN3 (2003) ASP Conference Proceedings template.
%
% Derived from ADASS VIII (98) ASP Conference Proceedings template
% Updated by N. Manset for ADASS IX (99), F. Primini for ADASS 2000,
% D.Bohlender for ADASS 2001, and H. Payne for ADASS XII and LaTeX2e.
%
% Use this template to create your proceedings paper in LaTeX format
% by following the instructions given below.  Much of the input will
% be enclosed by braces (i.e., { }).  The percent sign, "%", denotes
% the start of a comment; text after it will be ignored by LaTeX.  
% You might also notice in some of the examples below the use of "\ "
% after a period; this prevents LaTeX from interpreting the period as
% the end of a sentence and putting extra space after it.  
% 
% You should check your paper by processing it with LaTeX.  For
% details about how to run LaTeX as well as how to print out the User
% Guide, consult the README file.  You should also consult the sample
% LaTeX papers, sample1.tex and sample2.tex, for examples of including
% figures, html links, special symbols, and other advanced features.
%
% If you do not have access to the LaTeX software or a laser printer
% at your site, you can still prepare your paper following the
% instructions in the User Guide.  In such cases, the editors will
% process the file and make any necessary editorial adjustments.
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
\documentclass[11pt,twoside]{article}  % Leave intact
\usepackage{adassconf}

% If you have the old LaTeX 2.09, and not the current LaTeX2e, comment
% out the \documentclass and \usepackage lines above and uncomment
% the following:

%\documentstyle[11pt,twoside,adassconf]{article}

\begin{document}   % Leave intact

%-----------------------------------------------------------------------
%			    Paper ID Code
%-----------------------------------------------------------------------
% Enter the proper paper identification code.  The ID code for your
% paper is the session number associated with your presentation as
% published in the official conference proceedings.  You can
% find this number by locating your abstract in the printed proceedings
% that you received at the meeting or on-line at the conference web
% site; the ID code is the letter/number sequence preceding the title
% of your presentation.
%
% This will not appear in your paper; however, it allows different
% papers in the proceedings to cross-reference each other.  Note that
% you should only have one \paperID, and it should not include a
% trailing period.
%
% EXAMPLE: \paperID{O4-1}
% EXAMPLE: \paperID{P7-7}
%

\paperID{P2-20}
%%%% ID=P2-20

%-----------------------------------------------------------------------
%		            Paper Title 
%-----------------------------------------------------------------------
% Enter the title of the paper.
%
% EXAMPLE: \title{A Breakthrough in Astronomical Software Development}
% 
% If your title is so long as to fill the page header when you print it,
% then please supply a short form as a \titlemark.
%
% EXAMPLE: 
%  \title{Rapid Development for Distributed Computing, with Implications
%         for the Virtual Observatory}
%  \titlemark{Rapid Development for Distributed Computing}
%
\title{Publishing Links to Astronomical Data On-line}
%\titlemark{ }

%-----------------------------------------------------------------------
%		          Authors of Paper
%-----------------------------------------------------------------------
% Enter the authors followed by their affiliations.  The \author and
% \affil commands may appear multiple times as necessary (see example
% below).  List each author by giving the first name or initials first
% followed by the last name.  Authors with the same affiliations
% should be grouped together.
%
% EXAMPLE: \author{Raymond Plante, Doug Roberts, 
%                  R.\ M.\ Crutcher\altaffilmark{1}}
%          \affil{National Center for Supercomputing Applications, 
%                 University of Illinois Urbana-Champaign, Urbana, IL
%                 61801}
%          \author{Tom Troland}
%          \affil{University of Kentucky}
%
%          \altaffiltext{1}{Astronomy Department, UIUC}
%
% In this example, the first three authors, "Plante", "Roberts", and
% "Crutcher" are affiliated with "NCSA".  "Crutcher" has an alternate 
% affiliation with the "Astronomy Department".  The fourth author,
% "Troland", is affiliated with "University of Kentucky"

\author{Alberto Accomazzi, G{\"u}nther Eichhorn}
\affil{Harvard-Smithsonian Center for Astrophysics}

%-----------------------------------------------------------------------
%			 Contact Information
%-----------------------------------------------------------------------
% This information will not appear in the paper but will be used by
% the editors in case you need to be contacted concerning your
% submission.  Enter your name as the contact along with your email
% address.
% 
% EXAMPLE:  \contact{Dennis Crabtree}
%           \email{crabtree@cfht.hawaii.edu}
%

\contact{Alberto Accomazzi}
\email{aaccomazzi@cfa.harvard.edu}

%-----------------------------------------------------------------------
%		      Author Index Specification
%-----------------------------------------------------------------------
% Specify how each author name should appear in the author index.  The 
% \paindex{ } should be used to indicate the primary author, and the
% \aindex for all other co-authors.  You MUST use the following
% syntax: 
%
% SYNTAX:  \aindex{Lastname, F. M.}
% 
% where F is the first initial and M is the second initial (if
% used).  This guarantees that authors that appear in multiple papers
% will appear only once in the author index.  
%
% EXAMPLE: \paindex{Crabtree, D.}
%          \aindex{Manset, N.}        
%          \aindex{Veillet, C.}        
%
% NOTE: this information is also used to build the author list that
% appears in the table of contents.  Authors will be listed in the order
% of the \paindex and \aindex commands.
%

\paindex{Accomazzi, A.}
\aindex{Eichhorn, G.}     % Remove this line if there is only one author

%-----------------------------------------------------------------------
%		      Author list for page header	
%-----------------------------------------------------------------------
% Please supply a list of author last names for the page header, in
% one of these formats:
%
% EXAMPLES:
% \authormark{Lastname}
% \authormark{Lastname1 \& Lastname2}
% \authormark{Lastname1, Lastname2, ... \& LastnameN}
% \authormark{Lastname et al.}
%
% Use the "et al." form in the case of seven or more authors, or if
% the preferred form is too long to fit in the header.

\authormark{Accomazzi \& Eichhorn}

%-----------------------------------------------------------------------
%			Subject Index keywords
%-----------------------------------------------------------------------
% Enter a comma separated list of up to 6 keywords describing your
% paper.  These will NOT be printed as part of your paper; however,
% they will be used to generate the subject index for the proceedings.
% There is no standard list; however, you can consult the indices
% for past proceedings (http://adass.org/adass/proceedings/).
%
% EXAMPLE:  \keywords{visualization, astronomy: radio, parallel
%                     computing, AIPS++, Galactic Center}
%
% In this example, the author noticed that "radio astronomy" appeared
% in the ADASS VII Index as "astronomy" being the major keyword and
% "radio" as the minor keyword.  The colon is used to introduce another
% level into the index.

\keywords{NASA ADS, data: archives, SOAP, web services}

%-----------------------------------------------------------------------
%			       Abstract
%-----------------------------------------------------------------------
% Type abstract in the space below.  Consult the User Guide and Latex
% Information file for a list of supported macros (e.g. for typesetting 
% special symbols). Do not leave a blank line between \begin{abstract} 
% and the start of your text.

\begin{abstract}          % Leave intact
We discuss the design and implementation of a scheme enabling authors
to refer and link to on-line datasets available from astronomical archives.
This will provide the readers of electronic papers with direct access to
the data discussed therein.  The software tools used to create and maintain
links from published papers to the datasets make use of Web-Services-based
technology.  The system has been designed in collaboration with the NASA
Astrophysics Data Centers, the American Astronomical Society, 
and the University of Chicago Press, and
will be maintained by the NASA Astrophysics Data System.
More information about this project is available at:
\htmladdnormallink{{\it http://vo.ads.harvard.edu/dv}}{http://vo.ads.harvard.edu/dv}.
\end{abstract}

%-----------------------------------------------------------------------
%			      Main Body
%-----------------------------------------------------------------------
% Place the text for the main body of the paper here.  You should use
% the \section command to label the various sections; use of
% \subsection is optional.  Significant words in section titles should
% be capitalized.  Sections and subsections will be numbered
% automatically. 
%
% EXAMPLE:  \section{Introduction}
%           ...
%           \subsection{Our View of the World}
%           ...
%           \section{A New Approach}
%
% It is recommended that you look at the sample papers, sample1.tex
% and sample2.tex, for examples for formatting references, footnotes,
% figures, equations, html links, lists, and other special features.  

\section{Introduction}

This paper describes the
Dataset Verification and Linking efforts underway among the
NASA Archives and Data Centers,
the American Astronomical Society (AAS), and the University of Chicago Press
(UCP, publisher of ApJ, AJ and PASP). This activity has taken place
under the auspices and guidance of the NASA Astrophysics Data
Centers Executive Council (ADEC), and aims at fulfilling the promise
of further integrating the astronomical literature and the on-line data
it is based upon.
 
The NASA Astrophysics Data System (ADS) is developing the tools needed
by publishers and users at large for both dataset verification and linking
through stable, top-level services that can be maintained for the
foreseeable future. Links created to datasets from on-line manuscripts
will always refer to a dataset via a URI created using a well-defined
identifier, and the URI will be turned into one or more URLs in
real-time by a central resolver provided by the ADS.
This will provide a high level of reliability and persistence to the links,
as well as providing an upgrade path into any future Virtual
Observatory (VO) efforts in this
direction.  Dataset citation, verification and linking will work as follows:

\begin{itemize}
\item Astronomy data centers and archives will start attaching permanent
dataset identifiers to the data they distribute.

\item Astronomers will write papers referencing the dataset they have
  used in their research. As per the instructions given to them by the
  AAS, they will start using the appropriate markup to identify datasets in the
  papers they publish.

\item During the publishing pipeline, UCP will extract the identifiers
  and send a query to a central dataset identifier service (hosted by
  the ADS) to find out if (a) the dataset is valid and (b) a URL can be
  associated with it.

\item The central dataset identifier verification service will query a
  number of (relevant) datacenters using its own protocol, will cache
  the results, and will return a status flag
  indicating if a dataset is known or not.

\item For the dataset identifiers that are known, URLs can be built by
  using the base URL of a dataset identifier resolver and the dataset
  identifier itself, e.g.
  \htmladdnormallink{{\it http://vo.ads.harvard.edu/dv/DataResolver.cgi?ADS/Sa.CXO\#15}}{http://vo.ads.harvard.edu/dv/DataResolver.cgi?ADS/Sa.CXO\#15}.
  If the verification is successful, UCP will include such a URL in its
  on-line article.

\item When the article goes on-line, a user clicking on the link
  associated with the dataset will be taken initially to the URL
  above. What happens next
  depends on whether the ADS has one or more datacenters claiming to have
  data relative to this dataset (there could even be different mirror
  sites for a given data center).
  If only one final URL is available for the dataset in question, the
  resolver will simply forward the user to it. If more than a single
  URL is available, a simple menu listing all the
  information we have about the available links will be displayed.

\end{itemize}

ADS will take the responsibility of maintaining services that are
aware of all relevant datacenters that may have datasets available
on-line, and datacenter profiles indicating which datasets are
available from each of them.

\section{Dataset Identifiers}
 
In order to allow easy integration of this effort in the emerging VO
framework, the ADEC has decided to adopt a syntax for the dataset
identifiers which is consistent with the current International Virtual
Observatory Alliance (IVOA) Dataset
Identifier draft (Plante et al.\ 2003). This adoption will facilitate
integration of these identifiers and the tools that manipulate them in
the VO.

\subsection{IVOA Identifiers}

According to the IVOA
Identifiers Draft, the general URI format for an individual identifier
is a string of the kind:
{\it ivo://AuthorityId/ResourceKey\#PrivateId}.
While we refer the reader to the draft for a full explanation of the
syntax, a few things are worth pointing out:


\begin{itemize}
\item Use of the {\it ivo://} scheme denotes the fact that the rest of
  the identifier should be interpreted as a string abiding by the IVOA
  Identifiers specification, and that the identifier and the resource
  it refers to have been registered with an IVOA-compliant registry.

\item {\it AuthorityId} is a naming authority registered within the
  IVOA community; the use of this string within the identifier
  establishes a namespace within which the rest of the identifier can
  be considered unique. In general, the {\it AuthorityId} does not need to
  correspond to a specific institution but rather to an entity that
  has been granted use of the namespace.

\item {\it ResourceKey} is a name for a resource that is unique within
  the namespace established by the {\it AuthorityId}. In general it will
  correspond to a unique resource made available to the VO by or on
  behalf of the {\it AuthorityId}. A typical example of a {\it ResourceKey} in
  this context is a data collection generated by a particular project
  or mission.

\item {\it PrivateId} represents a unique string within the
  {\it ResourceKey} and it denotes a particular dataset belonging to the
  collection. 

\end{itemize}

\subsection{Using Dataset Identifiers in the Literature}

Given the fact that much of the VO infrastructure is still under
design and development, the ADEC has decided on a specific
recommendation for referring to dataset identifiers in the
astronomical literature. The general form of these identifiers is:
{\it ADS/FacilityId\#PrivateId}.
Comparing these identifiers with the general IVOA syntax we can make
the following observations:

\begin{itemize}
\item No protocol scheme has been specified. This is due to the fact
  that until IVOA-compliant registries are available, and {\it AuthorityId}s
  can be established by them, it would be incorrect to claim that
  these identifiers are in fact IVOA compliant. However, it is to be
  expected that these identifiers can be resolved as IVOA identifiers
  in the not too distant future by a simple syntactic operation.

\item The {\it AuthorityId} string ``ADS'' has been specified. This
  simply recognizes the current role of the ADS in managing the namespace
  used for these identifiers, in the absence of a community-wide
  namespace granting authority. It does not suggest nor imply that the
  ADS  controls or manages the dataset itself.

\item The {\it ResourceKey} token will be interpreted as a Facility. An
  ever-growing list of facilities is maintained by the ADS. Data centers
  should contact the ADS should they need to register new entries.

\item The {\it PrivateId} string can be anything that the data center
  desires, with the provision that the identifier string as a whole
  should abide by the general syntax of a URI, as required by the IVOA
  identifiers specification. 

\end{itemize}

\section{Generating Dataset Identifiers}

All Data Centers and Archives which provide public access to their
data should structure their databases and interfaces so that when a
particular dataset is released to the public, it is uniquely tagged by
an identifier created as discussed above. Users who download
such a dataset should be made aware of the identifier associated with
it and how it should be referenced in the published literature.
In order for a datacenter to ensure that the identifiers it is
generating comply with the syntax endorsed by the ADEC, the following
must occur:

\begin{enumerate}

\item The identifier is in the form {\it ADS/FacilityId\#PrivateId}.

\item The {\it FacilityId} has been registered with the ADS and is listed
  in the table of known facilities.

\item The {\it PrivateId} is a unique identifier within the {\it
  FacilityId}, and its association with the dataset will not change.

\item A profile for the datacenter has been registered with the ADS,
  and {\it FacilityId} has been listed as one of the resources
  that the center has data for.

\item The datacenter provides a dataset verification service which will be used to verify the validity and location of identifiers published in the literature. 
\end{enumerate}

Once a datacenter has published a dataset ID, it should provide access
to it. This should be a human-readable page on its web server that
displays the dataset's relevant metadata and offers the user the
option to download the dataset itself in some form or fashion. It is
left up to the datacenter to decide what to do if and when a revised
version of a particular dataset is published. In general, however, it
is understood that access to the latest revision of a dataset should
be an option if not the default.


\section{Providing Data Verification Capabilities}

In order to promote an open framework that can be used for the
distributed verification of dataset identifiers across data centers,
the ADEC ITWG (Interoperability Technical Working Group) has created
the specification for a SOAP-based web service. The corresponding WSDL
file can be used to generate client and server interfaces to the
service. Each datacenter providing data verification services should
provide and maintain a service that abides by this specification.

In order for the ADS to coordinate the verification and linking of dataset
identifiers to the appropriate datacenters, it is necessary for
the datacenters to provide some basic metadata about their data holdings
and services. While it is expected that the appropriate metadata
will one day be made available by a public VO registry, its format
and access methods are at this time not available.
As an intermediate solution to the problem, we require that the
data centers maintain a simple profile which will provide the ADS with
the necessary metadata to maintain a central verification service that
fans out queries to the appropriate datacenters (during the
verification phase) and links to the individual datasets (during the
link resolution phase).

The data center profile is a simple XML document that lists the data center
name and description, the name and email address of the person responsible
for the maintenance of the profile, the URL of the web service to be used
for dataset verification, and the list of facilities that the datacenter
has data for.  The central verifier service will only attempt to
verify and link a dataset identifier with a datacenter if its profile
indicates that the datacenter archives the appropriate data collection.

To facilitate the deployment of verification services, the ADS also
developed a PERL toolkit that greatly simplifies the creation of a
compliant web service. Among other things, by defining a few variables
and installing a simple CGI script based on this toolkit a system manager
will be able to automatically define his/her site's profile described
above.  For more information, please see the project's description available at
\htmladdnormallink{{\it http://vo.ads.harvard.edu/dv}}{http://vo.ads.harvard.edu/dv}.

\acknowledgments
The NASA Astrophysics Data System is funded by NASA Grant NCC5-189.

%-----------------------------------------------------------------------
%			      References
%-----------------------------------------------------------------------
% List your references below within the reference environment
% (i.e. between the \begin{references} and \end{references} tags).
% Each new reference should begin with a \reference command which sets
% up the proper indentation.  Observe the following order when listing
% bibliographical information for each reference:  author name(s),
% publication year, journal name, volume, and page number for
% articles.  Note that many journal names are available as macros; see
% the User Guide listing "macro-ized" journals.   
%
% EXAMPLE:  \reference Hagiwara, K., \& Zeppenfeld, D.\  1986, 
%                Nucl.Phys., 274, 1
%           \reference H\'enon, M.\  1961, Ann.d'Ap., 24, 369
%           \reference King, I.\ R.\  1966, \aj, 71, 276
%           \reference King, I.\ R.\  1975, in Dynamics of Stellar 
%                Systems, ed.\ A.\ Hayli (Dordrecht: Reidel), 99
%           \reference Tody, D.\  1998, \adassvii, 146
%           \reference Zacharias, N.\ \& Zacharias, M.\ 2003,
%                \adassxii, \paperref{P7.6}
% 
% Note the following tricks used in the example above:
%
%   o  \& is used to format an ampersand symbol (&).
%   o  \'e puts an acute accent over the letter e.  See the User Guide
%      and the sample files for details on formatting special
%      characters.  
%   o  "\ " after a period prevents LaTeX from interpreting the period 
%      as an end of a sentence.
%   o  \aj is a macro that expands to "Astron. J."  See the User Guide
%      for a full list of journal macros
%   o  \adassvii is a macro that expands to the full title, editor,
%      and publishing information for the ADASS VII conference
%      proceedings.  Such macros are defined for ADASS conferences I
%      through XI.
%   o  When referencing a paper in the current volume, use the
%      \adassxii and \paperref macros.  The argument to \paperref is
%      the paper ID code for the paper you are referencing.  See the 
%      note in the "Paper ID Code" section above for details on how to 
%      determine the paper ID code for the paper you reference.  
%
\begin{references}

\reference Plante, R., et al.\ 2003, IVOA Identifiers Working Draft v.0.2
(30 September 2003),
\htmladdnormallink{{\it http://www.ivoa.net/Documents/WD/Identifiers/WD-IDs.html}}{http://www.ivoa.net/Documents/WD/Identifiers/WD-IDs.html}

\end{references}

% Do not place any material after the references section

\end{document}  % Leave intact
