%APN3_PROCEEDINGS_FORM%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% TEMPLATE.TEX -- APN3 (2003) ASP Conference Proceedings template.
%
% Derived from ADASS VIII (98) ASP Conference Proceedings template
% Updated by N. Manset for ADASS IX (99), F. Primini for ADASS 2000,
% D.Bohlender for ADASS 2001, and H. Payne for ADASS XII and LaTeX2e.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
\documentclass[11pt,twoside]{article}  % Leave intact
\usepackage{adassconf}

% If you have the old LaTeX 2.09, and not the current LaTeX2e, comment
% out the \documentclass and \usepackage lines above and uncomment
% the following:

%\documentstyle[11pt,twoside,adassconf]{article}

\begin{document}   % Leave intact

%-----------------------------------------------------------------------
%			    Paper ID Code
%-----------------------------------------------------------------------

\paperID{O7-1}
%%%% ID=O7-1

%-----------------------------------------------------------------------
%		            Paper Title 
%-----------------------------------------------------------------------

\title{Resource Registries for the Virtual Observatory}
\titlemark{Resource Registries for the VO}

%-----------------------------------------------------------------------
%		          Authors of Paper
%-----------------------------------------------------------------------

\author{Raymond Plante}
\affil{National Center for Supercomputing Applications (NCSA), 
       University of Illinois Urbana-Champaign, Urbana, IL 61801}

\author{Gretchen Greene, Robert Hanisch}
\affil{Space Telescope Science Institute (STScI)}

\author{Thomas McGlynn}
\affil{Goddard Space Flight Center, NASA}

\author{William O'Mullane}
\affil{Johns Hopkins University (JHU)}

\author{Roy Williams}
\affil{California Institute of Technology (Caltech)}

\author{Ramon Williamson}
\affil{National Center for Supercomputing Applications}

%-----------------------------------------------------------------------
%			 Contact Information
%-----------------------------------------------------------------------

\contact{Raymond Plante}
\email{rplante@ncsa.uiuc.edu}

%-----------------------------------------------------------------------
%		      Author Index Specification
%-----------------------------------------------------------------------
% Specify how each author name should appear in the author index.  The 
% \paindex{ } should be used to indicate the primary author, and the
% \aindex for all other co-authors.  You MUST use the following
% syntax: 
%
% SYNTAX:  \aindex{Lastname, F. M.}
% 
% where F is the first initial and M is the second initial (if
% used).  This guarantees that authors that appear in multiple papers
% will appear only once in the author index.  
%
% EXAMPLE: \paindex{Crabtree, D.}
%          \aindex{Manset, N.}        
%          \aindex{Veillet, C.}        
%
% NOTE: this information is also used to build the author list that
% appears in the table of contents.  Authors will be listed in the order
% of the \paindex and \aindex commmands.
%

\paindex{Plante, R.}
\aindex{Greene, G.}
\aindex{Hanisch, R.}
\aindex{McGlynn, T.}
\aindex{OMullane@O'Mullane, W.}
\aindex{Williamson, R.}

%-----------------------------------------------------------------------
%		      Author list for page header	
%-----------------------------------------------------------------------
% Please supply a list of author last names for the page header. in
% one of these formats:
%
% EXAMPLES:
% \authormark{Lastname}
% \authormark{Lastname1 \& Lastname2}
% \authormark{Lastname1, Lastname2, ... \& LastnameN}
% \authormark{Lastname et al.}
%
% Use the "et al." form in the case of seven or more authors, or if
% the preferred form is too long to fit in the header.

\authormark{Plante, Greene, Hanisch, McGlynn, O'Mullane, \& Williamson}

%-----------------------------------------------------------------------
%			Subject Index keywords
%-----------------------------------------------------------------------
% Enter a comma separated list of up to 6 keywords describing your
% paper.  These will NOT be printed as part of your paper; however,
% they will be used to generate the subject index for the proceedings.
% There is no standard list; however, you can consult the indices
% for past proceedings (http://adass.org/adass/proceedings/).
%
% EXAMPLE:  \keywords{visualization, astronomy: radio, parallel
%                     computing, AIPS++, Galactic Center}
%
% In this example, the author noticed that "radio astronomy" appeared
% in the ADASS VII Index as "astronomy" being the major keyword and
% "radio" as the minor keyword.  The colon is used to introduce another
% level into the index.

\keywords{Virtual Observatory, registry, metadata, archives, initiative, Data Inventory Service}

%-----------------------------------------------------------------------
%			       Abstract
%-----------------------------------------------------------------------
% Type abstract in the space below.  Consult the User Guide and Latex
% Information file for a list of supported macros (e.g. for typesetting 
% special symbols). Do not leave a blank line between \begin{abstract} 
% and the start of your text.
%
% Notes:
%   -- more on adapting to changes

\begin{abstract}          % Leave intact
Data discovery will be a core utility of the Virtual Observatory (VO).
Registries that contain high-level descriptions of resources such as
archives and services are essential for making data discovery
efficient in a distributed environment.  We review a framework
architecture for VO registries currently under development within a
International Virtual Observatory Alliance (IVOA) working group.  We
present an overview of a prototype implementation of the framework
developed as part of the National Virtual Observatory (NVO) project.
We illustrate how institutions can publish descriptions of their
resources within their own registries.  Other registries specialize in
harvesting these descriptions to centralized locations where users may
search them.  We show how our prototype registry supports
the NVO's first publicly released service, a Data Inventory Service.
\end{abstract}

%-----------------------------------------------------------------------
%			      Main Body
%-----------------------------------------------------------------------
% 
% Intro: what is a resource and a registry
% selected requirements
% IVOA registry working group
% IVOA registry model
% NVO prototype
%   * to support DIS
%   * components
% Metadata
% Publishing Registries
% Harvesting Interface
% Models for registring resources
% Searchable registry
% Application to DIS
% Summary

\section{A Common, Global Approach to Resource Discovery}

Registries are an important linchpin for the Virtual Observatory (VO):
they provide the mechanism for discovering the resources available to
applications.  By resources, we primarily mean data and services, but
we can view other things as ``resources'', such as organisations,
projects, and software.  A \emph{registry} is simply a list of
resource descriptions, expressed in terms of structured metadata to
enable automated processing and searching.  

The \htmladdnormallinkfoot{International Virtual Observatory Alliance}%
{http://www.ivoa.net} has established a Registry Working Group that is
actively developing a common framework for resource registries.  This
framework must address a number of requirements.  The most important
function of the framework is to allow users to select resources likely
to pertain to a scientific question based on various characteristics:
e.g. the type of resource (catalogs, image archive, educational
resources), coverage in space, time, and frequency, and where the data
comes from and who curates it.  The framework must recognize that
resources come and go, and so it must adapt accordingly.  A
distributed system not only avoids depending on a single point of
failure, it allows for multiple views of what is available in the VO.
The framework must also help preserve the data providers' control over
their data by letting them control what gets registered, what's
included in the description, and when it gets updated.  In particular,
registries should integrate well with a provider's existing resource
management tools (e.g. GLU).  Finally, the framework must allow
extension to describe new types of resources in the future.

\begin{figure}
\plotone{O7-1_f1.eps}
\caption{A distributed model for registries.}
\end{figure}

The distributed model for registries developed by the Registry Working
Group that addresses these requirements is illustrated in Figure 1.
It features three types of registries.  The \emph{full searchable
registry} is intended for use end-user applications and contain all
resource descriptions known to the VO.  These registries are filled by
collecting descriptions from many \emph{local publishing registries}
through a process called ``harvesting.''  Publishing registries are
distinguished from searchable ones in that, as the name implies, they
do not support searching; they simply expose resource descriptions to
the searchable ones (see Williamson \& Plante 2004).  There can be
multiple full searchable registries; thus, there needs to be a
``replicating'' process to keep them synchronized.  The third type of
registry is the \emph{local searchable registry}.  These are intended
for searching by end-user applications but do not contain everything
that is known to the VO.  Rather, they might specialize in a
particular type of resource or scientific topic, e.g. resources
related to supernova research.  Thus, these might carry out
``selective harvesting'' to populate themselves.  

\section{The NVO Prototype}

The US-based National Virtual Observatory (NVO) project has put
together a prototype implementation of this framework.  The purpose of
the prototype is to support a real end-user application, the Data
Inventory Service (DIS; McGlynn et al. 2004).  This service gives user
a listing of what data is available for some location of the sky; the
first step in this inventory service is to search a registry for
services serving catalog and image data.  

\subsection{Resource Metadata} 

The IVOA Registry Working Group is developing a 
\htmladdnormallinkfoot{standard set of metadata for describing
resources}{http://www.ivoa.net/twiki/bin/view/IVOA/IVOARegWp03}.  The
standard comes in two parts.  The first is a prose document that
defines the basic concepts (Hanisch et al. 2004).  The second is a set
of XML Schemas used to encode the concepts into XML.  

The XML version of the metadata takes advantage of XML Schema's
object-oriented modeling capabilities.  The core concepts are defined
in the ``VOResource'' schema.  At the center of the model is
a generic {\tt Resource} which contains metadata concepts that apply
to all resources.  More specific ``resource classes'' extend {\tt
Resource} by inheriting the core metadata and adding additional,
specialized concepts.  Examples of specific resource classes include
{\tt Organisation, DataCollection, Service, {\rm and} Registry}.
These extensions are defined in separate schemas so that applications
can pick and choose which extensions to use.  With these schemas, data
providers can describe various kinds of research organisations
(e.g. data centers, observatories, and missions), their data
collections and archives, and a variety of services.  In particular,
not only can they describe emerging VO standard services like 
\htmladdnormallinkfoot{Cone Search and Simple Image Access,}%
{http://www.us-vo.org/standards.html} they can also describe their
existing browser-based and CGI services. 

\subsection{Publishing Registries}

We established two prototype publishing registries: one at Caltech and
one at NCSA.  Each featured a web page form that data providers could
use to register their resources.  The motivation behind the
development of these registries was two-fold.  First, we needed to
build a listing of all known Simple Image Access implementations,
which we had not had before.  Second, we wanted to use these
prototypes to develop techniques for making the registration process
easier for data providers.  A more detailed discussion of the NCSA
registry is described in this volume by Williamson and Plante (2004).  
Both implementations store the descriptions entered by users as XML
documents.  These descriptions are exposed to a searchable registry
via the Protocol for Metadata Harvesting using
\htmladdnormallinkfoot{Open Archives Initiative}%
{http://www.openarchives.org} (OAI; see discussion in Williamson and
Plante 2004). 

It's worth noting that the specific manner in which a data provider
publishes data descriptions will likely depend on the number of
resources being described.  If the provider only has a few resources
that are fairly static, then the easiest thing to do will likely be to
just go to a site like those set up at NCSA and Caltech and fill out
the forms; such ``public'' registry sites would manage the
descriptions of the resources on behalf of the provider.  If the
provider has a moderate number of resources to register (say, a few
tens) that may be changing somewhat with time, she may wish to take
greater control over the descriptions; in this case, they may install
a generic registry (e.g. VORegistry-in-a-Box, Williamson and Plante
2004) at their own site.  If the provider has a very large number of
resources, which are perhaps highly dynamic, he may wish to implement
the OAI interface himself, perhaps plugging directly into his existing
database or other tools used for managing resources.   

\subsection{The Searchable Registry}

A prototype searchable registry was set up (jointly) at JHU and
STScI (described in further detail in this volume by Greene et
al. 2004).  It collects resource descriptions from the publishing
registries at Caltech and NCSA and loads them into a relational
database.  Searching is provided to client applications through a web
interface.  In this prototype, the interface is fairly specialized to
the needs of the Data Inventory Service; however, we expect this to be
standardized through the course of development by the Registry Working
Group.  

At the moment, there is no automated mechanism for regularly polling
the publishing registry for new records.  Instead, the web service
interface has a special operation for initiating the harvesting
process. 


%-----------------------------------------------------------------------
%			      References
%-----------------------------------------------------------------------
\begin{references}

\reference Greene, G., O'Mullane, W., Hanisch, R., \& Gaffney, N. 
    2004, \adassxiii, \paperref{P3-8}.

\reference  Hanisch, R., Linde, T., Plante, R., Richards, A., Auden,
E., Noddle, K., Greene, G., \& O'Mullane, W. 
    2004, \adassxiii, \paperref{P3-5}.

\reference McGlynn, T. Lee, J., Hanisch, R., O'Mullane, W., Greene, G. 
    2004, \adassxiii, \paperref{P3-18}.

\reference Williamson, R. \& Plante, R. 2004, \adassxiii, \paperref{P3-22}.

\end{references}

% Do not place any material after the references section

\end{document}  % Leave intact
