%%% Modifs FO
%APN3_PROCEEDINGS_FORM%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% TEMPLATE.TEX -- APN3 (2003) ASP Conference Proceedings template.
%
% Derived from ADASS VIII (98) ASP Conference Proceedings template
% Updated by N. Manset for ADASS IX (99), F. Primini for ADASS 2000,
% D.Bohlender for ADASS 2001, and H. Payne for ADASS XII and LaTeX2e.
%
% Use this template to create your proceedings paper in LaTeX format
% by following the instructions given below.  Much of the input will
% be enclosed by braces (i.e., { }).  The percent sign, "%", denotes
% the start of a comment; text after it will be ignored by LaTeX.  
% You might also notice in some of the examples below the use of "\ "
% after a period; this prevents LaTeX from interpreting the period as
% the end of a sentence and putting extra space after it.  
% 
% You should check your paper by processing it with LaTeX.  For
% details about how to run LaTeX as well as how to print out the User
% Guide, consult the README file.  You should also consult the sample
% LaTeX papers, sample1.tex and sample2.tex, for examples of including
% figures, html links, special symbols, and other advanced features.
%
% If you do not have access to the LaTeX software or a laser printer
% at your site, you can still prepare your paper following the
% instructions in the User Guide.  In such cases, the editors will
% process the file and make any necessary editorial adjustments.
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
\documentclass[11pt,twoside]{article}  % Leave intact
\usepackage{adassconf}
\usepackage{graphicx}

% If you have the old LaTeX 2.09, and not the current LaTeX2e, comment
% out the \documentclass and \usepackage lines above and uncomment
% the following:

%\documentstyle[11pt,twoside,adassconf]{article}

\begin{document}   % Leave intact

%-----------------------------------------------------------------------
%			    Paper ID Code
%-----------------------------------------------------------------------
% Enter the proper paper identification code.  The ID code for your
% paper is the session number associated with your presentation as
% published in the official conference proceedings.  You can           
% find this number locating your abstract in the printed proceedings
% that you received at the meeting or on-line at the conference web
% site; the ID code is the letter/number sequence preceding the title
% of your presentation. 
%
% This will not appear in your paper; however, it allows different
% papers in the proceedings to cross-reference each other.  Note that
% you should only have one \paperID, and it should not include a
% trailing period.
%
% EXAMPLE: \paperID{O4-1}
% EXAMPLE: \paperID{P7-7}
%

\paperID{P2-1}
%%%% ID=P2-1

%-----------------------------------------------------------------------
%		            Paper Title 
%-----------------------------------------------------------------------
% Enter the title of the paper.
%
% EXAMPLE: \title{A Breakthrough in Astronomical Software Development}
% 
% If your title is so long as to fill the page header when you print it,
% then please supply a short form as a \titlemark.
%
% EXAMPLE: 
%  \title{Rapid Development for Distributed Computing, with Implications
%         for the Virtual Observatory}
%  \titlemark{Rapid Development for Distributed Computing}
%

\title{ SAADA: An Automatic Archival System for Astronomical Data}
\titlemark{SAADA}
%\titlemark{ }

%-----------------------------------------------------------------------
%		          Authors of Paper
%-----------------------------------------------------------------------
% Enter the authors followed by their affiliations.  The \author and
% \affil commands may appear multiple times as necessary (see example
% below).  List each author by giving the first name or initials first
% followed by the last name.  Authors with the same affiliations
% should be grouped together.
%
% EXAMPLE: \author{Raymond Plante, Doug Roberts, 
%                  R.\ M.\ Crutcher\altaffilmark{1}}
%          \affil{National Center for Supercomputing Applications, 
%                 University of Illinois Urbana-Champaign, Urbana, IL
%                 61801}
%          \author{Tom Troland}
%          \affil{University of Kentucky}
%
%          \altaffiltext{1}{Astronomy Department, UIUC}
%
% In this example, the first three authors, "Plante", "Roberts", and
% "Crutcher" are affiliated with "NCSA".  "Crutcher" has an alternate 
% affiliation with the "Astronomy Department".  The fourth author,
% "Troland", is affiliated with "University of Kentucky"

\author{Nguyen N.H., Michel L., Motch C.  }
\affil{ Observatoire Astronomique de Strasbourg }

%-----------------------------------------------------------------------
%			 Contact Information
%-----------------------------------------------------------------------
% This information will not appear in the paper but will be used by
% the editors in case you need to be contacted concerning your
% submission.  Enter your name as the contact along with your email
% address.
% 
% EXAMPLE:  \contact{Dennis Crabtree}
%           \email{crabtree@cfht.hawaii.edu}
%

\contact{Nguyen N.H. }
\email{nguyen@saada.u-strasbg.fr }

%-----------------------------------------------------------------------
%		      Author Index Specification
%-----------------------------------------------------------------------
% Specify how each author name should appear in the author index.  The 
% \paindex{ } should be used to indicate the primary author, and the
% \aindex for all other co-authors.  You MUST use the following
% syntax: 
%
% SYNTAX:  \aindex{Lastname, F. M.}
% 
% where F is the first initial and M is the second initial (if
% used).  This guarantees that authors that appear in multiple papers
% will appear only once in the author index.  
%
% EXAMPLE: \paindex{Crabtree, D.}
%          \aindex{Manset, N.}        
%          \aindex{Veillet, C.}        
%
% NOTE: this information is also used to build the author list that
% appears in the table of contents.  Authors will be listed in the order
% of the \paindex and \aindex commands.
%

\paindex{Nguyen, N. H.}
\aindex{Michel, L.}     % Remove this line if there is only one author
\aindex{Motch, C.}
%-----------------------------------------------------------------------
%		      Author list for page header	
%-----------------------------------------------------------------------
% Please supply a list of author last names for the page header, in
% one of these formats:
%
% EXAMPLES:
% \authormark{Lastname}
% \authormark{Lastname1 \& Lastname2}
% \authormark{Lastname1, Lastname2, ... \& LastnameN}
% \authormark{Lastname et al.}
%
% Use the "et al." form in the case of seven or more authors, or if
% the preferred form is too long to fit in the header.

\authormark{Nguyen et al.}

%-----------------------------------------------------------------------
%			Subject Index keywords
%-----------------------------------------------------------------------
% Enter a comma separated list of up to 6 keywords describing your
% paper.  These will NOT be printed as part of your paper; however,
% they will be used to generate the subject index for the proceedings.
% There is no standard list; however, you can consult the indices
% for past proceedings (http://adass.org/adass/proceedings/).
%
% EXAMPLE:  \keywords{visualization, astronomy: radio, parallel
%                     computing, AIPS++, Galactic Center}
%
% In this example, the author noticed that "radio astronomy" appeared
% in the ADASS VII Index as "astronomy" being the major keyword and
% "radio" as the minor keyword.  The colon is used to introduce another
% level into the index.

\keywords{archives, object relational mapping, Java, SQL}

%-----------------------------------------------------------------------
%			       Abstract
%-----------------------------------------------------------------------
% Type abstract in the space below.  Consult the User Guide and Latex
% Information file for a list of supported macros (e.g. for typesetting 
% special symbols). Do not leave a blank line between \begin{abstract} 
% and the start of your text.

\begin{abstract}          % Leave intact
This paper presents an overview of SAADA,
a tool  designed to allow astronomers to easily create their own databases 
from archival files (images, spectra, tables, ...) or from imported data.
It aims to make the process of database creation as automatic as possible.
Its functionality will include Java code generation, data loading,
automatic web interfacing, and some interoperability features.
Correlation links between records can easily be set up by astronomers 
in order to add scientific content to the database.
Data can either be accessed with the automatic Web interface or by 
handling persistent objects.
Through an API, SAADA will be able to
interoperate with external databases (using VO standards).
It will also be able to perform queries including constraints on correlation patterns.
% Place the text of your abstract here - NO BLANK LINES
\end{abstract}

%-----------------------------------------------------------------------
%			      Main Body
%-----------------------------------------------------------------------
% Place the text for the main body of the paper here.  You should use
% the \section command to label the various sections; use of
% \subsection is optional.  Significant words in section titles should
% be capitalized.  Sections and subsections will be numbered
% automatically. 
%
% EXAMPLE:  \section{Introduction}
%           ...
%           \subsection{Our View of the World}
%           ...
%           \section{A New Approach}
%
% It is recommended that you look at the sample papers, sample1.tex
% and sample2.tex, for examples for formatting references, footnotes,
% figures, equations, html links, lists, and other special features.  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%	
\section{ Introduction }
The increasing capabilities of both hardware and networking offer
astronomers the possibility to easily organize
their own data in local databases.
Nevertheless, setting up such databases remains
difficult, especially for the complex and heterogeneous data used in astronomy.
The presented development, SAADA (Syst\`eme Automatique d'Archivage de Donn\'ees
Astronomiques
in French, Happiness in Arabic), aims at making the deployment of local databases easier.

SAADA is not a database system but a database generator. 
Databases created by SAADA will hereafter be referred to as SAADA-DBs.
All SAADA-DBs rest on the same branches of a common data model, 
but have their own object layers (API and Web interface) and their own relational bases.

The architecture of the SAADA-DBs tries to take advantage of both relational 
and object worlds. 
The object model is convenient to deal with heterogeneous data and to provide a simple
API, whereas
the relational database model is a mature solution to share large sets of information and to manage concurrency,
transactions and roll-backs.
A SAADA-DB is an object layer using a SQL RDBMS as repository.
The goal of SAADA is to create a database system ready to use (a SAADA-DB) just by 
analysing input data and by applying some rules given by the data owner. 
SAADA relies on freeware, standard compliant, multiplatform and object oriented programming. 
SAADA is written entirely in Java using a number of public APIs (J2EE; Flanagan 1999).
The purpose of this paper is more to explain the architecture of the SAADA-DBs than 
to describe the structure of SAADA itself (Figure 1).

\begin{figure}[ht]
\begin{center}
\scalebox{0.65}{\includegraphics{P2-1_f1.eps}}
\caption{SAADA}
\end{center}
\end{figure}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%	
\section{Features of a SAADA-DB}
A SAADA-DB can host tables, images, spectra and time series.
Data are grouped in named collections (e.g. STARS,
GALAXIES) set up by the owner.
Once a new SAADA-DB is created and its data set loaded, 
it can be accessed through a web interface using
servlets.
The web interface provides browsing facilities and an editor for complex queries. 
It has functionalities similar
to those of the XCAT-DB interface (Michel et al.\ 2004).
A Java API allows database users to handle records in Java instances. The API is read-only by default but a
specific mode can be used by the SAADA-DB's owner to set up persistent links
between data (e.g. cross-match).
The API has not been designed to load data. This task is dedicated to the data loader module,
which reads local products or queries some external databases.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%	
\section{Architecture of a SAADA-DB}
Below is a short description of the SAADA-DB modules.
The organization of all SAADA-DB components is shown in Figure 2.

\begin{list}{-}{}
\item \textbf{API specific:} Generated API including classes modelling the persistent data.
\item \textbf{Data loader:} Module used to load data from the input files/streams.
\item \textbf{Module servlet:} Automatically generated web interface.
\item \textbf{Module web service:} This module handles database accesses by web services.
\item \textbf{Object cache:} Achieves the conversion of relational data into
  persistent objects.
\item \textbf{API generic:} Low level persistence functionalities.
\item \textbf{High level query engine:} Query optimizer.
\item \textbf{Module update:} This module is in charge of updating persistent objects.
It cannot create new objects but it can modify some attributes such as correlation links between instances.
\item \textbf{Open modules:} Built out functionalities.
\end{list}

\begin{figure}[ht]
\begin{center}
\scalebox{0.6}{\includegraphics{P2-1_f2.eps} }
\caption{SAADA Architecture }
\end{center}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%	
\section{ Auto-configuration}
One of the most useful properties of SAADA, its auto-configurability,
allows astronomers to create their own databases without writing any
line of code in Java or SQL. With SAADA, database owners need only set
a few rules at configuration time, which specify the
mapping between input data and classes. 
Input data are identified by directory names, filename masks or some
inner keywords. The collections in which data must be stored are also
specified at configuration time. From this configuration file (XML)
and from the input data checking, SAADA is able to build SQL tables
and Java classes which together are going to form the new SAADA-DB.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%	
\section{Object-Relation Mapping}
As all SAADA-DBs are built on top of the same data model,
the object mapping is much simpler than in any
other general-purpose object-relational system. This simplicity, added to considerations on low level
functionality and on performance, led us to choose to develop our own object mapping layer.
The mapping mechanism is classically (Rahayu 2000) based on the use of object identifiers (OIDs).
From any OID SAADA is able to:
\begin{list}{--}{}
\item determine the table where the object is.
\item identify the object class.
\item retrieve the instance content. 
\end{list}

\noindent %%FO
With a single OID, any data record can be retrieved either in the relational world or in the object jungle (or
vice-versa).
OIDs are unique within a given SAADA-DB.




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%	
\section{Performance Functionalities}

\subsection{Object Cache}

The object cache is the hot spot of the system. It is in charge of transforming table records
into Java instances and especially of minimizing database accesses.
Its setup has a fundamental impact on global performance.
Objects are handled by OIDs and their content is not read at first;
the first time an attribute is accessed,
the cache is invoked to build the full instance.
Objects no longer referenced by the application are removed from the 
cache by the JVM garbage collector only when the memory heap is full.

Complex objects are built into the cache by applying a lazy-loading strategy (Kircher 2001).


\subsection{Query Engine}

Queries are processed by a separate module. The query optimization obviously has  
a significant 
impact on the global efficiency of a SAADA-DB.
User queries are translated into SQL queries including some built-in
functions, taking into account their complexity.
Further local computation on SQL query results can be achieved 
before returning the final result. 
Queries only return sets of OIDs. Object contents can only be delivered by the cache.
SAADA systematically implements into SAADA-DBs some specific features necessary to speed up queries.
All data will, for instance, be referenced on a sky pixel map (e.g.\ Qbox, Page 2002) and specific indexes
are set up for the processing of queries including constraints on correlated data patterns.


   
\section{Development Status}  
A SAADA-DB is under test. It includes all of the basic functionalities (cache, web interface).
This prototype is built by a piece of software hosting the main modules of SAADA (auto-configuration, data
loader).
The first public distribution will be released in spring 2004.
SAADA status can be  seen at http://saada.u-strasbg.fr/

\acknowledgments %% FO\section{Acknowledgments}

This thesis project is funded by the R\'egion Alsace (France) and by the
Centre National d'Etudes Spatiales (CNES France).

%-----------------------------------------------------------------------
%			      References
%-----------------------------------------------------------------------
% List your references below within the reference environment
% (i.e. between the \begin{references} and \end{references} tags).
% Each new reference should begin with a \reference command which sets
% up the proper indentation.  Observe the following order when listing
% bibliographical information for each reference:  author name(s),
% publication year, journal name, volume, and page number for
% articles.  Note that many journal names are available as macros; see
% the User Guide listing "macro-ized" journals.   
%
% EXAMPLE:  \reference Hagiwara, K., \& Zeppenfeld, D.\  1986, 
%                Nucl.Phys., 274, 1
%           \reference H\'enon, M.\  1961, Ann.d'Ap., 24, 369
%           \reference King, I.\ R.\  1966, \aj, 71, 276
%           \reference King, I.\ R.\  1975, in Dynamics of Stellar 
%                Systems, ed.\ A.\ Hayli (Dordrecht: Reidel), 99
%           \reference Tody, D.\  1998, \adassvii, 146
%           \reference Zacharias, N.\ \& Zacharias, M.\ 2003,
%                \adassxii, \paperref{P7.6}
% 
% Note the following tricks used in the example above:
%
%   o  \& is used to format an ampersand symbol (&).
%   o  \'e puts an acute accent over the letter e.  See the User Guide
%      and the sample files for details on formatting special
%      characters.  
%   o  "\ " after a period prevents LaTeX from interpreting the period 
%      as an end of a sentence.
%   o  \aj is a macro that expands to "Astron. J."  See the User Guide
%      for a full list of journal macros
%   o  \adassvii is a macro that expands to the full title, editor,
%      and publishing information for the ADASS VII conference
%      proceedings.  Such macros are defined for ADASS conferences I
%      through XI.
%   o  When referencing a paper in the current volume, use the
%      \adassxii and \paperref macros.  The argument to \paperref is
%      the paper ID code for the paper you are referencing.  See the 
%      note in the "Paper ID Code" section above for details on how to 
%      determine the paper ID code for the paper you reference.  
%
\begin{references}
\reference Flanagan, D. 1999, Java Enterprise -In a nutshell, O'REILLY
\reference Kircher, M. 2001, Lazy Acquisition, 
   http://www.cs.wustl.edu/\verb+~+mk1/ \\LazyAcquisition.pdf
\reference Michel, L., Motch, C., Page, C. G., Watson, M.~G.
    2003, \adassxii, \adassref{xii:P5-7}{291}
\reference Michel, L., Motch, C., Pye, J., Watson, M. 2004, 
   ``XCAT-DB a Public Interface for the SSC XMM-Newton Catalogue'',
    \adassxiii, \paperref{D1}\ok
\reference Page, C. 2002, Indexing the Sky,
   http://wiki.astrogrid.org/bin/view/ \\Astrogrid/SkyIndexing.
\reference Rahayu, J.W. 2000, A method for transforming 
   inheritance relationships in an object-oriented conceptual model
   to relational tables, ELSEVIER, Information and Software Technology 
   42(2000) 571-592.

\end{references}

% Do not place any material after the references section

\end{document}  % Leave intact
