Parent Directory | Revision Log

Revision **2434** -
(**show annotations**)

*Fri Feb 28 17:52:32 2014 UTC*
(7 years, 7 months ago)
by *norman.x.gray@gmail.com*

File MIME type: application/x-tex

File size: 93623 byte(s)

File MIME type: application/x-tex

File size: 93623 byte(s)

Last changes, following on-list discussion, before TCG review

1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |

2 | % For a conversion via cgiprint (HTX): |

3 | % See http://vizier.u-strasbg.fr/local/man/cgiprint.htx |

4 | \def\ifhtx{\iffalse} % Lines used only for the HTML version |

5 | \ifhtx |

6 | % . . . |

7 | % . . . Definitions in HTX context |

8 | % . . . |

9 | \else |

10 | \documentclass[11pt,notitlepage,onecolumn]{ivoa} |

11 | % . . . |

12 | % . . . Definitions in LaTeX context |

13 | % . . . |

14 | \fi |

15 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |

16 | |

17 | \def\SVN$#1: #2 ${\expandafter\def\csname SVN#1\endcsname{#2}} |

18 | \SVN$Revision$ |

19 | \SVN$Date$ |

20 | \SVN$HeadURL$ |

21 | |

22 | \usepackage{natbib} % use author-year citations |

23 | |

24 | \usepackage{prettyref} % ensure consistent cross-references |

25 | \newrefformat{sec}{Sect.~\ref{#1}} |

26 | \newrefformat{appx}{Appx.~\ref{#1}} |

27 | \newrefformat{fig}{Fig.~\ref{#1}} |

28 | \newrefformat{tab}{Table~\ref{#1}} |

29 | \usepackage{varioref} |

30 | \newrefformat{tabx}{Table~\vref{#1}} |

31 | |

32 | % Extend the {tabular} column types, so we can conveniently get |

33 | % raggedright (ie non-insanely-spaced) column entries. Follows the |

34 | % excellent answer at <http://tex.stackexchange.com/questions/12703/> |

35 | \usepackage{array} |

36 | \newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}p{#1}} |

37 | |

38 | |

39 | %\usepackage{supertabular,multicol} |

40 | |

41 | % Physical units in \rm. Unstarred version includes leading |

42 | % \thinspace. Starred version doesn't, and is used when referring to |

43 | % the unit by itself (eg axis is $B/\units*T$), and is not qualifying |

44 | % a number |

45 | \makeatletter |

46 | \def\units{\@ifstar{\let\un@tsspace\relax \un@ts}% |

47 | {\let\un@tsspace\thinspace\un@ts}} |

48 | \newcommand{\un@ts}[1]{{\let~\thinspace |

49 | \ifmmode |

50 | \un@tsspace\mathrm{#1}% |

51 | \else |

52 | \nobreak$\un@tsspace\mathrm{#1}$% |

53 | \fi}} |

54 | |

55 | \newcommand*\hex[1]{\uppercase{#1}${}_{16}$} |

56 | %\newcommand*\hex[1]{\texttt{0x#1}} % alternative formatting |

57 | |

58 | \usepackage{verbatim} % for \verbatiminput |

59 | \def\verbatim@font{\fontsize{9}{11}\selectfont\ttfamily} |

60 | % \DeclareRobustCommand{\^}{% |

61 | % \ifmmode\nfss@text{\textasciicircum}\else\textasciicircum\fi} |

62 | |

63 | %\definecolor{normative}{rgb}{0.1,0.1,0.5} |

64 | \newcommand*\norm[1]{\textbf{\color{ivoacolor}#1}} |

65 | |

66 | \makeatother |

67 | |

68 | % abbreviation for 'e.g.', which (a) gets spacing right after the full |

69 | % stop, and (b) allows us to change the punctuation globally if we |

70 | % decide to. |

71 | \def\eg{e.g.,~} |

72 | |

73 | %% |

74 | %% If document is processed with latex, dvips and ps2pdf |

75 | %% |

76 | \ifx\pdftexversion\undefined |

77 | \usepackage[dvips]{graphicx} |

78 | \DeclareGraphicsExtensions{.eps,.ps} |

79 | %% Uncomment following line if you want PDF thumbnails |

80 | % \usepackage[ps2pdf]{thumbpdf} |

81 | % for old hyperref, use: |

82 | \usepackage[ps2pdf]{hyperref} |

83 | %% for recent hyperref, use: |

84 | % \usepackage[ps2pdf,bookmarks=true,bookmarksnumbered=true,hypertexnames=false,breaklinks=true,% |

85 | % colorlinks,linkcolor=blue,urlcolor=blue]{hyperref} |

86 | |

87 | %% |

88 | %% else if document is processed with pdflatex |

89 | %% |

90 | \else |

91 | \usepackage[pdftex]{graphicx} %% graphics for pdftex (supports .pdf .jpg .png) |

92 | \usepackage{epstopdf} %% requires epstopdf |

93 | %% this is to support .ps files : |

94 | \makeatletter |

95 | \g@addto@macro\Gin@extensions{,.ps} |

96 | \@namedef{Gin@rule@.ps}#1{{pdf}{.pdf}{`ps2pdf #1}} |

97 | \makeatother |

98 | %% comment above lines if you have included ps files |

99 | %\DeclareGraphicsExtensions{.pdf,.jpg,.png} |

100 | %% Uncomment following line if you want PDF thumbnails |

101 | % \usepackage[pdftex]{thumbpdf} |

102 | %% for old hyperref, use: |

103 | % \usepackage[ps2pdf]{hyperref} |

104 | % for recent hyperref, use: |

105 | \usepackage[pdftex,bookmarks=true,bookmarksnumbered=true,hypertexnames=false,breaklinks=true,% |

106 | colorlinks,allcolors=ivoacolor]{hyperref} |

107 | \pdfadjustspacing=1 |

108 | \fi |

109 | \usepackage[final]{pdfpages} |

110 | %\usepackage{tabulary} %% |

111 | %% Header of the document... |

112 | %% |

113 | % Provide a title for your document |

114 | \title{Units in the VO} |

115 | % Give date and version number |

116 | \date{1.0-20140226} |

117 | |

118 | % Choose one document type from below |

119 | %\ivoatype{IVOA Note} |

120 | %\ivoatype{IVOA Working Draft} |

121 | \ivoatype{IVOA Proposed Recommendation} |

122 | %\ivoatype{IVOA Recommendation} |

123 | |

124 | \version{1.0} |

125 | % Give author list: separate different authors with \\ |

126 | % You can add email addresses with links \url{mailto:yourname@ivoa.net} |

127 | \author{Markus Demleitner\\ |

128 | S\'{e}bastien Derri\`ere\\ |

129 | Norman Gray\\ |

130 | Mireille Louys\\ |

131 | Fran\c{c}ois Ochsenbein} |

132 | \editor{S\'{e}bastien Derri\`{e}re and Norman Gray} |

133 | |

134 | \urlthisversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20140226/}}} |

135 | \urllastversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/}}} |

136 | \previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20131224/}}} |

137 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130922/}}} |

138 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20131011/}}} |

139 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/documents/VOUnits/20130724/PR-VOUnits-1.0-20130922.pdf}}} |

140 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130429/}}} |

141 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130225/}}} |

142 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/internal/IVOA/UnitsDesc/WD-VOUnits-v1.0-20120522.pdf}}} |

143 | %\urlthisversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20130225/}}} |

144 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/internal/IVOA/UnitsDesc/WD-VOUnits-v1.0-20120718.pdf}}} |

145 | %\previousversion{\footnotesize{\url{http://www.ivoa.net/Documents/VOUnits/20120801/}}} |

146 | |

147 | |

148 | |

149 | %%%%%%%%%%%%%%%%% |

150 | %mir \documentclass[12pt]{article} |

151 | %\usepackage{graphicx} |

152 | %\usepackage{hyperref} |

153 | %\usepackage{psfig} |

154 | %\usepackage{html} |

155 | %\usepackage{epsf} |

156 | %\usepackage{lscape} |

157 | %mir \textheight 9.0in \hoffset -0.5in \voffset -0.5in |

158 | %\newcommand{\Sensitiv}{Variation} |

159 | \definecolor{orange}{rgb}{0.7,0.5,0.0} |

160 | \newcommand{\unit}[1]{\texttt{\small\color{orange}#1}} |

161 | %\newcommand{\unit}[1]{\textbf{\textsf{\color{orange}#1}}} |

162 | \usepackage[T1]{fontenc} |

163 | \usepackage{longtable} |

164 | \usepackage{multirow} |

165 | %\font\symbo=psyr at 10pt |

166 | %\def\micro{{\symbo \char109}} |

167 | \def\micro{{\ensuremath \mu}} |

168 | |

169 | %Mir colors definitions |

170 | \newcommand{\bleu}[1]{\textcolor[rgb]{0.00,0.00,1.00}{#1}} |

171 | \newcommand{\blue}{\textcolor{blue}} |

172 | \newcommand{\violet}{\textcolor[rgb]{0.50,0.00,0.50}} |

173 | \newcommand{\brown}{\textcolor[rgb]{0.50,0.10,0.10}} |

174 | %%%%%%%%%%%%%%%%% |

175 | |

176 | %\usepackage{showlabels} |

177 | |

178 | |

179 | |

180 | \begin{document} |

181 | \maketitle % print header in standard form |

182 | \thispagestyle{empty} |

183 | \begingroup |

184 | %%\input{versions} |

185 | \vfill |

186 | %%\hbox to \textwidth{\hfil\tiny Volute: \SVNRevision, \SVNDate} |

187 | \hbox to \textwidth{\hfil\tiny code.google.com/p/volute, rev\SVNRevision, \SVNDate} |

188 | %\hbox to \textwidth{\hfil\tiny Volute: \SVNHeader} |

189 | \endgroup |

190 | \newpage |

191 | \tableofcontents |

192 | \newpage |

193 | \listoftables |

194 | \newpage |

195 | \section*{Abstract} |

196 | This document describes a recommended syntax for writing the string |

197 | representation of unit labels (`VOUnits'). In addition, it describes |

198 | a set of recognised and deprecated units, which is as far as possible |

199 | consistent with other relevant standards (BIPM, ISO/IEC and the IAU). |

200 | |

201 | The intention is that units written to conform to this specification |

202 | will likely also be parsable by other well-known parsers. To this |

203 | end, we include machine-readable grammars for other units syntaxes. |

204 | |

205 | \section*{Status of this document} |

206 | |

207 | This is an IVOA Proposed Recommendation made available for public review. |

208 | It is appropriate to reference this document only as a recommended standard |

209 | that is under review and which may be changed before it is accepted as a full recommendation. |

210 | |

211 | %This is an IVOA Working Draft for review by IVOA members and |

212 | %other interested parties. It is a draft document and may be updated, |

213 | %replaced, or rendered obsolete by other documents at any time. It is |

214 | %inappropriate to use IVOA Working Drafts as reference materials or to cite |

215 | %them as other than ``work in progress''. |

216 | |

217 | This document is a substantial update of the previous version 0.2 that |

218 | was written within the Data Model IVOA Working Group. As decided in previous |

219 | IVOA interoperability meetings, the Semantics working group is now in charge |

220 | of the document. This document is intended to become a full IVOA recommendation, |

221 | following agreement within the community and the standard IVOA recommendation process. |

222 | |

223 | The place for discussions related to this document is the |

224 | Semantics IVOA mailing list {\tt semantics\@@ivoa.net}. |

225 | |

226 | A list of current IVOA recommendations and other technical documents can be found at |

227 | \url{http://www.ivoa.net/Documents/}. |

228 | |

229 | \subsection*{Note on conformance} |

230 | |

231 | Text within the following document is classified as either |

232 | `normative' or `informative'. |

233 | |

234 | \textbf{Normative} text means information that is required |

235 | to implement the Recommendation; an implementation of this |

236 | Recommendation is conformant if it abides by all the prescriptions |

237 | contained in normative text. \textbf{Informative} text is |

238 | information provided to clarify or illustrate a requirement but which |

239 | is not required for conformance. |

240 | |

241 | The sections and subsections of this Recommendation are labeled, |

242 | after the section heading, to specify whether they are normative or |

243 | informative. If a subsection is not labeled, it has the same |

244 | normativity as its parent section. References are normative if they |

245 | are referred to within normative text. |

246 | |

247 | When found within normative sections, the key words |

248 | \norm{must}, |

249 | \norm{must not}, |

250 | \norm{required}, |

251 | \norm{shall}, |

252 | \norm{shall not}, |

253 | \norm{should}, |

254 | \norm{should not}, |

255 | \norm{recommended}, |

256 | \norm{may}, |

257 | \norm{optional}, |

258 | thus formatted, are to be interpreted as described in RFC 2119 |

259 | \citep{std:rfc2119}. |

260 | |

261 | \section*{Acknowledgements} |

262 | |

263 | We thank all those participants in IVOA and EuroVO workshops who have |

264 | contributed by exposing use cases and providing comments, especially |

265 | Rick Hessman, |

266 | Paddy Leahy, |

267 | Jeff Lusted, |

268 | Jonathan McDowell, |

269 | Marco Molinaro, |

270 | Pedro Osuna, |

271 | Anita Richards, |

272 | Bruno Rino, |

273 | Arnold Rots, |

274 | Jesus Salgado, |

275 | Mark Taylor, |

276 | Brian Thomas |

277 | and recent contributors on the DM and Semantics forums. |

278 | |

279 | \section{Introduction (informative)} |

280 | \label{sec:intro} |

281 | |

282 | This document describes a standardised use of units in the VO |

283 | (hereafter simply `VOUnits'). It aims to describe a syntax for unit |

284 | strings which is as far as possible in the intersection of existing |

285 | syntaxes, and to list a set of `known units' which is |

286 | the union of the `known units' of those standards. |

287 | We \emph{recommend}, therefore, that applications which write out |

288 | units should do so using \emph{only} the VOUnits syntax, and that |

289 | applications reading units should be able to read \emph{at least} the |

290 | VOUnits syntax, plus all of the units of \prettyref{sec:knownunits}. |

291 | It is not, however, quite possible for VOUnits to be in the |

292 | intersection of existing syntaxes; there is further discussion of this |

293 | point in \prettyref{sec:deviations}. |

294 | |

295 | We also provide, for information, a set of self- and mutually-consistent |

296 | machine-readable grammars for all of the syntaxes discussed. |

297 | |

298 | The introduction gives the motivation for |

299 | this proposal in the context of the VO architecture, from the legacy |

300 | metadata available in the resource layer, to the requirements of the various |

301 | VO protocols and standards and applications. |

302 | |

303 | This document is organised as follows. \prettyref{sec:proposal} |

304 | details the proposal for VOUnits. \prettyref{sec:useCase} lists some |

305 | use cases and reference implementations. In \prettyref{appx:current}, |

306 | there is a brief review of current practices in the description and |

307 | usage of units; in \prettyref{appx:comparisons} there is a detailed |

308 | discussion of the differences between the various syntaxes; and |

309 | in \prettyref{appx:grammar} there are formal (yacc-style) grammars for |

310 | the four syntaxes discussed. |

311 | |

312 | The normative content of this document is \prettyref{sec:proposal} and \prettyref{appx:vougrammar}. |

313 | |

314 | \subsection{Units in the VO Architecture} |

315 | |

316 | % Why are the default LaTeX float parameters so _irritatingly_ cautious? |

317 | \renewcommand{\topfraction}{.85} |

318 | \renewcommand{\bottomfraction}{.7} |

319 | \renewcommand{\textfraction}{.15} |

320 | \renewcommand{\floatpagefraction}{.66} |

321 | |

322 | \begin{figure}%[htbp] |

323 | \centerline{\includegraphics[width=0.9\textwidth]{unitsInIVOA.pdf}} |

324 | \caption{Units is a core building block in the VO. Most parts of the |

325 | architecture rely on it: the User Layer with tools and clients, the |

326 | Resource Layer with data. Protocols, registry entries, and |

327 | data models also re-use these Units definitions.} |

328 | \label{fig:architecture} |

329 | \end{figure} |

330 | |

331 | Generally, every quantity provided in astronomy has a unit attached to |

332 | its value or is unitless (\eg a ratio, or a numerical multiplier). |

333 | |

334 | Units lie at the core of the VO architecture, as can be seen in \prettyref{fig:architecture}. |

335 | Most of the existing data and metadata collections accessible in the resource |

336 | layer have some legacy units, which are mandatory for any scientific use of |

337 | the corresponding data. Units can be embedded in data (\eg FITS headers) or be |

338 | implied by convention and/or (preferably) specified in metadata. |

339 | |

340 | Units also appear in the VOTable format \citep{ochsenbein11}, through the use |

341 | of a {\tt unit} attribute that can be used in the {\tt FIELD}, {\tt PARAM} and {\tt INFO} |

342 | elements. Because of the widespread dependency of many other VO standards on VOTable, |

343 | these standards inherit a dependency on Units. |

344 | |

345 | The Units also appear in many Data Models, through the use of dedicated elements in |

346 | the models and schemas. |

347 | At present, each VO standard either refers to some external reference document, or |

348 | provides explicit examples of the Units to be used in its scope, on a case-by-case |

349 | basis. |

350 | |

351 | The registry records can also contain units, for the description of table metadata. |

352 | The definition of VO Data Access protocols uses units by specifying in which units the input |

353 | parameters have to be expressed, or by restricting the possible units in which some |

354 | output must be returned. |

355 | |

356 | And last but not least, tools can interpret units, for example to display |

357 | heterogeneous data in a single diagram by applying conversions to a reference |

358 | unit on each axis. |

359 | |

360 | \subsection{Adopted terms and notations\label{sec:notations}} |

361 | |

362 | Discussions about units often suffer from misunderstandings arising from cultural |

363 | differences or ambiguities in the adopted vocabulary. For the sake of clarity, in this |

364 | document, the following concepts are used: |

365 | |

366 | %\begin{itemize} |

367 | %\item |

368 | A \textbf{quantity} is the combination of a (numerical) {\em |

369 | value}, measured for a {\em concept} and expressed in terms of a given |

370 | {\em unit}; there may be other structure to a quantity, such as |

371 | uncertainty or even provenance. |

372 | In the VO context, the nature of the concept can be expressed with a UCD or a utype. This document does not address the full issue of |

373 | representing quantities, but focusses on the {\em unit} part. |

374 | |

375 | %\item |

376 | A \textbf{unit} can be expressed in various forms: in natural language |

377 | (\eg \emph{metres per second squared}), with a combination of symbols |

378 | with typographic conventions (\eg m s$^{-2}$), or by a simplified text |

379 | label (\eg \unit{m.s-2}). VOUnit deals with the label form, which is |

380 | easier to standardize, parse and exchange. A VOUnit corresponds in the |

381 | most general case to a combination of several (possibly prefixed) |

382 | symbols with mathematical operations expressed in a controlled syntax. |

383 | |

384 | A \textbf{unit} consists of a sequence of \textbf{unit components}, |

385 | each of which represents a \textbf{base unit}, possibly modified by a |

386 | multiplicative \textbf{prefix} (of one or two characters), and raised |

387 | to an integer or rational power. The whole unit may (in some |

388 | syntaxes) be prefixed by a numerical \textbf{scale-factor}. |

389 | |

390 | Each of the \textbf{base units} (for example, the metre) is |

391 | represented by a \textbf{base symbol} (for example \unit{m}). Each |

392 | syntax has a number of \textbf{known units} |

393 | (\prettyref{sec:knownunits}), for each one of which there is at least |

394 | one symbol which identifies only that unit. |

395 | |

396 | A \textbf{symbol} is either a base symbol or a base symbol with a |

397 | scaling prefix. |

398 | |

399 | For example, in the unit \unit{1.663e-1mm.s**-1}, the scale-factor |

400 | is $1.663\times10^{-1}$, the two unit-components are \texttt{mm} |

401 | and \texttt{s**-1}; the first symbol has base symbol \texttt{m} and |

402 | prefix \texttt{m} (for `milli'), and the second has base |

403 | symbol \texttt{s}, no prefix, and the power~$-1$. |

404 | |

405 | %% Remark: some complex questions, more related to data modeling than to units, such as how a quantity |

406 | %% is associated to its measurement error, or how groups of coordinates are described, are not addressed in this |

407 | %% document. They can always be broken down, with appropriate modeling, into smaller bits to which VOUnits can |

408 | %% be applied. |

409 | |

410 | |

411 | \subsection{Purpose of this document} |

412 | \label{sec:purpose} |

413 | |

414 | The purpose of this document is to provide a reference specification of how |

415 | to write VOUnits, in order to maximize interoperability within the VO; |

416 | the intention is that VOUnit strings should be reliably |

417 | parseable by humans \emph{and} computers, with a single interpretation. |

418 | This is broadly the case for the other existing |

419 | unit-string syntaxes, although there are some slight ambiguities in |

420 | the specifications of these syntaxes (cf \prettyref{appx:grammar}). |

421 | We therefore include a set of self- and mutually-consistent |

422 | machine-readable grammars for all of the syntaxes discussed. |

423 | |

424 | The unit syntax(es) described here are intended to be human-readable, |

425 | to the extent that, for example, a string such as \unit{mm.s**-2} is |

426 | human-readable (without this restriction, we could easily define a |

427 | much more regular machine-to-machine grammar). Having an explicit |

428 | unit-string grammar means that data providers can write human-readable |

429 | strings in the confidence that the result will \emph{additionally} be |

430 | machine-readable in a reliable and checkable way. Or, where a string |

431 | is not fully machine readable (because a data provider needs to use a |

432 | custom unit such as \unit{'jupMass'}; see \prettyref{sec:quoting}), that the |

433 | string is at least partially machine readable, and that that partial |

434 | readability is non-ambiguous. |

435 | |

436 | We aim not to reinvent the wheel, and to be as compliant as possible with |

437 | legacy metadata in major archives, and astronomers' habits. |

438 | |

439 | In particular: |

440 | \begin{itemize} |

441 | \item We describe (\prettyref{appx:current}) a number of existing unit |

442 | syntaxes, and mention some ambiguities in their |

443 | definition. Application authors should expect to encounter each of |

444 | the syntaxes mentioned in this document (FITS, OGIP and CDS); all of |

445 | these are broadly endorsed by this specification. |

446 | \item In addition to the unit syntaxes described above, there are |

447 | multiple specifications of base and known units |

448 | (we refer, in particular, to |

449 | specifications from BIPM, ISO/IEC and the IAU); |

450 | %\citet{si-brochure,std:iec80000-13,iau12}); |

451 | these are broadly, but not completely, mutually consistent. |

452 | \item Where there are some ambiguities in, or contradictions between, |

453 | these various specifications, we recommend that application authors should |

454 | resolve them as indicated in this specification. |

455 | \item This document defines a syntax, called `VOUnits', which is as |

456 | far as is feasible in the intersection of the three existing |

457 | syntaxes, and which we recommend that applications should use when |

458 | writing unit strings. This aim is not quite possible in fact, and |

459 | the extensions to it, and the mild deviations from it, are discussed |

460 | below in \prettyref{sec:proposal} and \prettyref{appx:grammar}; |

461 | there is a summary of the various units |

462 | in \prettyref{tabx:knownunits}. |

463 | \end{itemize} |

464 | |

465 | % Data providers are encouraged to follow the VOUnits specifications for expressing |

466 | % their metadata. And application developers can rely on these specifications in order |

467 | % to know what VOUnits they should expect to face. |

468 | |

469 | |

470 | |

471 | \subsection{What this document will not do} |

472 | \label{sec:outofscope} |

473 | |

474 | This Recommendation does \textbf{not} prescribe what units data |

475 | providers employ, except to the extent that we avoid giving a standard |

476 | interpretation for a unit in some cases (for example we do not |

477 | acknowledge the degree celsius or the century as units). Since we do |

478 | not forbid `unrecognised' units, this need not restrict data providers. |

479 | Nor do we demand that a given quantity be expressed in a |

480 | unique way (\eg all distances in \unit{m}). So long as data is |

481 | labelled in a recognised system, a translation layer can be |

482 | provided. Data providers can customise the translation tools if |

483 | required. Depending on preference and the operations required, the |

484 | user may have a choice of units for his or her query and for the |

485 | result. In particular, the Recommendation does not require that only |

486 | recognised units are used. While it is obviously desirable for data |

487 | providers to use recognised and non-deprecated units where possible, |

488 | there are occasions when this is unnecessary or undesirable. |

489 | |

490 | This Recommendation does not discuss \emph{quantities} at all. That |

491 | is, we do not discuss the combination of number and unit which refers |

492 | to a particular physical measurement, such as |

493 | `$2\,\mathrm{m}\,\mathrm{s}^{-1}$'. Though this might appear to be a trivial extension, it |

494 | raises questions of the representation of decimal numbers, the |

495 | representation of uncertainties, questions of unit conversion, and |

496 | other data-modelling imponderables which have in the past, possibly |

497 | surprisingly, generated a great deal of discussion within the |

498 | IVOA without, so far, a generally acceptable resolution. |

499 | |

500 | This Recommendation describes only isolated units, and not arrays, |

501 | records or other combinations of units. Several VO protocols require |

502 | embedding complex objects into result tables, and give string |

503 | serializations for those: geometries in TAP results are the most |

504 | common example. This specification does not cover this situation, |

505 | although we hope that where individual unit strings are required in |

506 | such instances, their syntax will conform to, or include, this |

507 | specification by reference. |

508 | |

509 | In general, this Recommendation is concerned almost exclusively with |

510 | the syntactic question of what is and is not a valid unit string, |

511 | leaving most questions of interpretation or enforcement to a higher layer in an |

512 | application stack. Specifically: |

513 | \begin{itemize} |

514 | \item The specification does not forbid `unknown' units. An |

515 | implementation of this specification should be able to recognise, and |

516 | communicate, that a unit is unknown, but it is not required to reject |

517 | a unit string on the grounds that it is unrecognised. |

518 | |

519 | \item Similarly, although \prettyref{tabx:knownunits} forbids some |

520 | units from having SI prefixes, a VOUnit implementation should not |

521 | itself reject a unit string which incorrectly includes a prefix, but |

522 | should instead just make available the information that this has been |

523 | detected, and that it is deprecated. |

524 | |

525 | \item The list of known units in \prettyref{sec:knownunits} is not |

526 | specific about the precise definitions of the units in question; for |

527 | example, it refers to the `second' without distinguishing between the |

528 | various possible definitions that the second may have. In a |

529 | particular context, a data provider may need to indicate which of a |

530 | number of possible definitions is being used in fact. That said, a |

531 | VOUnits processor must interpret the symbols |

532 | of \prettyref{tabx:knownunits} compatibly with the indicated units: |

533 | a \unit{m} is always a metre of one type or another, and may not be |

534 | interpreted as, for example, a minute. |

535 | |

536 | \item This Recommendation does not specify how an application should |

537 | compare units for equivalence; for example, an application may or may |

538 | not wish to deem \unit{m/s} and \unit{km/s} to be `equivalent'. |

539 | This Recommendation, similarly, does not specify how to compare units |

540 | with scalefactors (cf \prettyref{sec:scaleFactors}). |

541 | \end{itemize} |

542 | |

543 | \section{The VOUnits syntax (normative)\label{sec:proposal}} |

544 | |

545 | The rules for VOUnits are defined in this section. |

546 | Various aspects are addressed: |

547 | \begin{itemize} |

548 | \item how the labels are encoded; |

549 | \item what base symbols are allowed and how they are spelled; |

550 | \item what prefixes are allowed and how they are used; |

551 | \item how symbols are combined. |

552 | \end{itemize} |

553 | A formal grammar summarizing these conventions is given |

554 | in \prettyref{appx:vougrammar}. |

555 | |

556 | The text below is expected to be compatible with the prescriptions |

557 | of the SI standard \citep{si-brochure}, except where noted. |

558 | |

559 | \subsection{String representation and encoding\label{sec:encoding}} |

560 | |

561 | VOUnits may occur in legacy contexts, in which the presence of |

562 | non-ASCII characters may cause considerable technical inconvenience |

563 | (for example FITS cards). There are only a few non-ASCII characters |

564 | which we might wish to include in unit strings (for example \AA\ |

565 | or \micro), and we can find substitutes for these sufficiently easily that we |

566 | feel there is little real benefit in permitting non-ASCII characters |

567 | in VOUnit strings. |

568 | |

569 | All the VOUnit characters in the specification below are printable ASCII |

570 | characters (that is, in the range hexadecimal 20 to 7E); any |

571 | extensions to this standard \norm{should} be restricted to this same range. |

572 | |

573 | All VOUnit strings \norm{must} be regarded as case-sensitive (the |

574 | strings in the other syntaxes are also case-sensitive). |

575 | |

576 | \subsection{Parsing unit strings -- overview\label{sec:parsing-components}} |

577 | |

578 | The unit strings \unit{unknown} and \unit{UNKNOWN} (that is, in |

579 | all-lowercase or all-uppercase) are reserved for cases |

580 | when the unit is unknown; that is, it is known that there should be a |

581 | unit, but the unit string has been lost or not been specified. These |

582 | strings are not, however, part of the list of known units or the VOUnits grammar, |

583 | and applications \norm{must} check for their presence before unit parsing. |

584 | |

585 | An empty unit string positively indicates that the corresponding |

586 | quantity is dimensionless. Since an empty string does not conform to |

587 | the grammars below, this also \norm{must} be checked for before |

588 | unit-parsing starts. |

589 | |

590 | A \textbf{symbol} within a unit-component \norm{should} be parsed as follows: |

591 | \begin{enumerate} |

592 | \item If it corresponds to a known \textbf{base symbol}, then it |

593 | \norm{must} be recognised as such (for example the \unit{Pa} must be |

594 | parsed as the known pascal, and never as the peta-year). |

595 | |

596 | \item If the symbol starts with a multiplicative prefix, then this is |

597 | recognised independently of whether the resulting base symbol is a |

598 | known or unknown unit -- thus \unit{Mm} and \unit{Mfurlong} are parsed |

599 | as millions of metres and furlongs, but note that this implies, for |

600 | the sake of consistency, that \unit{furlong} is parsed as the |

601 | femto-`urlong'. |

602 | |

603 | \item In the VOUnits syntax (a significant divergence from the other |

604 | syntaxes), base symbols \norm{may} be put between single |

605 | quotes \unit{'...'} (ASCII character \hex{27}). |

606 | Such symbols \norm{must} be parsed as |

607 | unrecognised unit symbols which are not further examined. |

608 | See \prettyref{sec:quoting} for discussion. |

609 | \end{enumerate} |

610 | |

611 | A library which implements this specification \norm{should} be able to |

612 | distinguish known and unknown units, and identify deviations from the |

613 | restrictions on their use, below. It \norm{should} be able to |

614 | communicate such information to a caller, but it \norm{should not} |

615 | unilaterally reject unit strings which use unknown units or use known |

616 | units in disapproved ways (of course, a higher-level application is |

617 | free to reject unit strings for any reason it pleases). |

618 | |

619 | \subsection{Base units\label{sec:baseUnits}} |

620 | |

621 | There is good agreement for the base symbols across the different schemes |

622 | (see \prettyref{tabx:comparUnitBase}). |

623 | |

624 | The VOUnits base symbols are listed in \prettyref{tab:voubase}. |

625 | |

626 | \begin{table}[ht] |

627 | \begin{center} |

628 | \def\arraystretch{1.2} |

629 | \begin{tabular}{|rl|rl|rl|rl|}\hline |

630 | \unit{m}&(metre) &\unit{g}&(gram) &\unit{J}&(joule) &\unit{Wb}&(weber)\\ |

631 | \unit{s}&(second of time) &\unit{rad}&(radian) &\unit{W}&(watt) &\unit{T}&(tesla)\\ |

632 | \unit{A}&(ampere) &\unit{sr}&(steradian) &\unit{C}&(coulomb) &\unit{H}&(henry)\\ |

633 | \unit{K}&(kelvin) &\unit{Hz}&(hertz) &\unit{V}&(volt) &\unit{lm}&(lumen)\\ |

634 | \unit{mol}&(mole) &\unit{N}&(newton) &\unit{S}&(siemens) &\unit{lx}&(lux)\\ |

635 | \unit{cd}&(candela) &\unit{Pa}&(pascal) &\unit{F}&(farad) &\unit{Ohm}&(ohm)\\\hline |

636 | \end{tabular} |

637 | \end{center} |

638 | \caption{\label{tab:voubase}VOUnits base units} |

639 | \end{table} |

640 | |

641 | For masses, the SI unit is \unit{kg}. However, existing specifications |

642 | recommend not using scale factors with \unit{kg}, but attaching them |

643 | only to \unit{g} instead. |

644 | |

645 | Recognising a known unit takes priority over parsing for prefixes. |

646 | Thus the string \unit{Pa} represents the pascal, and not the |

647 | peta-year, and the string \unit{mol} will always be the mole, and |

648 | never a milli-`ol', for some unknown unit~`ol'. |

649 | |

650 | \subsection{Known units} |

651 | \label{sec:knownunits} |

652 | |

653 | In \prettyref{tabx:knownunits}, we indicate the `known units' for each of the |

654 | described syntaxes, which go beyond the physically motivated set of |

655 | base units. |

656 | There are a few units (namely `\unit{angstrom} or \unit{Angstrom}', |

657 | `\unit{pix} or \unit{pixel}', `\unit{ph} or \unit{photon}' and `\unit{a} or \unit{yr}') for |

658 | which there are recognised alternatives in some syntaxes, and in these |

659 | cases `p' marks the preferred one. |

660 | |

661 | \emph{Unrecognised units \norm{should} be accepted by parsers}, |

662 | as long as they are parsed giving preference to the syntaxes and |

663 | prefixes described here. Thus, for example, the |

664 | string \unit{furlong/week} \norm{should} parse successfully (though |

665 | perhaps with suitably prominent warnings) as the femto-`urlong' per |

666 | week. |

667 | |

668 | The Unity library (\prettyref{sec:libraries}) recognises units with |

669 | respect to a subset of the QUDT unit framework~\citep{qudt}, with some |

670 | astronomy-specific additions. This is a particularly comprehensive |

671 | collection of units, and we commend it to the IVOA community as |

672 | a \emph{lingua franca} for this type of work. |

673 | |

674 | Sections \ref{sec:binary} to \ref{sec:other} below, discussing the set |

675 | of known units, are longer than one might expect would be necessary. |

676 | Most of the discussion concerns rather arcane edge-cases, or attempts |

677 | to reconcile the minor deviations between the relevant existing |

678 | standards. In all cases, we have attempted to be as uninnovative and |

679 | unsurprising as possible. |

680 | |

681 | Future versions of this specification may add to the set of known units. |

682 | |

683 | \begin{table} |

684 | \hbox to \textwidth{\hss |

685 | \catcode`\%=11 |

686 | \begin{tabular}{rlcccc|rlcccc} |

687 | \emph{unit}&\emph{description}&\emph{fits}&\emph{ogip}&\emph{cds}&\emph{vou}& |

688 | \emph{unit}&\emph{description}&\emph{fits}&\emph{ogip}&\emph{cds}&\emph{vou}\\ |

689 | \input{known-units.tex} |

690 | \end{tabular} |

691 | \hss} |

692 | \caption[Known units in the various syntaxes] |

693 | {\label{tabx:knownunits}Known units in the various syntaxes. |

694 | In the table, and for a given syntax, a `$\cdot$' indicates that the unit is recognised, |

695 | an~`s' that it is additionally permitted to have SI prefixes, |

696 | a~`b' that binary prefixes will be recognised, |

697 | and a~`d' that it is recognised but deprecated. |

698 | For those units which have alternative symbols for a given unit, |

699 | a~`p' indicates the preferred one.} |

700 | \end{table} |

701 | |

702 | \subsection{Binary units} |

703 | \label{sec:binary} |

704 | |

705 | The symbol~`b' is sometimes used for `bits', but this is the SI symbol |

706 | for `barn', and this Recommendation aligns with the SI standard in |

707 | this respect. Since the same symbol is sometimes used for `bytes', it |

708 | is probably best avoided in any case. |

709 | |

710 | \citet[item 13-9.c]{std:iec80000-13} notes that the term `byte' |

711 | `has been used for numbers of bits other than eight' in the past, but |

712 | that it should now always be used for eight-bit bytes; we recommend |

713 | the same interpretation here. The same source notes the theoretical |

714 | confusion between the symbol \unit{B} for `byte' and for `Bel'. We |

715 | believe it would be perverse in our present context to recommend |

716 | against using `B' for byte, and resolve this here |

717 | in favour of `byte' by mandating that \unit{B} \norm{must} be parsed |

718 | as indicating the `byte', that the \unit{dB} is an |

719 | unprefixable special-case unit (as discussed below), and by |

720 | implication that the `dB' |

721 | \norm{must not} be interpreted as a tenth of a byte.\footnote{We have no |

722 | evidence that this has been a common source of confusion within the |

723 | IVOA, or indeed anywhere else.} |

724 | |

725 | \subsection{Scale factors\label{sec:scaleFactors}} |

726 | |

727 | Units \norm{may} be prefixed by any of the 20 SI scale factors, |

728 | and a subset \norm{may} be prefixed by the eight binary scale factors. |

729 | The SI scale factors -- provided in \prettyref{tab:vouscalefactors}a -- |

730 | are the same as those of \citet{si-brochure}, |

731 | of \citet[\S6.5.4]{std:iso80000-1}, |

732 | and of \citet[Table~5]{pence10} |

733 | (see also \prettyref{tabx:comparUnitScale} for further comparisons). |

734 | %\medskip |

735 | \begin{table} |

736 | \def\arraystretch{1.2} |

737 | \begin{center} |

738 | \def\pfx#1#2{#1, $10^{#2}$} |

739 | \begin{tabular}{|rl|rl|}\hline |

740 | \unit{Y}&\pfx{yotta}{24}& |

741 | \unit{y}&\pfx{yocto}{-24}\\ |

742 | \unit{Z}&\pfx{zetta}{21}& |

743 | \unit{z}&\pfx{zepto}{-21}\\ |

744 | \unit{E}&\pfx{exa}{18}& |

745 | \unit{a}&\pfx{atto}{-18}\\ |

746 | \unit{P}&\pfx{peta}{15}& |

747 | \unit{f}&\pfx{femto}{-15}\\ |

748 | \unit{T}&\pfx{tera}{12}& |

749 | \unit{p}&\pfx{pico}{-12}\\ |

750 | \unit{G}&\pfx{giga}{9}& |

751 | \unit{n}&\pfx{nano}{-9}\\ |

752 | \unit{M}&\pfx{mega}{6}& |

753 | \unit{u}&\pfx{micro}{-6}\\ |

754 | \unit{k}&\pfx{kilo}{3}& |

755 | \unit{m}&\pfx{milli}{-3}\\ |

756 | \unit{h}&\pfx{hecto}{2}& |

757 | \unit{c}&\pfx{centi}{-2}\\ |

758 | \unit{da}&\pfx{deca}{1}& |

759 | \unit{d}&\pfx{deci}{-1}\\ |

760 | \hline |

761 | \end{tabular} |

762 | \qquad |

763 | \def\pfx#1#2{#1, $2^{#2}$} |

764 | \begin{tabular}{|rl|}\hline |

765 | \unit{Ki}&\pfx{kibi}{10}\\ |

766 | \unit{Mi}&\pfx{mebi}{20}\\ |

767 | \unit{Gi}&\pfx{gibi}{30}\\ |

768 | \unit{Ti}&\pfx{tebi}{40}\\ |

769 | \unit{Pi}&\pfx{pebi}{50}\\ |

770 | \unit{Ei}&\pfx{exbi}{60}\\ |

771 | \unit{Zi}&\pfx{zebi}{70}\\ |

772 | \unit{Yi}&\pfx{yobi}{80}\\ |

773 | \hline |

774 | \end{tabular} |

775 | \end{center} |

776 | \caption[VOUnits prefixes]{\label{tab:vouscalefactors}VOUnits prefixes: |

777 | (a, left) decimal prefixes; |

778 | (b, right) binary prefixes} |

779 | \end{table} |

780 | |

781 | Writers of unit strings \norm{must not} use compound prefixes (that is, |

782 | more than one SI prefix). Prefixes are concatenated to the base |

783 | symbol without space, and \norm{must not} be used without a base symbol. |

784 | |

785 | The SI prefixes of \prettyref{tab:vouscalefactors}a \emph{\norm{must} |

786 | always refer to multiples of 1000}, even when applied to binary units |

787 | such as bit or byte; this follows the stipulations (and clarifying note) of |

788 | \citet[\S3.1]{si-brochure}, and the proscription |

789 | of \citet[\S6.5.4]{std:iso80000-1}. |

790 | If data providers wish to use multiples of 1024 (ie, $2^{10}$) for |

791 | units such as bytes or bits, they \norm{must} use the binary prefixes |

792 | of \citet[\S4]{std:iec80000-13}, reproduced in \prettyref{tab:vouscalefactors}b |

793 | (these originated in \citet{std:ieee1541-2002}). |

794 | |

795 | Note: the~`s' and~`b' annotations in \prettyref{tabx:knownunits} |

796 | are not symmetric: the~`s' annotation indicates that SI |

797 | prefixes are permitted in the given syntax, which means that they are |

798 | also recognised when preceding unknown units (which have no |

799 | restrictions on them); in contrast, binary prefixes are recognised |

800 | exclusively on units with a~`b' annotation, which means that they |

801 | are \emph{not} recognised with unknown units. That is, |

802 | the \unit{Mifurlong} is the mega-\texttt{ifurlong} and |

803 | the \unit{Kifurlong} is the unknown unit \texttt{Kifurlong}. |

804 | |

805 | Note: The letter \unit{u} is used instead of the |

806 | \micro\ symbol to represent a factor of $10^{-6}$, |

807 | following the character set defined in \prettyref{sec:encoding}. |

808 | |

809 | \subsection{Astronomy symbols} |

810 | |

811 | \prettyref{tabx:comparUnitAstro} lists symbols used in astronomy to |

812 | describe times, angles, distances and a few additional quantities. |

813 | The subset of these used by this specification is |

814 | listed in \prettyref{tab:vouadopted}. |

815 | |

816 | \begin{table}[t] |

817 | \begin{center} |

818 | \def\arraystretch{1.2} |

819 | \begin{tabular}{|rl|rl|rl|}\hline |

820 | \unit{min}&(minute of time) &\unit{deg}&(degree of angle) &\unit{Jy}&(jansky) \\ |

821 | \unit{h}&(hour of time) &\unit{arcmin}&(arcminute) &\unit{pc}&(parsec) \\ |

822 | \unit{d}&(day) &\unit{arcsec}&(arcsecond) &\unit{eV}&(electron volt) \\ |

823 | \unit{a}, \unit{yr}&(year) &\unit{mas}&(milliarcsecond) &\unit{AU}&(astronomical\\ |

824 | \unit{u}&(atomic mass) & & & & unit)\\ |

825 | \hline |

826 | \end{tabular} |

827 | \end{center} |

828 | \caption{\label{tab:vouadopted}Additional astronomy symbols} |

829 | \end{table} |

830 | |

831 | |

832 | Minutes, hours, and days of time \norm{must} be represented in VOUnits by the |

833 | symbols \unit{min}, \unit{h} and \unit{d}; however the \unit{cd} is |

834 | the candela, not the centi-day.\footnote{We therefore rule out |

835 | interpreting \units{dB/cd} as 0.9\units{mbit/s}.} The year \norm{may} be expressed by |

836 | \unit{yr} (common practice), |

837 | or \unit{a}, |

838 | as recommended by ISO \citep[Annex C]{std:iso80000-3} |

839 | and the IAU \citep[Table 6]{wilkins89}. |

840 | However peta-year must only be written \unit{Pyr}, |

841 | to avoid the collision with the pascal, \unit{Pa}. |

842 | |

843 | There are no VOUnit symbols for degrees celsius or century. |

844 | Temperatures are expressed in kelvin (\unit{K}), |

845 | and a century corresponds to \unit{ha} or \unit{hyr}. |

846 | Note that \emph{this is a mild deviation from the SI standard}, |

847 | which states that the `hectare', with unit symbol \unit{ha}, |

848 | is a `non-SI unit accepted for use' as a measure of land area~\citep[table~6]{si-brochure}, |

849 | and which acknowledges neither `a' nor `yr' as a symbol for year.\footnote{If |

850 | large telescope arrays feel they must talk of attojoules per |

851 | hectare per century, for some reason, they're going to have to be |

852 | careful how they do so; it's probably best not to even think about atto-Henrys.} |

853 | |

854 | The astronomical unit \norm{should} be expressed in upper-case, \unit{AU}, in |

855 | order to follow legacy practice. It may also be written \unit{au}, in |

856 | the VOUnits syntax, on the ground that it would be perverse to prefer |

857 | the atto-atomic-mass to the astronomical unit, in an astronomical unit |

858 | specification. |

859 | \emph{This is a deviation} from the SI recommendation of |

860 | `ua'~\citep[Table 7]{si-brochure}, but conformant with the IAU's |

861 | recommendation of `au'~\citep{iau12}.% |

862 | \footnote{If you feel a burning desire to write about micro-years or |

863 | atto atomic-mass, this document is not the place you need to look |

864 | for help.} |

865 | |

866 | Because of the near-degeneracy between the decimal prefixes \texttt{d} |

867 | and \texttt{da}, there is an ambiguity when parsing the |

868 | unit \unit{dadu} -- is this the deka-\unit{du} or the deci-\unit{adu}? |

869 | The only cases where this ambiguity is possible are those involving |

870 | known units starting with~`a' (\texttt{da} is unambiguously a |

871 | deci-year for the same reason that \texttt{d} is unambiguously a day, |

872 | because the presence of a bare unit prefix would be ungrammatical). |

873 | We can think of no cases where the prefix is useful enough that |

874 | resolving the ambiguity is worth the specification effort, so we deem |

875 | the parse of \texttt{da.*} to be \textbf{unspecified}. %\footnote{The |

876 | %% Working Group was informed at the specification stage that the |

877 | %% dekagramme is still in use in parts of Europe for certain categories |

878 | %% of delicious comestibles; this was deemed insufficient to save the |

879 | %% prefix, since grocery shopping is not a core VOUnit use-case.} |

880 | In consequence, data providers \norm{must not} use the \texttt{da} |

881 | prefix, and \norm{should not} use the \texttt d prefix (as noted |

882 | in \prettyref{sec:other}, the decibel, \unit{dB}, is listed as a `known |

883 | unit', as opposed to a deci-Bel). |

884 | |

885 | \subsection{Other symbols, and other remarks} |

886 | \label{sec:other} |

887 | |

888 | \prettyref{tabx:comparUnitDeprecated} corresponds to Table~7 in the IAU document, and the IAU strongly |

889 | recommends no longer using these units. |

890 | Data producers are strongly advised to prefer the equivalent notation using symbols and prefixes listed in |

891 | Tables~\ref{tabx:comparUnitBase}, \ref{tabx:comparUnitScale} and \ref{tabx:comparUnitAstro}. |

892 | |

893 | However, in order to be compatible with legacy metadata, VOUnit |

894 | parsers \norm{should} be able to interpret symbols \unit{angstrom} |

895 | or \unit{Angstrom} (for \aa{}ngstr\"om), \unit{barn}, \unit{erg} |

896 | and \unit{G} (for gauss). |

897 | |

898 | \prettyref{tabx:comparUnitOther} compares other miscellaneous symbols. |

899 | The last set of VOUnits symbols, derived from this comparison, is in |

900 | \prettyref{tab:voumisc}. |

901 | |

902 | %\medskip |

903 | \begin{table}[ht] |

904 | \begin{center} |

905 | \def\arraystretch{1.2} |

906 | \begin{tabular}{|l|l|L{3cm}|l|}\hline |

907 | \unit{mag} (magnitude) &\unit{pix} or \unit{pixel} &\unit{solMass} (solar mass) &\unit{R} (rayleigh) \\ |

908 | \unit{Ry} (rydberg) &\unit{voxel} &\unit{solLum} (solar luminosity)&\unit{chan} (channel) \\ |

909 | \unit{lyr} (light year) &\unit{bit} &\unit{solRad} (solar radius) &\unit{bin} \\ |

910 | \unit{ct} or \unit{count} &\unit{byte} (8 bits) &\unit{Sun} (relative to the Sun, e.g. abundances)&\unit{beam} \\ |

911 | \unit{ph} or \unit{photon} &\unit{adu} &\unit{D} (Debye) &\unit{unknown} (\prettyref{sec:parsing-components})\\\hline |

912 | \end{tabular} |

913 | \end{center} |

914 | \caption[Miscellaneous VOUnits] |

915 | {\label{tab:voumisc}Miscellaneous VOUnits.} |

916 | \end{table} |

917 | |

918 | A few symbols which might theoretically be ambiguous are listed in |

919 | \prettyref{tab:ambiguous}, |

920 | with their consensus VOUnit interpretation. |

921 | |

922 | \begin{table}[bht] |

923 | \begin{center} |

924 | \begin{tabular}{|r|l|l|} |

925 | \hline |

926 | \textbf{VOUnit}&\textbf{Correct interpretation}&\textbf{Incorrect}\\ |

927 | \unit{Pa}&pascal&peta-year\\ |

928 | \unit{ha}&hecto-year&hectare\\ |

929 | \unit{cd}&candela&centi-day\\ |

930 | \unit{dB}&decibel&deci-byte\\ |

931 | \unit{B}&byte&bel\\ |

932 | \unit{au}&astronomical unit&atto-atomic-mass\\ |

933 | \hline |

934 | \end{tabular} |

935 | \end{center} |

936 | \caption{\label{tab:ambiguous}Possibly ambiguous units} |

937 | \end{table} |

938 | |

939 | It can be noted that some of the units listed in \prettyref{tabx:comparUnitOther} are |

940 | questionable. They arise in fact from a need to describe quantities, when the only |

941 | piece of metadata available is the unit label. Count, photon, pixel, bin, voxel, bit, |

942 | byte are concepts, just as apple or banana. The associated quantities could be fully |

943 | described with a UCD, a value and a void unit label. |

944 | It is possible to count a number of bananas, or to express a distance measured in |

945 | bananas, but this does not make a banana a reference unit. |

946 | |

947 | The FITS document provides the most general description of all the compared schemes, |

948 | and VOUnits adopts similar definitions, for the sake of legacy metadata. |

949 | The VOUnits symbol for magnitudes is \unit{mag}. |

950 | %% The symbol \unit{Sun} is used to express ratios relative to solar |

951 | %% values, for example abundances or metallicities. |

952 | Note that all symbols like \unit{count}, \unit{photon}, \unit{pixel} |

953 | are always used in lower case and singular form. |

954 | |

955 | The decibel, \unit{dB}, is listed in the SI specification |

956 | \citep[Table 8]{si-brochure} amongst a set of `other non-SI units', |

957 | and mentioned by \citet[\S0.5]{std:iso80000-3} in a `Remark on |

958 | logarithmic quantities'. The \unit{dB} is included in the list of |

959 | `known units' of \prettyref{tabx:knownunits} and so \norm{must} be parsed as a |

960 | unit by itself -- as opposed to being parsed as the prefix~`d' |

961 | qualifying the unit `Bel' -- and both the decibel and Bel \norm{must |

962 | not} be used with other scaling prefixes. |

963 | |

964 | If there is no unit associated with a quantity (for example a quantity |

965 | that is a character string, or unitless), data providers \norm{should} |

966 | indicate this with an empty string rather than blanks or dashes. |

967 | |

968 | |

969 | |

970 | \subsection{Mathematical expressions containing symbols} |

971 | |

972 | \prettyref{tabx:comparUnitCombine} summarizes how, |

973 | in the various existing syntaxes, mathematical operations may |

974 | be applied on unit symbols for exponentiation, multiplication, |

975 | division, and other computations. |

976 | |

977 | The combination rules are where the largest discrepancies between the |

978 | different schemes appear. The FITS document discusses the problem of |

979 | trying to best accommodate the existing schemes |

980 | \citep[\S4.3.1]{pence10}, without really resolving the problem. |

981 | \label{sec:fitsquote} |

982 | This and other ambiguities are discussed in the detailed syntaxes of \prettyref{appx:grammar}. |

983 | |

984 | VOUnits follow a subset of the FITS rules, |

985 | as summarized in \prettyref{tab:VOUnitCombine}. |

986 | |

987 | \begin{table}%[ht] |

988 | \begin{center} |

989 | \def\arraystretch{1.2} |

990 | \begin{tabular}{|r|l|} |

991 | \hline |

992 | %\unit{str1 str2} & Multiplication (discouraged -- see text)\\ |

993 | %\unit{str1*str2} & Multiplication \\ |

994 | \unit{str1.str2} & Multiplication \\ |

995 | \unit{str1/str2} & Division \\ |

996 | \unit{str1**expr} & Raised to the power expr \\ |

997 | %\unit{str1\^{}expr} & Raised to the power expr \\ |

998 | %\unit{str1expr} & Raised to the power expr \\ |

999 | \unit{fn(str1)} & Function applied to a unit string\\ |

1000 | %% \unit{log(str1)} & Common Logarithm (to base 10) \\ |

1001 | %% \unit{ln(str1)} & Natural Logarithm \\ |

1002 | %% \unit{exp(str1)} & Exponential (e$^\mathrm{str1}$) \\ |

1003 | %% \unit{sqrt(str1)} & Square root \\ |

1004 | \hline |

1005 | \end{tabular} |

1006 | \end{center} |

1007 | \caption[Combination rules and mathematical expressions for VOUnits] |

1008 | {\label{tab:VOUnitCombine}Combination rules and mathematical expressions for VOUnits. |

1009 | See \prettyref{appx:vougrammar} for the complete grammar.} |

1010 | \end{table} |

1011 | |

1012 | As illustrated in \prettyref{tab:VOUnitCombine}, units may include a |

1013 | limited set of functional dependencies on other units. The set of |

1014 | functions recognised within VOUnits is the same as the set recommended |

1015 | by FITS, and listed in \prettyref{tab:functions}. As with |

1016 | unrecognised units, |

1017 | \emph{parsers \norm{should} accept unrecognised functions without error}, |

1018 | even if they deprecate them at some later processing stage. As |

1019 | described in \prettyref{sec:quoting}, functions may be quoted to |

1020 | indicate that they \norm{must not} be interpreted as in this table. |

1021 | \begin{table}%[ht] |

1022 | \begin{center} |

1023 | \def\arraystretch{1.2} |

1024 | \begin{tabular}{|r|l|} |

1025 | \hline |

1026 | \unit{log(str1)} & Common Logarithm (to base 10) \\ |

1027 | \unit{ln(str1)} & Natural Logarithm \\ |

1028 | \unit{exp(str1)} & Exponential (e$^{\mathrm{str1}}$) \\ |

1029 | \unit{sqrt(str1)} & Square root \\ |

1030 | \hline |

1031 | \end{tabular} |

1032 | \end{center} |

1033 | \caption{\label{tab:functions}Functions of units.} |

1034 | \end{table} |

1035 | Note that since functions such as `log' require dimensionless |

1036 | arguments, when a quantity~$x$ is (for example) represented by numbers |

1037 | labelled with units \unit{log(Hz)}, that indicates that the numbers |

1038 | are related to~$x$ by the function |

1039 | $\log\bigl(x/(\mathrm{1\,Hz})\bigr)$. |

1040 | |

1041 | %\subsection{Quantities} |

1042 | %\label{sec:quantities} |

1043 | % |

1044 | %A quantity, \eg a measurement of a physical value like the speed of |

1045 | %light, has a value (2.998 10+5), a ucd (phys.veloc), units (km.s-1) |

1046 | %and is coded using a numerical type (real). |

1047 | % |

1048 | %Some quantities are also reused as units. Many units are expressed, or |

1049 | %converted, in terms of physical constants such as the speed of light, |

1050 | %\begin{itemize} |

1051 | % \item \texttt{$c=$~2.998 10+8~m.s-1;} |

1052 | % \item Boltzman's constant, \texttt{$K_{\mathrm{B}}=$1.38065~10-23} |

1053 | % \item \texttt{1 AU $=$ 1.499 10+11 m.} |

1054 | %\end{itemize} |

1055 | % |

1056 | % Many of these are used as units in their own right, \eg velocities may be expressed as a |

1057 | %fraction or multiple of c, but c is also used to convert between |

1058 | %wavelength and frequency, etc. These are combinations of units with |

1059 | %scaling factors applied, and so can be treated in the same way as any |

1060 | %other compound unit \eg the \texttt{Jy} (\texttt{10-26 W.m-2.Hz-1}) . |

1061 | % |

1062 | %We need to ensure that we are consistent with the IVOA Quantity model, |

1063 | %where appropriate. |

1064 | |

1065 | \subsection{The numerical scale-factor} |

1066 | \label{sec:scalefactor} |

1067 | |

1068 | A VOUnits unit string \norm{may} start with a numerical scale-factor |

1069 | to indicate a derived unit. For example, the inch might appear as the |

1070 | unit of \unit{25.4mm}. See \prettyref{appx:vougrammar} for the syntax |

1071 | of the VOUnits numerical string. |

1072 | |

1073 | A data provider may choose to use such a unit in order to represent a |

1074 | unit which is not listed as one of the VOUnit `known units'. For |

1075 | example, given a VOTable column of masses relative to Jupiter's mass, |

1076 | one might label it as having units of \unit{1.898E27kg} rather than |

1077 | \unit{'jupiterMass'} (an `unknown unit'). |

1078 | The \emph{advantage} of doing so is that the data consumer can |

1079 | translate the column data into well-known physical units without further |

1080 | information, and the data source is thus self-contained. |

1081 | The \emph{disadvantage} of doing so is (i) that the intention might be |

1082 | obscured (this is a type of provenance information); |

1083 | and (ii) that the measurements may be relative to the actual |

1084 | Jupiter mass rather than merely expressed in those terms, so that they |

1085 | should change if the actual mass were to be refined as a result of |

1086 | a recalibration. The data provider retains the choice of which |

1087 | strategy to take. |

1088 | |

1089 | This Recommendation does not prescribe how many significant figures |

1090 | should be in a scale-factor, nor whether it should be interpreted as |

1091 | single- or double-precision, nor how units with scale-factors should |

1092 | be compared for equality. All of these are implementation choices for |

1093 | the software which is handling the units. |

1094 | |

1095 | \subsection{Quoting unknown units\label{sec:quoting}} |

1096 | |

1097 | The VOUnits syntax permits the use of `unknown units' (that is, units not listed |

1098 | in \prettyref{tabx:knownunits}). There need be no syntactic indication that |

1099 | a unit is `unknown'; this is convenient, but creates some minor |

1100 | ambiguities. |

1101 | |

1102 | In the VOUnits syntax, base symbols may be put between single |

1103 | quotes \unit{'...'} (a significant divergence from the other |

1104 | syntaxes). Such symbols \norm{must} be parsed as |

1105 | unrecognised unit symbols which are not further examined. |

1106 | |

1107 | This has two consequences. Firstly, it means that an unknown symbol |

1108 | which happens to start with an SI prefix is not broken |

1109 | into a base symbol and prefix: thus \unit{'furlong'} is parsed as |

1110 | expected, whereas \unit{furlong} would be the femto-`urlong'. |

1111 | Secondly, a quoted symbol is parsed as an unrecognised unit, even if |

1112 | it would otherwise indicate a known unit; thus the unit \unit{'m'} is |

1113 | parsed as an unknown unit `m', and does not indicate the metre. |

1114 | |

1115 | This facility means that a data provider may label data with units of, |

1116 | for example, \unit{'martianDay'} or the \unit{'B'}, while still |

1117 | remaining conformant with the VOUnits Recommendation, and without |

1118 | risking the leading \texttt{m} being misparsed as an SI prefix, or the |

1119 | `B' being misparsed as a `byte'. |

1120 | |

1121 | Quoted units can take prefixes (they are `unknown units', so there are |

1122 | no restrictions on their usage), so that \unit{m'furlong'} is a |

1123 | milli-furlong, and \unit{m'm'} is a milli-`m'. The only permissible |

1124 | prefixes are those of \prettyref{tab:vouscalefactors}. |

1125 | |

1126 | \subsection{General rationale (informative)} |

1127 | \label{sec:rationale} |

1128 | |

1129 | \subsubsection{Deviations from other syntaxes} |

1130 | \label{sec:deviations} |

1131 | |

1132 | The aspiration of the VOUnits work was that the syntax should be as |

1133 | much as possible in the intersection of the various pre-existing |

1134 | syntaxes, so that a unit string which conformed to the VOUnits syntax |

1135 | would be parseable in each of those other syntaxes. This has not been |

1136 | possible in fact, for four reasons. |

1137 | \begin{enumerate} |

1138 | \item The CDS syntax permits only a dot to indicate a product, and the |

1139 | OGIP syntax only a star, while FITS permits both. The VOUnits syntax |

1140 | uses a dot, so that non-trivial OGIP unit strings are therefore |

1141 | necessarily invalid VOUnits strings in this one respect. |

1142 | \item The VOUnits syntax permits (but does not require) a scale-factor |

1143 | at the beginning of the string, which is not a power of 10. Only the |

1144 | CDS syntax permits a similar factor. |

1145 | See \prettyref{sec:scalefactor} for discussion. |

1146 | \item Only the VOUnits syntax permits quoted units. |

1147 | \item Only the VOUnits syntax permits the use of the binary prefixes |

1148 | of \prettyref{tab:vouscalefactors}. |

1149 | \end{enumerate} |

1150 | The first is both unavoidable in specification, and largely |

1151 | unavoidable in practice; the others are VOUnit extensions which a data |

1152 | provider may of course decline to take advantage of. |

1153 | |

1154 | The scalefactor and quoted-units extensions are intended to support |

1155 | the case where the data provider wishes to distribute data including a |

1156 | unit which is `unknown', but which the provider nonetheless feels is |

1157 | necessary or useful; this should be done only after weighing the |

1158 | considerations of Sects.~\ref{sec:scalefactor} and~\ref{sec:quoting}. |

1159 | For the sake of consistency, and in order to allow |

1160 | constructions such as \texttt{M'jupiterMass'}, the grammar permits quoted |

1161 | units to take scaling prefixes; this is not often likely to be a good idea. |

1162 | |

1163 | A VOUnits string which avoids the three extensions above will be |

1164 | parseable, with the same meaning, in the CDS and FITS syntaxes, and |

1165 | will be parseable by an OGIP parser if dots are replaced by stars. |

1166 | |

1167 | \subsubsection{Restrictions to ASCII} |

1168 | |

1169 | As described above, VOUnit unit strings are restricted to printable |

1170 | ASCII characters. While the two most prominent uses of these strings |

1171 | will be within VOTable attributes (\verb|unit="..."|) and in XML |

1172 | serialisations of a data model (for example \verb|<unit>...</unit>|), |

1173 | we also intend them to be usable within FITS files and within |

1174 | databases. Neither of the latter two contexts is necessarily |

1175 | unicode-friendly, so permitting non-ASCII characters in a unit string |

1176 | (such as \AA\ or $\mu$) is more likely than not to cause trouble. |

1177 | |

1178 | Similarly, forbidding spaces within VOUnit strings removes one (minor) |

1179 | complication when recognising them in use. |

1180 | |

1181 | \subsubsection{Other units, and unit-like expressions} |

1182 | |

1183 | As noted above, the VOUnits syntax does not include structures such as |

1184 | arrays or tuples of numbers. We include in this category sexagesimal |

1185 | coordinates, calendar dates (in ISO-8601 form or otherwise), |

1186 | RA-Dec pairs, and other structured quantities serialised as strings. |

1187 | Each of these is well-specified elsewhere, and would require a |

1188 | separate parser if encountered in data. |

1189 | |

1190 | Existing VO standards already recommend that coordinates be expressed |

1191 | in decimal degrees. |

1192 | |

1193 | Quantities like the Modified Julian Date (MJD) are also not recognized |

1194 | VOUnits. As described in \prettyref{sec:notations}, the quantity MJD |

1195 | can be seen as a concept (described by the appropriate UCD or utype), |

1196 | and the corresponding value will most likely be expressed in days, so |

1197 | the VOUnit will be \unit{d}. There is no need to overload VOUnits to |

1198 | incorporate the description of concepts themselves. |

1199 | |

1200 | The notion of unit conversion and quantity manipulation is discussed in |

1201 | \prettyref{sec:conversion}. |

1202 | |

1203 | \section{Use cases and applications (informative)\label{sec:useCase}} |

1204 | |

1205 | \subsection{Unit parsing} |

1206 | |

1207 | The rules defined in \prettyref{sec:proposal} allow us to build VOUnit parsers. |

1208 | Several services can be built on top of a VOUnit parser: |

1209 | |

1210 | \begin{enumerate} |

1211 | \item Validation. A service checking that a VOUnit is well written. The output |

1212 | of such a service can have different levels: fully valid unit; valid syntax, but |

1213 | not the preferred one (\eg use of deprecated symbols); parsing error. |

1214 | \item Explanation. A service returning a plain-text explanation of the unit label. |

1215 | \item Typesetting. A service returning an equivalent of the unit label suitable for inclusion in |

1216 | a \LaTeX\ or HTML document. |

1217 | \item Dimensional equation. As described by \citet{osuna05}, VOUnits can be translated |

1218 | into a dimensional equation, allowing conversion methods to be built up from one string |

1219 | representation to another one (see also \prettyref{sec:conversion}). |

1220 | \end{enumerate} |

1221 | |

1222 | \subsection{Libraries\label{sec:libraries}} |

1223 | |

1224 | There are a few existing libraries able to interpret unit labels. |

1225 | In all cases, |

1226 | some software effort is required if they are to be used in translating |

1227 | between data provider unit labels, and those to be adopted by |

1228 | the IVOA for internal use. |

1229 | |

1230 | One of the most widely-used specialised |

1231 | astronomical libraries is AST which includes a unit conversion |

1232 | facility attached to astronomical coordinate systems \citep{berry12}. |

1233 | |

1234 | Another library has been developed at |

1235 | CDS\footnote{\url{http://cds.u-strasbg.fr/resources/doku.php?id=units}}, |

1236 | and can be tested online\footnote{\url{http://cdsweb.u-strasbg.fr/cgi-bin/Unit}}. This library covers all |

1237 | the symbols and notations defined in the standard for astronomical catalogues \citep[\S3.2]{cds00}, as well as |

1238 | additional symbols and notations. |

1239 | |

1240 | The Unity library\footnote{\url{https://bitbucket.org/nxg/unity}} is a new |

1241 | standalone library intended to parse unit strings in the VOUnits, |

1242 | OGIP, StdCats and FITS syntaxes; it was used as a vehicle for |

1243 | developing and testing the grammars and |

1244 | ideas for this present document. It provides yacc-style grammars for |

1245 | the various syntaxes, as well as implementing them in parsers written |

1246 | in Java and~C. The grammars of \prettyref{appx:grammar} are extracted |

1247 | from the Unity distribution. |

1248 | |

1249 | \subsection{Unit conversion and quantity transformation\label{sec:conversion}} |

1250 | |

1251 | Unit conversion is the simple task of converting a quantity expressed |

1252 | in a given unit into a different unit, while the concept remains the |

1253 | same. For example, such a library might be able to convert a distance |

1254 | in \unit{pc} into a distance in \unit{AU} or \unit{km}, or convert a |

1255 | flux from \unit{mJy} to \unit{W.m-2.Hz-1}. This is rather easy with |

1256 | existing libraries, using dimensional analysis or SI units as a |

1257 | reference. |

1258 | |

1259 | Quantity transformation consists in deriving a new quantity from one or several original |

1260 | quantities. It is more complex, because it requires having a precise model |

1261 | (a simple equation in simple cases) for computing the transformation. The model involves |

1262 | quantities, each described with a UCD or utype, value and VOUnit. Some of the quantities |

1263 | involved might be physical constants (\eg Boltzmann's constant $k_{\mathrm{B}}$). |

1264 | |

1265 | Examples of such transformations can be: |

1266 | \begin{itemize} |

1267 | \item linear unit conversion: a distance is measured in \unit{pixel} in an image, and needs to be transformed into |

1268 | the corresponding angular separation in \unit{arcsec}. This can be done if the quantity representing the pixel |

1269 | scale is given, with its value and a compatible unit like \unit{deg/pixel}. |

1270 | \item converting a photon wavelength into the corresponding photon energy or frequency. |

1271 | \item deriving the flux for a given photon emission rate (in \units* W) from Planck's |

1272 | constant ($6.63 \times 10^{-34}\units{J~s}$), the radiation frequency (in \units{GHz}), and the |

1273 | number of photons emitted per second. |

1274 | \item transforming a magnitude into a flux, as needed for SED building. |

1275 | \end{itemize} |

1276 | |

1277 | VOUnits can help in quantity transformation if all quantities are qualified with proper VOUnits. |

1278 | |

1279 | \subsection{Query languages} |

1280 | |

1281 | Including VOUnits in queries is not an easy task. Some guidelines were defined in the |

1282 | reflexion on ADQL. |

1283 | |

1284 | \begin{enumerate} |

1285 | \item All data providers should be encouraged to supply units for each column |

1286 | of a table. Columns should also have associated UCDs, so that quantities can be |

1287 | properly identified. |

1288 | |

1289 | %In most published tables in Astronomical journals and Vizier server as well, unitless values are |

1290 | %represented by "---". This could be adopted for the VO convention as well. |

1291 | \item The IVOA needs to provide a parser to relate the native units to the standard IVOA |

1292 | labels (in this context, the `native units' are the units of the |

1293 | underlying database table or metadata). |

1294 | |

1295 | \item |

1296 | The default response to a query which does not specify units, will be |

1297 | in the native units of the table. |

1298 | %\emph{We recommand that the output units will be labelled using the IVOA standard label ???} |

1299 | |

1300 | \item |

1301 | Where queries involve combining or otherwise operating on the content |

1302 | of columns to produce an output column with modified units, we can |

1303 | provide libraries and a parser to assist in assigning and checking a |

1304 | new unit, and attach this to the returned values via the SQL CAST |

1305 | operator. |

1306 | This is implemented already in database related applications such as |

1307 | Saada\footnote{\url{http://saada.unistra.fr/}}, for instance. |

1308 | If any column used in responding to a query lacks a necessary unit, the output |

1309 | involving that column will be unitless. |

1310 | |

1311 | \item |

1312 | If the user wants to change the output units with respect to the table |

1313 | units, this could be done by specifying the units in the initial |

1314 | SELECT statement. There are several issues to consider: |

1315 | \begin{enumerate} |

1316 | \item Does the user also need to include the conversion expression, or does the unit |

1317 | parser take care of that? |

1318 | \item Can the user use this to assign units (based on prior knowledge) to output from a |

1319 | column lacking a unit? |

1320 | \end{enumerate} |

1321 | \end{enumerate} |

1322 | |

1323 | |

1324 | \subsection{Broader use in the VO} |

1325 | |

1326 | \begin{figure}[thb] |

1327 | % Requires \usepackage{graphicx} |

1328 | \includegraphics[width=\textwidth]{./units2.jpg} |

1329 | \caption{This shows the levels at which conversions might be done. |

1330 | \textcolor{blue}{Plain arrows}: At the point where an astronomer or |

1331 | data provider submits input to the VO, we should provide tools to |

1332 | ensure that units are labeled consistently according to VOUnits. |

1333 | This implies that a units parsing step is included prior to metadata ingestion into the VO. |

1334 | \brown{Dashed arrows}: Conversions required to supply results to |

1335 | the user in specified or user-preferred units, \eg \texttt{J.s-1} to \texttt{W}, are done where and when they are required.} |

1336 | \label{fig:units2} |

1337 | \end{figure} |

1338 | |

1339 | Different VO entities, such as registries and applications, require and consume metadata |

1340 | with units attached, and interoperate via protocols. \prettyref{fig:units2} illustrates the places where the IVOA |

1341 | could intervene to ensure consistent use of units. |

1342 | |

1343 | |

1344 | \clearpage |

1345 | % Put a \clearpage before each appendix -- these are fairly distinct, |

1346 | % and users may well want to refer to individual ones, so make it easy |

1347 | % to jump between them. |

1348 | |

1349 | \appendix |

1350 | |

1351 | \section{Current use of units (informative)} |

1352 | \label{appx:current} |

1353 | |

1354 | Many other projects have already produced lists of preferred |

1355 | representations of units. Those most commonly used in |

1356 | astronomy are described in this section. |

1357 | |

1358 | The first four schemes described below are used as references for the |

1359 | comparison tables presented later in this document. |

1360 | |

1361 | \subsection{IAU 1989\label{appx:IAU}} |

1362 | |

1363 | In section~5.1 of its Style Manual, the IAU gives a set |

1364 | of recommendations for representing units in publications \citep{wilkins89}. This document |

1365 | therefore provides useful reference guidelines, but is not directly |

1366 | applicable to VOUnits because the recommendations are more intended |

1367 | for correct typesetting in journals than for standardized metadata exchange. |

1368 | The IAU style will be summarized in the second column of the comparison tables. |

1369 | |

1370 | \subsection{OGIP 1993} |

1371 | |

1372 | NASA has defined a list of character strings specifying the basic physical units |

1373 | used within OGIP (Office of Guest Investigator Programs) FITS files \citep{george95}. Rules and guidelines on the construction |

1374 | of compound units are also outlined. |

1375 | |

1376 | HEASARC datasets follow these conventions, presented in the third column |

1377 | of the comparison tables. |

1378 | |

1379 | \subsection{Standards for astronomical catalogues} |

1380 | |

1381 | The conventions adopted at CDS are summarized in the Standards for Astronomical |

1382 | Catalogues, Version 2.0 \citep[\S3.2]{cds00}. They are presented in the fourth column |

1383 | of the comparison tables. |

1384 | |

1385 | \subsection{FITS 2010} |

1386 | |

1387 | In Section 4.3 of the reference FITS paper, \citet{pence10} describe how unit strings are to be expressed in |

1388 | FITS files. The recommendations are presented in the fifth column |

1389 | of the comparison tables. |

1390 | |

1391 | \subsection{Other usages} |

1392 | |

1393 | \begin{itemize} |

1394 | \item |

1395 | \violet{\footnotesize{\url{http://arxiv.org/pdf/astro-ph/0511616}}}\\ |

1396 | Dimensional Analysis applied to spectrum handling in VO context~\citep{osuna05} |

1397 | offers a mathematical framework to guess and recompute |

1398 | SI units for any quantity in astronomy. |

1399 | \item |

1400 | \violet{\footnotesize{\url{http://www.mel.nist.gov/msid/sima/07_ndml.htm}}}\\ |

1401 | NIST (National Institute of Standards \& Technology) project |

1402 | UnitsXML builds up an XML representation of units at the granularity |

1403 | level of a simple symbol string. |

1404 | \item \violet{\footnotesize{\url{https://jsr-275.dev.java.net/}}}\\ |

1405 | JAVA JSR-275 specifies Java packages for the programmatic |

1406 | handling of physical quantities and their expression as numbers of |

1407 | units. |

1408 | \item \texttt{aips++} |

1409 | \violet{\footnotesize{\url{http://aips2.nrao.edu/docs/aips++.html}}} and\\ |

1410 | \texttt{casacore} \violet{\footnotesize{\url{http://code.google.com/p/casacore/}}}\\ contain modules handling units and |

1411 | quantities with high precision. The packages are mainly in use for |

1412 | radio astronomy but are designed to be modular and adaptable. (NB |

1413 | contrary to the statement on the casacore link, aips++ is still very much in |

1414 | use as the toolkit behind the {\sc casa} package.) |

1415 | %\item IAU SOFA |

1416 | %\violet{\footnotesize{\url{http://www.iau-sofa.rl.ac.uk/}}} and\\ |

1417 | %USNO NOVAS |

1418 | %\violet{\footnotesize{\url{http://aa.usno.navy.mil/software/novas/novas_info.php}}}\\ |

1419 | %implement the IAU 2000 recommendations. |

1420 | \end{itemize} |

1421 | |

1422 | \clearpage |

1423 | \section{History: Comparison of syntaxes (informative)\label{appx:comparisons}} |

1424 | %\section{History: Comparison of unit-string schemes (informative)\label{appx:comparisons}} |

1425 | |

1426 | In this section, we compare the existing unit-string syntaxes and the |

1427 | proposed standard. We have included these comparisons for |

1428 | more-or-less historical reasons, to try to highlight the variations |

1429 | between syntaxes, and so illustrate the motivation for this |

1430 | Recommendation, namely that the current practice, though it may at |

1431 | first appear to have rough consensus, is disturbingly heterogeneous. |

1432 | |

1433 | \begin{table}[ht] |

1434 | \begin{tabular}{|L{0.2\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|} |

1435 | \hline |

1436 | & IAU & OGIP & StdCats & FITS & VOUnits\\\hline |

1437 | Units are strings of chars & & YES & & YES & YES\\\hline |

1438 | Case sensitive & YES & YES & YES & YES & YES\\\hline |

1439 | Character set & & & No spaces & ASCII text & ASCII printable\\\hline |

1440 | \end{tabular} |

1441 | \caption{Comparison of string representation and encoding.} |

1442 | \label{tabx:comparUnitEncoding} |

1443 | \end{table} |

1444 | |

1445 | \begin{table}[ht] |

1446 | \begin{tabular}{|L{0.2\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|} |

1447 | \hline |

1448 | & IAU & OGIP & StdCats & FITS & VOUnits\\\hline |

1449 | %\multicolumn{6}{|c|}{Base units} \\\hline |

1450 | The 6+1 base & \multicolumn{4}{c|}{\unit{m, s, A, K, mol, cd}} & idem \\ |

1451 | \cline{2-6} |

1452 | SI units (use \unit{s}, not sec, for seconds) & (1) & \unit{kg} & \unit{g} & \unit{kg}, but \unit{g} allowed & \unit{g}\\ |

1453 | \hline |

1454 | Dimensionless & \multicolumn{4}{c|}{\unit{rad, sr}} & idem \\ |

1455 | \cline{2-5} |

1456 | planar and solid angle& & & & (2) & \\\hline |

1457 | Derived units & \multicolumn{4}{c|}{\unit{Hz, N, Pa, J, W, C, V,}} & \\ |

1458 | with symbols & \multicolumn{4}{c|}{\unit{S, F, Wb, T, H, lm, lx}} & idem \\ |

1459 | & \unit{$\Omega$} & \unit{ohm} & \unit{Ohm} & \unit{Ohm} & \unit{Ohm}\\\hline |

1460 | \end{tabular} |

1461 | \caption[Comparison of base units]{Comparison of base units. Notes: (1) unit is \unit{kg}, but use \unit{g} with prefixes; (2) \unit{deg} preferred for decimal angles} |

1462 | \label{tabx:comparUnitBase} |

1463 | \end{table} |

1464 | |

1465 | %\subsection{Scale factors} |

1466 | |

1467 | \begin{table}[ht] |

1468 | \begin{tabular}{|L{0.2\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|} |

1469 | \hline |

1470 | & IAU & OGIP & StdCats sec.~3.2.3 & FITS & VOUnits\\\hline |

1471 | Scale factors, & \multicolumn{4}{c|}{\unit{d, c, m, n, p, f, a}} & idem \\ |

1472 | (multiple) & \multicolumn{4}{c|}{\unit{da, h, k, M, G, T, P, E}} & \\ |

1473 | prefixes & \unit{\micro} & \multicolumn{3}{c|}{\unit{u}} & \unit{u}\\ |

1474 | & & \multicolumn{3}{c|}{\unit{z, y, Z, Y}} & \unit{z, y, Z, Y}\\\hline |

1475 | Prefix--symbol concatenation & (1) & (2) & no space & no space (implicit) & no space\\\hline |

1476 | Prefix-able symbols & Not \unit{kg}: use \unit{g} & (3) & all & all & (4) \\\hline |

1477 | Use compound prefixes & should not & should never & must not & must not & must not\\\hline |

1478 | \end{tabular} |

1479 | \caption[Comparison of scale factors]{Comparison of scale factors. |

1480 | Notes: (1) no space, regarded as single symbol; |

1481 | (2)~no space, regarded as a single unit string; |

1482 | (3)~all units above, and \unit{eV, pc, Jy, Crab} (only \unit{mCrab} allowed); |

1483 | (4)~all (except \unit{P} for \unit{a}).} |

1484 | \label{tabx:comparUnitScale} |

1485 | \end{table} |

1486 | |

1487 | \begin{table}[ht] |

1488 | \begin{tabular}{|L{0.2\linewidth}|L{0.15\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.15\linewidth}|} |

1489 | \hline |

1490 | & IAU & OGIP & StdCats & FITS & VOUnits\\\hline |

1491 | minute & \unit{min, $^\mathrm{m}$} & \unit{min} & \unit{min} & \unit{min} & \unit{min}\\\hline |

1492 | hour & \unit{h, $^\mathrm{h}$} & \unit{h} & \unit{h} & \unit{h} & \unit{h}\\\hline |

1493 | day & \unit{d, $^\mathrm{d}$} & \unit{d} & \unit{d} & \unit{d} & \unit{d}\\\hline |

1494 | year & \unit{a} & \unit{yr} & \unit{a, yr} & \unit{a, yr} (1)& like FITS\\\hline |

1495 | arcsecond & \unit{''} & \unit{arcsec} & \unit{arcsec} & \unit{arcsec} & \unit{arcsec}\\\hline |

1496 | arcminute & \unit{'} & \unit{arcmin} & \unit{arcmin} & \unit{arcmin} & \unit{arcmin}\\\hline |

1497 | degree (angle) & \unit{$^\circ$} & \unit{deg} & \unit{deg} & \unit{deg} & \unit{deg}\\\hline |

1498 | milliarcsecond & \unit{mas} (use \unit{nrad}!) & & \unit{mas} & \unit{mas} & \unit{mas}\\\hline |

1499 | microarcsec & & & \unit{uarcsec} & & (2)\\\hline |

1500 | cycle & \unit{c, $^\mathrm{c}$} & & & & not used\\\hline |

1501 | astronomical unit & \unit{au} & \unit{AU} & \unit{AU} & \unit{AU} & \unit{AU}\\\hline |

1502 | parsec & \multicolumn{4}{c|}{\unit{pc}} & \unit{pc}\\\hline |

1503 | atomic mass & \unit{u} & & & \unit{u} & \unit{u}\\\hline |

1504 | electron volt & \multicolumn{4}{c|}{\unit{eV}} & \unit{eV}\\\hline |

1505 | jansky & \multicolumn{4}{c|}{\unit{Jy}} & \unit{Jy}\\\hline |

1506 | celsius degree & \unit{$^\circ$C} for meteorology, other use \unit{K}& & & & not used\\\hline |

1507 | century & (3)& & & & (4)\\\hline |

1508 | \end{tabular} |

1509 | \caption[Comparison of astronomy-related units]{Comparison of astronomy-related units. |

1510 | Notes: (1) Pa (peta-a) forbidden; |

1511 | (2) no dedicated symbol, use \unit{uarcsec}; |

1512 | (3) ha, cy should not be used; |

1513 | (4) no dedicated symbol, use \unit{ha} or \unit{hyr}} |

1514 | \label{tabx:comparUnitAstro} |

1515 | \end{table} |

1516 | |

1517 | \begin{table}[ht] |

1518 | \begin{tabular}{|L{0.2\linewidth}|L{0.15\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.15\linewidth}|} |

1519 | \hline |

1520 | & IAU & OGIP & StdCats & FITS & VOUnits\\\hline |

1521 | %\multicolumn{6}{|c|}{IAU (Table 7) strongly recommends to no longer use these} \\\hline |

1522 | \aa{}ngstr\"om & \unit{\AA} & \unit{angstrom} & 0.1nm & \unit{Angstrom} & \unit{angstrom}, \unit{Angstrom}\\\hline |

1523 | micron & \unit{\micro} & & & & not used \\\hline |

1524 | fermi & no symbol & & & & not used \\\hline |

1525 | barn & \unit{b} & \unit{barn} & \unit{barn} & \unit{barn} & \unit{barn}\\\hline |

1526 | cubic centimetre & \unit{cc} & & & & no dedicated symbol\\\hline |

1527 | dyne & \unit{dyn} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1528 | erg & \unit{erg} & \unit{erg} & (1) & \unit{erg} & \unit{erg} \\\hline |

1529 | % erg & \unit{erg} & \unit{erg} & No symbol. \unit{mW/m2} used for erg.cm-2.s-1 & \unit{erg} & \unit{erg} \\\hline |

1530 | calorie & \unit{cal} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1531 | bar & \unit{bar} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1532 | atmosphere & \unit{atm} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1533 | gal & \unit{Gal} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1534 | eotvos & \unit{E} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1535 | gauss & \unit{G} & \unit{G} & \unit{} & \unit{G} & \unit{G} \\\hline |

1536 | gamma & \unit{$\gamma$} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1537 | oersted & \unit{Oe} & \unit{} & \unit{} & \unit{} & not used \\\hline |

1538 | Imperial, non-metric & should not be used & \unit{} & \unit{} & \unit{} & not used \\\hline |

1539 | \end{tabular} |

1540 | \caption[Comparison of symbols deprecated by IAU]{Comparison of |

1541 | symbols deprecated by IAU (from \citet{wilkins89}: ``Table 7. Non-SI |

1542 | units and symbols whose continued use is deprecated''). |

1543 | Note: (1) no symbol -- \unit{mW/m2} used for \units{erg\,cm^{-2}\,s^{-1}}.} |

1544 | \label{tabx:comparUnitDeprecated} |

1545 | \end{table} |

1546 | |

1547 | \begin{table}[ht] |

1548 | \begin{tabular}{|p{0.2\linewidth}|p{0.15\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.12\linewidth}|p{0.15\linewidth}|} |

1549 | \hline |

1550 | & IAU & OGIP & StdCats & FITS & VOUnits\\\hline |

1551 | magnitude & \multicolumn{4}{c|}{\unit{mag}} & \unit{mag}\\\hline |

1552 | rydberg & \unit{} & \unit{} & \unit{Ry} & \unit{Ry} & \multirow{19}{0.15\linewidth}{same as FITS} \\\hline |

1553 | solar mass & \unit{$\mathrm{M}_\odot$} & & \unit{solMass} & \unit{solMass} &\\\cline{1-5} |

1554 | solar luminosity & \unit{} & \unit{} & \unit{solLum} & \unit{solLum} &\\\cline{1-5} |

1555 | solar radius & \unit{} & \unit{} & \unit{solRad} & \unit{solRad} &\\\cline{1-5} |

1556 | light year & \unit{} & \unit{lyr} & \unit{} & \unit{lyr} &\\\cline{1-5} |

1557 | count & \unit{} & \unit{count} & \unit{ct} & \unit{ct, count} &\\\cline{1-5} |

1558 | photon & \unit{} & \unit{photon} & \unit{} & \unit{photon, ph} &\\\cline{1-5} |

1559 | rayleigh & \unit{} & \unit{} & \unit{} & \unit{R} &\\\cline{1-5} |

1560 | pixel & \unit{} & \unit{pixel} & \unit{pix} & \unit{pix, pixel} &\\\cline{1-5} |

1561 | debye & \unit{} & \unit{} & \unit{D} & \unit{D} &\\\cline{1-5} |

1562 | relative to Sun & \unit{} & \unit{} & \unit{Sun} & \unit{Sun} &\\\cline{1-5} |

1563 | channel & \unit{} & \unit{chan} & \unit{} & \unit{chan} &\\\cline{1-5} |

1564 | bin & \unit{} & \unit{bin} & \unit{} & \unit{bin} &\\\cline{1-5} |

1565 | voxel & \unit{} & \unit{voxel} & \unit{} & \unit{voxel} &\\\cline{1-5} |

1566 | bit & \unit{} & \unit{} & \unit{bit} & \unit{bit} &\\\cline{1-5} |

1567 | byte & \unit{} & \unit{byte} & \unit{byte} & \unit{byte} &\\\cline{1-5} |

1568 | adu & \unit{} & \unit{} & \unit{} & \unit{adu} &\\\cline{1-5} |

1569 | beam & \unit{} & \unit{} & \unit{} & \unit{beam} &\\\hline |

1570 | & \unit{} & \unit{Crab} avoid use & \unit{} & \unit{} & not used \\\hline |

1571 | No unit, dimensionless & \unit{} & blank string & \unit{-} & \unit{} & empty string \\\hline |

1572 | Percent & & & \unit{\%} & & \unit{\%} \\\hline |

1573 | unknown & \unit{} & {\tiny\unit{UNKNOWN}} & \unit{} & \unit{} & \unit{unknown} \\\hline |

1574 | \end{tabular} |

1575 | \caption{Miscellaneous other symbols.} |

1576 | \label{tabx:comparUnitOther} |

1577 | \end{table} |

1578 | |

1579 | \begin{table}[th] |

1580 | \begingroup |

1581 | \begin{tabular}{|L{0.2\linewidth}|L{0.15\linewidth}|L{0.15\linewidth}|L{0.15\linewidth}|L{0.15\linewidth}|} |

1582 | \hline |

1583 | & IAU & OGIP & StdCats & FITS \\\hline |

1584 | %\multicolumn{6}{|c|}{Compound units} \\\hline |

1585 | Multiplication & space or dot (1) |

1586 | & space or star (2) |

1587 | & dot |

1588 | & space or\hfil\break star (3) \\\hline |

1589 | Division & per (4) |

1590 | & \unit{/} (5) |

1591 | & \unit{/}, no space |

1592 | & \unit{/}, no space\\\hline |

1593 | Use of multiple / & never |

1594 | & allowed |

1595 | & allowed |

1596 | & discouraged (6) \\\hline |

1597 | \unit{sym} raised to the power $y$ & superscript |

1598 | & (7) |

1599 | & (8) |

1600 | & (9) \\\hline |

1601 | Exponential of \unit{sym} & & \unit{exp(sym)} & & \unit{exp(sym)} \\\hline\hline |

1602 | Natural log of \unit{sym} & & \unit{ln(sym)} & & \unit{ln(sym)} \\\hline\hline |

1603 | Decimal log of \unit{sym} & & \unit{log(sym)} & \unit{[sym]} & \unit{log(sym)} \\\hline |

1604 | Square root of \unit{sym} & & \unit{sqrt(sym)} & & \unit{sqrt(sym)} \\\hline |

1605 | Other math & & (10) & & not used \\\hline |

1606 | ( ) & & allowed & allowed & optional around powers \\\hline |

1607 | powers & superscripts & (11) & integers & (12) \\\hline |

1608 | Numeric factor & not used & (13) & allowed & (14) \\\hline |

1609 | \end{tabular} |

1610 | \endgroup |

1611 | \caption[Mathematical expressions and combinations]{Mathematical expressions and symbol combinations. |

1612 | \label{tabx:comparUnitCombine} |

1613 | Notes: (1) space, except if previous unit ends with superscript; dot (\unit{.}) may be used; |

1614 | (2)~one or more spaces OR one asterisk (\unit{*}) with optional spaces on either side; |

1615 | (3)~single space OR asterisk (\unit{*}, no spaces) OR dot (\unit{.}, no spaces); |

1616 | (4)~use negative index or solidus (\unit{/}); |

1617 | (5)~solidus (\unit{/}) with optional spaces on either side, space not recommended after / OR negative index; |

1618 | (6)~may be used, but discouraged, `math precedence rule'; |

1619 | (7)~\unit{sym**($y$)} parenthesis optional if $y>0$; |

1620 | (8)~nothing -- \unit{sym$y$}, and use $+/-$ sign for \unit{10+21}; |

1621 | (9)~\unit{sym$y$} OR \unit{sym**($y$)} OR \unit{sym\^{}($y$)}, no space; |

1622 | (10)~\unit{$f$(sym)}, where $f$ is |

1623 | \unit{sin}, \unit{cos}, \unit{tan}, \unit{asin}, \unit{acos}, \unit{atan}, \unit{sinh}, \unit{cosh}, \unit{tanh}; |

1624 | (11)~decimal and integer fractions allowed; |

1625 | (12)~integer (sign and () optional), OR decimal or ratio between (); |

1626 | (13)~should be avoided; only powers of 10 allowed; should precede any unit string; |

1627 | (14)~optional 10**k, 10\texttt{\^}k, or 10$\pm$k.} |

1628 | \end{table} |

1629 | |

1630 | \iffalse |

1631 | \begin{longtable}[th]{|L{0.2\linewidth}|L{0.2\linewidth}|L{0.12\linewidth}|L{0.12\linewidth}|L{0.22\linewidth}|} |

1632 | \hline |

1633 | & IAU & OGIP & StdCats & FITS \\\hline |

1634 | %\multicolumn{6}{|c|}{Compound units} \\\hline |

1635 | Multiplication & space, except if previous unit ends with superscript; dot (\unit{.}) may be used |

1636 | & one or more spaces OR one asterisk (\unit{*}) with optional spaces on either side |

1637 | & dot (\unit{.}), no space |

1638 | & single space OR asterisk (\unit{*}, no spaces) OR dot (\unit{.}, no spaces) \\\hline |

1639 | Division & per. Use negative index or solidus (\unit{/}) |

1640 | & solidus (\unit{/}) with optional spaces on either side, space not recommended after / OR negative index |

1641 | & \unit{/} with no spaces |

1642 | & \unit{/} with no spaces \\\hline\hline |

1643 | Use of multiple / & MUST never use two / |

1644 | & allowed |

1645 | & allowed |

1646 | & may be used, discouraged, math precedence rule \\\hline\hline |

1647 | \unit{sym} raised to the power $y$ & superscript |

1648 | & \unit{sym**($y$)} parenthesis optional if $y>0$ |

1649 | & nothing: \unit{sym$y$} use +/- sign for \unit{10+21} |

1650 | & \unit{sym$y$} OR \unit{sym**($y$)} OR \unit{sym\^{}($y$)}, no space \\\hline\hline |

1651 | Exponential of \unit{sym} & & \unit{exp(sym)} & & \unit{exp(sym)} \\\hline\hline |

1652 | Natural log of \unit{sym} & & \unit{ln(sym)} & & \unit{ln(sym)} \\\hline\hline |

1653 | Decimal log of \unit{sym} & & \unit{log(sym)} & \unit{[sym]} & \unit{log(sym)} dimensionless argument \\\hline\hline |

1654 | Square root of \unit{sym} & & \unit{sqrt(sym)} & & \unit{sqrt(sym)} \\\hline\hline |

1655 | Other math & & {\small \unit{sin(sym), cos(sym), tan(sym), asin(sym), acos(sym), atan(sym), sinh(sym), cosh(sym), tanh(sym)} } & & not used \\\hline\hline |

1656 | ( ) & & allowed & allowed & optional around powers \\\hline\hline |

1657 | powers & superscripts & decimal and integer fractions allowed & integers only & integer (sign and () optional), OR decimal or ratio between () \\\hline |

1658 | Numeric factor & not used & should be avoided; only powers of 10 allowed; should precede any unit string & allowed & optional 10**k, 10\verb|^|k, or 10$\pm$k \\\hline\hline |

1659 | \caption{Comparison of mathematical expressions and symbol combinations.} |

1660 | \label{tabx:comparUnitCombine} |

1661 | \end{longtable} |

1662 | \fi |

1663 | |

1664 | \clearpage |

1665 | \section{Formal grammars\label{appx:grammar}} |

1666 | % These grammars are extracted from http://bitbucket.org/nxg/unity: |

1667 | % % cd src/grammars |

1668 | % % make unity-grammars.zip |

1669 | |

1670 | \emph{Subsection \ref{appx:vougrammar} is Normative, the other |

1671 | subsections are Informative.} |

1672 | |

1673 | In this section we provide formal (yacc-style) grammars for the four |

1674 | ASCII-based syntaxes discussed in this document. The FITS, OGIP and |

1675 | CDS grammars are not normative: the corresponding specification |

1676 | documents do not provide grammars, and instead describe the syntaxes |

1677 | in text, so that the grammars here are deductions from the |

1678 | specification text. |

1679 | This unfortunately means that some of these syntaxes are ambiguous. |

1680 | These ambiguities are discussed in the sections below. We recommend |

1681 | that VO applications parse these syntaxes in a way which is consistent |

1682 | with the grammars here. |

1683 | % |

1684 | The grammar for the VOUnits syntax, in \prettyref{appx:vougrammar}, is normative. |

1685 | |

1686 | We believe that the grammars below are such that if a string |

1687 | successfully parses in two distinct grammars, it means the same in |

1688 | both. |

1689 | |

1690 | The grammars here are from the `Unity' package at |

1691 | \url{https://bitbucket.org/nxg/unity}, which includes machine-readable |

1692 | grammars, lists of recommended units, and a collection of test cases. These are also extracted in |

1693 | machine-readable form |

1694 | at \url{https://code.google.com/p/volute/source/browse/trunk/projects/std-vounits/unity-grammars.zip}. |

1695 | |

1696 | In these grammars, the common terminals are as given in |

1697 | \prettyref{tabx:terminals}. Lexers \norm{must not} swallow whitespace |

1698 | in generating these terminals; whitespace is permitted in a units |

1699 | string only where the corresponding grammar permits |

1700 | the \texttt{WHITESPACE} terminal. |

1701 | |

1702 | \begin{table}[ht] |

1703 | \begin{tabular}{rL{9cm}} |

1704 | \texttt{CARET}&the \texttt{\^{}} character (\hex{5e})\\ |

1705 | \texttt{DIVISION}&the solidus, \texttt{/} (\hex{2f})\\ |

1706 | \texttt{DOT}&the dot/period/full-stop character (\hex{2e})\\ |

1707 | \texttt{FLOAT}&a string matching the regular expression |

1708 | \texttt{[-+]?[0-9]+\textbackslash.[0-9]+}\\ |

1709 | \texttt{LIT10}&a literal string `\texttt{10}' (the sequence \hex{31} \hex{30})\\ |

1710 | \texttt{OPEN\_P} / \texttt{CLOSE\_P}&parentheses (\hex{28} and \hex{29})\\ |

1711 | \texttt{SIGNED\_INTEGER}&an integer with a required leading sign, so |

1712 | matching the regular expression \texttt{[-+][0-9]+}\\ |

1713 | \texttt{STAR}&the asterisk (\hex{2a})\\ |

1714 | \texttt{STARSTAR}&a pair of asterisks, \texttt{**}\\ |

1715 | \texttt{STRING}&a non-empty sequence of letters \texttt{[a-zA-Z]+}\\ |

1716 | \texttt{UNSIGNED\_INTEGER}&an integer with no leading sign \texttt{[0-9]+}\\ |

1717 | \texttt{WHITESPACE}&a non-empty string of space characters (\hex{20} only)\\ |

1718 | \end{tabular} |

1719 | \caption[The terminals used in the grammars] |

1720 | {\label{tabx:terminals}The terminals used in the grammars; the |

1721 | notation \hex{nn} indicates hexadecimal ASCII character numbers; |

1722 | the digits are \hex{30} to \hex{39}, the letters are \hex{41} to \hex{5a} and \hex{61} to |

1723 | \hex{7a}, and the sign characters are \hex{2b} and \hex{2d}.} |

1724 | \end{table} |

1725 | |

1726 | \subsection{The FITS grammar (informative)} |

1727 | \label{appx:fitsgrammar} |

1728 | |

1729 | For the FITS units syntax, see section~4.3 of~\cite{pence10}, and its |

1730 | associated tables. Our preferred FITS grammar is in |

1731 | \prettyref{tabx:fitsgrammar}. |

1732 | |

1733 | As noted above in \prettyref{sec:fitsquote}, |

1734 | the FITS specification isn't completely clear on the topic of |

1735 | solidi, saying ``[t]he IAU style manual forbids |

1736 | the use of more than one solidus (/) character in a units |

1737 | string. However, since normal mathematical precedence rules apply |

1738 | in this context, more than one solidus may be used but is |

1739 | discouraged''. This does not really resolve the question of whether, for |

1740 | example, \texttt{kg/m s} should be parsed as \units{kg~m^{-1}~s^{-1}} |

1741 | or as \units{kg~m^{-1}~s}, since this is a question of both operator |

1742 | precedence and (left-)associativity, where there might be different |

1743 | rules internationally, and conflicts between mathematical and |

1744 | programming-language rules. Most people would \emph{probably} parse |

1745 | it as \units{kg~m^{-1}~s^{-1}}, but we trust that most educators would |

1746 | oblige students to rewrite the expression on the grounds that any |

1747 | ambiguity is too much. |

1748 | Here, we resolve the ambiguity by declaring that there can |

1749 | be only a single expression to the right of the solidus. |

1750 | |

1751 | It is a consequence of this that nothing can be |

1752 | successfully parsed in two different grammars, with different |

1753 | meanings. If the right-hand-side of the division could be a |

1754 | \texttt{product\_of\_units}, then \texttt{kg /m s} would parse in both |

1755 | the FITS and OGIP syntaxes, |

1756 | but mean \units{kg~m^{-1}~s^{-1}} in the FITS syntax, and |

1757 | \units{kg~m^{-1}~s} in the OGIP one. |

1758 | |

1759 | The FITS specification permits a leading numeric multiplier, but |

1760 | ``[c]reators of FITS files are encouraged to use the numeric |

1761 | multiplier only when the available standard scale factors of [SI] will |

1762 | not suffice''. |

1763 | |

1764 | The FITS specification permits \texttt{m(2)}, to indicate the square of |

1765 | unit~`m'. The grammar has to special-case this, in order to |

1766 | distinguish it from function application. |

1767 | |

1768 | Other ambiguities: |

1769 | \begin{itemize} |

1770 | \item The FITS specification may or may not be intended to permit |

1771 | \texttt{10+3 /m}, but we don't. |

1772 | \item It is possible to read the FITS spec as permitting |

1773 | \texttt{m\^{}1.5}, without parentheses. We take it to be |

1774 | invalid here. |

1775 | \end{itemize} |

1776 | |

1777 | \clearpage |

1778 | \begin{table}[t] |

1779 | \verbatiminput{unity-grammars/unity-fits.txt} |

1780 | \caption[The FITS grammar]{\label{tabx:fitsgrammar}The FITS grammar. |

1781 | See \prettyref{appx:fitsgrammar}.} |

1782 | \end{table} |

1783 | \clearpage |

1784 | |

1785 | \subsection{The OGIP grammar (informative)} |

1786 | \label{appx:ogipgrammar} |

1787 | |

1788 | For the OGIP units syntax, see \cite{george95}. Our preferred OGIP |

1789 | grammar is in \prettyref{tabx:ogipgrammar}. |

1790 | |

1791 | The OGIP specification somewhat reluctantly concedes (in its section |

1792 | 3.2) that ``occasionally it may be preferable to include [leading |

1793 | scale] factors on the grounds of user-friendliness'', but that ``[t]he |

1794 | inclusion of numerical factors should therefore be avoided wherever |

1795 | possible'', and it is ``suggested'' that the scale factor should in any case |

1796 | be restricted to powers of~10. |

1797 | %On these grounds, the \texttt{FLOAT} |

1798 | %terminal in the grammar should be extra-syntactically restricted to |

1799 | %being a round power of ten. |

1800 | |

1801 | Specification ambiguities: |

1802 | \begin{itemize} |

1803 | \item The OGIP specification permits a space between the leading |

1804 | factor and the rest of the unit (by implication from the provided |

1805 | examples). |

1806 | \item The specification does not indicate the format of the numerical |

1807 | factor in the case where it is not a power of ten. We have |

1808 | suggested \texttt{FLOAT} here (see \prettyref{tabx:terminals}). |

1809 | \item OGIP \emph{recommends} having no whitespace after the division |

1810 | solidus, but does not forbid it; therefore we permit it in this |

1811 | grammar. |

1812 | \item From its specification text, OGIP appears to permit |

1813 | \texttt{str1**y}, where \texttt{y} can be a float, even though none |

1814 | of its examples include this. The same interpretive logic would |

1815 | appear to permit \texttt{m**3/2}, but this seems to run too great a |

1816 | risk of being misparsed, and we forbid it here. |

1817 | \item In the same place, the text suggests that \texttt{str1**y} may |

1818 | omit the brackets `if~\texttt y is positive', but the context |

1819 | suggests that the intention is to permit this if~\texttt y is |

1820 | unsigned. In the grammar here, we permit the omission of the |

1821 | brackets only if~\texttt y is unsigned -- that is, \texttt{m**+2}, |

1822 | like \texttt{m**-2}, is forbidden. |

1823 | \end{itemize} |

1824 | |

1825 | %\clearpage |

1826 | \begin{table}[ht] |

1827 | \verbatiminput{unity-grammars/unity-ogip.txt} |

1828 | \caption[The OGIP grammar]{\label{tabx:ogipgrammar}The OGIP grammar. |

1829 | Note that the \texttt{FLOAT} in the \texttt{scalefactor} production |

1830 | must be a power of ten. |

1831 | See \prettyref{appx:ogipgrammar}.} |

1832 | \end{table} |

1833 | \clearpage |

1834 | |

1835 | \subsection{The CDS grammar (informative)} |

1836 | \label{appx:cdsgrammar} |

1837 | |

1838 | For the CDS units syntax, see \cite[\S3.2]{cds00}. Our preferred CDS |

1839 | grammar is in \prettyref{tabx:cdsgrammar}. It requires additional |

1840 | terminals, described in \prettyref{tabx:cdsterminals}. |

1841 | |

1842 | Specification ambiguities: |

1843 | \begin{itemize} |

1844 | \item The CDS document indicates that units should be raised to powers by |

1845 | concatenation of the unit string with an integer, but does so rather |

1846 | elliptically, so that it is not clear whether \texttt{m+2} is |

1847 | permitted (the relevant examples show this as \texttt{m2}). We take |

1848 | this to be permitted in this grammar. |

1849 | \item The specification does not indicate the format of the numerical |

1850 | factor in the case where it is not a power of ten and not |

1851 | a \texttt{CDSFLOAT}. We have suggested \texttt{FLOAT} here |

1852 | (see \prettyref{tabx:terminals}). |

1853 | \item The document does not specify or illustrate how \texttt{kg/m/s} |

1854 | should be parsed. Since the document mentions the OGIP standard (even |

1855 | though it does not permit OGIP's syntax for powers, \texttt{m**2}), we |

1856 | take it that this is valid, and equivalent to \units{kg~m^{-1}~s^{-1}}. |

1857 | \end{itemize} |

1858 | |

1859 | This specification places no restrictions on the leading scale factor. |

1860 | |

1861 | \begin{table}[ht] |

1862 | \verbatiminput{unity-grammars/unity-cds.txt} |

1863 | \caption[The CDS grammar]{\label{tabx:cdsgrammar}The CDS grammar. |

1864 | See \prettyref{appx:cdsgrammar} for discussion, |

1865 | and \prettyref{tabx:cdsterminals} for the additional terminals.} |

1866 | \end{table} |

1867 | \begin{table}[ht] |

1868 | \begin{tabular}{rL{10cm}} |

1869 | \texttt{CDSFLOAT}&a string matching the regular |

1870 | expression \texttt{[0-9]+\textbackslash.[0-9]+x10[-+][0-9]+} |

1871 | (that is, something resembling \texttt{1.5x10+11})\\ |

1872 | \texttt{OPEN\_SQ}&the open square bracket `\texttt{[}' (indicates logs |

1873 | in this syntax)\\ |

1874 | \texttt{CLOSE\_SQ}&the close square bracket `\texttt{]}'\\ |

1875 | \texttt{PERCENT}&the percent character `\%' |

1876 | \end{tabular} |

1877 | \caption[Extra CDS terminals]{\label{tabx:cdsterminals}Extra terminals |

1878 | for the CDS grammar} |

1879 | \end{table} |

1880 | \clearpage |

1881 | |

1882 | |

1883 | |

1884 | \subsection{The VOUnits grammar (normative)} |

1885 | \label{appx:vougrammar} |

1886 | |

1887 | The VOUnits grammar is defined by this section, by the grammar in |

1888 | \prettyref{tabx:vougrammar} |

1889 | (with the terminals of \prettyref{tabx:terminals} |

1890 | plus the extra ones listed in \prettyref{tabx:vounitsterminals}) |

1891 | and by the list of known units of \prettyref{tabx:knownunits}. |

1892 | |

1893 | The intention of the VOUnits grammar is that if a VOUnits string |

1894 | does not use the scalefactor, quoted-units or binary-prefix |

1895 | extensions |

1896 | (that is, if it avoids the \texttt{VOUFLOAT} |

1897 | and \texttt{QUOTED\_STRING} terminals and is restricted to SI decimal prefixes), |

1898 | then it will be parseable, with the same semantics, by FITS |

1899 | and CDS parsers, and that it will be parseable by an OGIP parser if |

1900 | dots are replaced by stars. |

1901 | See \prettyref{sec:deviations} for discussion. |

1902 | In particular: |

1903 | \begin{itemize} |

1904 | \item The product of units is indicated only by a dot, with no |

1905 | whitespace: \texttt{N.m}. |

1906 | \item Raising a unit to a power is done only with a double-star: |

1907 | \texttt{kg.m**2.s**-2}. |

1908 | \item There may be at most one division sign at the top level of an |

1909 | expression. |

1910 | \end{itemize} |

1911 | |

1912 | In \prettyref{tabx:vougrammar}, the \texttt{VOUFLOAT} terminal is a |

1913 | string matching either of the regular expressions |

1914 | \begin{itemize} |

1915 | \item\texttt{0\textbackslash.[0-9]+([eE][+-]?[0-9]+)?} |

1916 | \item\texttt{[1-9][0-9]*(\textbackslash.[0-9]+)?([eE][+-]?[0-9]+)?} |

1917 | \end{itemize} |

1918 | (that is, something resembling \texttt{0.123} or \texttt{1.5e+11}). |

1919 | |

1920 | |

1921 | \begin{table}[ht] |

1922 | \verbatiminput{unity-grammars/unity-vounits.txt} |

1923 | \caption[The VOUnits grammar]{\label{tabx:vougrammar}The VOUnits |

1924 | grammar. See \prettyref{appx:vougrammar} for discussion, |

1925 | and \prettyref{tabx:vounitsterminals} for additional terminals.} |

1926 | \end{table} |

1927 | \begin{table}[ht] |

1928 | \begin{tabular}{rL{10cm}} |

1929 | \texttt{VOUFLOAT}&see text, \prettyref{appx:vougrammar}\\ |

1930 | \texttt{QUOTED\_STRING}&a \texttt{STRING} between single quote marks |

1931 | (ASCII \hex{27}) |

1932 | \end{tabular} |

1933 | \caption[Extra VOUnits terminals]{\label{tabx:vounitsterminals}Extra terminals |

1934 | for the VOUnits grammar} |

1935 | \end{table} |

1936 | \clearpage |

1937 | |

1938 | \section{Updates of this document (informative)} |

1939 | \begin{itemize} |

1940 | \item 1.0-20131224: |

1941 | \begin{itemize} |

1942 | \item Grammar changes: minor (now incorporates the grammars of Unity v0.11). |

1943 | \item Various clarifications to the text, following on-list discussion. |

1944 | \end{itemize} |

1945 | \item 1.0-20131025: |

1946 | \begin{itemize} |

1947 | \item Grammar changes: The `\%' character is now treated as a special |

1948 | case, rather than being a permitted \texttt{STRING} character; it's only |

1949 | the CDS syntax that permits this character. Some readability |

1950 | adjustments to the grammars. Unit strings with leading slashes |

1951 | (eg \unit{/m3}) are no longer supported in the VOUnits syntax. |

1952 | The grammars now match Unity v0.10. |

1953 | \item Changed discussion/rationale for forbidding non-ASCII |

1954 | characters. |

1955 | \item Clarified that `?' -- which is specified as indicating an |

1956 | unknown unit -- is not part of the VOUnits grammar, and should be |

1957 | spotted by a caller before parsing begins. |

1958 | \item Clarified the extra terminals which some grammars use. |

1959 | \item Clarified that the ambiguity in \unit{dadu} should remain |

1960 | unresolved, and the correct behaviour unspecified (is it |

1961 | deci-\texttt{adu} or deka-\texttt{du}?). |

1962 | \end{itemize} |

1963 | \item 1.0-20131011: Changed `gramme' to `gram'; removed color property to distinguish arrows in Fig.~2; |

1964 | Removed astro'l unit abbreviation from known-units.tex |

1965 | \item 1.0-20130922: Responding to RFC and mailing list comments. |

1966 | Addition of quoted units and arbitrary scale-factor (so updates to |

1967 | grammars, which now match Unity v0.9). Some reformatting of tables. |

1968 | \item 1.0-20130724: Rephrasing and clarification, responding to RFC |

1969 | comments. Update unity grammars to current version (ie, version of 2013-07-22 18:40). |

1970 | \item 1.0-20130701: Simplified Architecture diagram. Added example |

1971 | with scientific notation. Adjusted locations of grammar tables to try |

1972 | to keep them closer to the associated text. |

1973 | \item 1.0-20130429: Some restructuring, some rephrasing, and a few layout changes. |

1974 | \item 1.0-20130225: Large tables from section 3 moved to Appendix A. Short summaries of symbols added |

1975 | to section 3. Changes to table of known units for consistency with text. Added explanations for units Sun and byte. |

1976 | \item 1.0-20121212: |

1977 | Minor typographical fixes. Added definition of OGIP. Removed last sentence from acknowledgements, which have been moved to the beginning of the document. Changed figure 1 to move Units in Semantics. Added `discouraged' in first line of \prettyref{tab:VOUnitCombine}. Color change in figure 2 and its label. |

1978 | \item 1.0-20120801: |

1979 | Minor typographical fixes |

1980 | \item 1.0-20120801: |

1981 | \begin{itemize} |

1982 | \item Included yacc-style grammars in document. |

1983 | \end{itemize} |

1984 | \item 1.0-20120718: |

1985 | \begin{itemize} |

1986 | \item Removed external tables refs in tables to avoid confusion. |

1987 | \item Removed refs to SOFA and NOVAS. |

1988 | \item Clarification of the ``no unit'' case in the text. |

1989 | \item Added formal grammar in annex. |

1990 | \item Minor editing and typo fixes. |

1991 | \end{itemize} |

1992 | \item 1.0-20120521: |

1993 | \begin{itemize} |

1994 | \item Typos fixed, removed F. Bonnarel from authors. |

1995 | \item One sentence rephrased in section 1.2 for clarity. |

1996 | \item Clarification of \unit{g} and \unit{kg} issue in \prettyref{sec:baseUnits}. |

1997 | \item Added remark on \unit{Pa} in \prettyref{sec:scaleFactors}. |

1998 | \item Micro-arcsecond and century explained in \prettyref{tabx:comparUnitAstro}. |

1999 | \item \prettyref{tabx:comparUnitDeprecated} completed. |

2000 | \item Added numeric factors in \prettyref{tabx:comparUnitCombine} and discussion in text. |

2001 | \end{itemize} |

2002 | \item 1.0-20111216: Major rework of the document. |

2003 | \item 0.3: initial public release. |

2004 | %\item version 0.1 to 0.2 |

2005 | % \begin{itemize} |

2006 | % \item 20090521 |

2007 | % \begin{itemize} |

2008 | % \item added UCD to Quantity in point 4 of subsection ~\ref{sec:labels} |

2009 | % \item added `.' in the notation in unit strings in section ~\ref{sec:simpleuse} |

2010 | % \item added a sentence on the help of UCd in quantity in section ~\ref{sec:UML} |

2011 | % \end{itemize} |

2012 | % \item 20090522 |

2013 | % \begin{itemize} |

2014 | % \item clarified the scope of the model in Section \ref{sec:purpose} |

2015 | % \item added references in Section \ref{sec:vocab} |

2016 | % \item added requirement to be consistent with Quantity DM in |

2017 | % Section~\ref{sec:quantities} |

2018 | % \item minor clarification and subediting |

2019 | % \end{itemize} |

2020 | % \end{itemize} |

2021 | \end{itemize} |

2022 | |

2023 | \clearpage |

2024 | \bibliographystyle{plainnat-eprints} |

2025 | \bibliography{bib} |

2026 | |

2027 | |

2028 | \end{document} |

Name | Value |
---|---|

svn:keywords |
Date Revision HeadURL |

msdemlei@ari.uni-heidelberg.de | ViewVC Help |

Powered by ViewVC 1.1.26 |