netKarma/GEC14_report: MDOD_Provenance_0.2.xsd

File MDOD_Provenance_0.2.xsd, 18.8 KB (added by scjensen@umail.iu.edu, 12 years ago)

Proposed MDOD Schema with Provenance

Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<xsd:schema targetNamespace="http://www.geni.net/namespaces/2012/07/mdod"
3            xmlns:mdod="http://www.geni.net/namespaces/2012/07/mdod"
4            xmlns:doi="http://www.doi.org/2010/DOISchema"
5            xmlns:opm="http://openprovenance.org/model/v1.1.a"
6            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
7            xml:lang="en"
8            elementFormDefault="qualified"
9            attributeFormDefault="unqualified" >
10
11        <xsd:annotation>
12                <xsd:documentation>
13                        This draft of an MDOD is an interpretation based
14                        on the initial draft MDOD presented by Harry Mussmann
15                        at GEC11 plus suggested modifications by IU.
16                       
17                        This draft version has also been influenced by comments
18                        on the MDOD from the Instrumentation and Measurement
19                        group at GEC11, draft MDODs by Jason Zurawski, and
20                        documentation from the IF-MAP group.
21                       
22                        Version 0.2 of this schema has also been influenced by
23                        discussions at GEC13 with the Instrumentation and Measurement
24                        group, well as general feedback and summary of MDOD
25                        status and issues by Giridhar Manepalli (CNRI) at GEC13, and
26                        the NetKarma provenance repository and schema.
27                       
28                        author: Scott Jensen, Indiana University
29                </xsd:documentation>
30        </xsd:annotation>
31       
32        <xsd:import namespace="http://www.doi.org/2010/DOISchema" schemaLocation="DOIMetadataKernel_v2.0_120308.xsd"/>
33        <xsd:import namespace="http://openprovenance.org/model/v1.1.a" schemaLocation="opm.1_1.xsd"/>
34       
35        <xsd:element name="mdoDescriptor" type="mdod:mdoDescriptorType"/>
36
37        <xsd:complexType name="mdoDescriptorType">
38                <xsd:annotation>
39                        <xsd:documentation>
40                                Should the MDOD be a single measuremet stream (e.g., from a single MP),
41                                a collection of measurements, or measurements plus derived products
42                                such as analysis or presentations prepared based on transformations of
43                                the measurement data. Initial drafts of the MDOD described as all of the
44                                measurements for an experiment.
45                               
46                                This draft assumes that the MDOD is a collection of measurements,
47                                provenance, and derived products or transformations of the measurement data,
48                                that describe an experiment or related set of experiments.  There could
49                                be multiple data objects in the MDOD that are derived from
50                                subsets of the measurement data where some subsets overlap, but the
51                                relationships between data objects are not strictly a tree (i.e., an MDO
52                                could have multiple results derived from it within the MDOD).
53                               
54                                This schema currently has 5 top-level elements, but only minimal identification is required:
55                                identification:     This is the identification for the MDOD as a collection.
56                                provenance:         This is an OPM provenance graph - such as the graph
57                                                    generated by NetKarma for an experiment.  For an aggregate
58                                                    this can represent how the MDOD itself is created.
59                                security:           optional element for setting policies.  Can also be set
60                                                    at the underlying dataDescriptors or inheritied there from
61                                                    the MDOD level.
62                                dataDescriptor:     There would be a seperate instance of this element for
63                                                    each MDO or derived data product described within the MDOD.
64                                mdodReference:      An MDOD could include other MDODs by reference either
65                                                    locally or based on a URL.
66                        </xsd:documentation>
67                </xsd:annotation>
68                <xsd:sequence>
69                        <xsd:element name="identification" type="mdod:identificationType"/>
70                        <xsd:element name="provenance" type="mdod:provenanceType" minOccurs="0"/>
71                        <xsd:element name="security" type="mdod:securityType" minOccurs="0"/>
72                        <xsd:element name="dataDescriptor" type="mdod:dataDescriptorType" minOccurs="0" maxOccurs="unbounded"/>
73                        <xsd:element name="mdodReference" type="mdod:mdodReferenceType" minOccurs="0" maxOccurs="unbounded"/>
74                </xsd:sequence>
75                <xsd:attribute name="lastUpdated" type="xsd:dateTime" use="required"/>
76        </xsd:complexType>
77       
78
79        <xsd:complexType name="identificationType">
80                <xsd:annotation>
81                        <xsd:documentation>
82                                When an MDOD represents a bundle that is archived and shared
83                                it would be assigned a DOI identifier.  Otherwise it can
84                                have an internal ID.
85                               
86                                For an internal ID, the format could be required to be consistent
87                                with the draft at GEC11 where an ID follows the format:
88                                domain:subdomain+object_type+object_name
89                        </xsd:documentation>
90                </xsd:annotation>
91                <xsd:sequence>
92                        <xsd:choice>
93                                <xsd:element name="doi" type="doi:doiName"/>
94                                <xsd:element name="mdodId" type="xsd:string"/>
95                        </xsd:choice>
96                        <xsd:element name="owner" type="mdod:geniContactType"/>
97                        <xsd:element name="projectId" type="xsd:string" minOccurs="0"/>
98                        <xsd:element name="experimentId" type="xsd:string" minOccurs="0"/>
99                        <xsd:element name="runId" type="xsd:string" minOccurs="0"/>
100                        <xsd:element name="title" type="xsd:string" minOccurs="0"/>
101                        <xsd:element name="abstract" type="xsd:string" minOccurs="0"/>
102                        <xsd:element name="subject" type="xsd:string" minOccurs="0"/>
103                        <xsd:element ref="mdod:keywordset" minOccurs="0" maxOccurs="unbounded"/>
104                </xsd:sequence>
105        </xsd:complexType>
106       
107       
108        <xsd:element name="keywordset" type="mdod:keywordsetType"/>
109       
110        <xsd:complexType name="keywordsetType">
111                <xsd:sequence>
112                        <xsd:element name="source" type="xsd:string"/>
113                        <xsd:element name="keyword" type="xsd:string" maxOccurs="unbounded"/>
114                </xsd:sequence>
115        </xsd:complexType>
116       
117
118        <xsd:complexType name="provenanceType">
119                <xsd:sequence>
120                        <xsd:element ref="opm:opmGraph"/>
121                </xsd:sequence>
122                <xsd:attribute name="workflowId" type="xsd:string" use="optional"/>
123        </xsd:complexType>             
124       
125               
126        <xsd:complexType name="dataDescriptorType">
127                <xsd:annotation>
128                        <xsd:documentation>
129                                The dataDescriptor is for measurement or other data objects
130                                local to the MDOD that are not stored or accessible from their
131                                own MDOD.  The data descriptor could represent data stored in
132                                another location.
133                                The dataDescriptor maps to the descriptor in the draft MDOD version 0.2.1
134                                All of the content of the descriptorSecurity element is optional.
135                                Should there instead be only nested MDODs?
136                        </xsd:documentation>
137                </xsd:annotation>
138                <xsd:sequence>
139                        <xsd:element name="descriptorIdentification" type="mdod:descriptorIdentificationType"/>
140                        <xsd:element name="descriptorSecurity" type="mdod:securityType"/>
141                        <xsd:element name="dataDescription" type="mdod:dataDescriptionType"/>
142                </xsd:sequence>
143        </xsd:complexType>
144       
145
146        <xsd:complexType name="descriptorIdentificationType">
147                <xsd:annotation>
148                        <xsd:documentation>
149                                Both the MDOD itself and the descriptor have identification elements. The
150                                identification section within the data descriptor would pertain to a single
151                                data object whereas the MDOD identification section relates to the MDOD as a
152                                whole, which can represent a set of measurements and derived data products.
153                               
154                                Since the dataDescriptor's identification should be populated automatically
155                                if possible (users do not enter metadata), the abstract and subject are moved
156                                to the MDOD level and eliminated here.
157                        </xsd:documentation>
158                </xsd:annotation>
159                <xsd:sequence>
160                        <xsd:element ref="mdod:locator" maxOccurs="unbounded"/>
161                        <xsd:element name="title" type="xsd:string" minOccurs="0"/>
162                        <!-- Do we need an abstract or subject within each descriptor? -->
163                        <!-- These are included at the MDOD level, and require human   -->
164                        <!-- input - so they cannot be automated.                      -->
165                        <!-- xsd:element name="abstract" type="xsd:string" minOccurs="0"/ -->
166                        <!-- xsd:element name="subject" type="xsd:string" minOccurs="0"/ -->
167                        <xsd:element ref="mdod:keywordset" minOccurs="0" maxOccurs="unbounded"/>
168                        <xsd:element name="objectType" type="mdod:sourcedStringType"/>
169                        <xsd:element name="dataCollectionGeographicLocation" type="xsd:string" minOccurs="0"/>
170                        <xsd:choice minOccurs="0">
171                                <xsd:element ref="mdod:dataCollectionTimeRange"/>
172                                <xsd:element name="datacollectionTime" type="xsd:dateTime" maxOccurs="unbounded"/>
173                        </xsd:choice>
174                        <!-- Since the descriptor is local to the bundle, project, experiment and run ID -->
175                        <!-- were moved up to the MDOD identification level.                             -->
176                        <xsd:element name="sliceId" type="xsd:string" minOccurs="0"/>
177                </xsd:sequence>
178        </xsd:complexType>
179       
180        <xsd:element name="locator" type="mdod:locatorType"/>
181       
182        <xsd:complexType name="locatorType">
183                <xsd:annotation>
184                        <xsd:documentation>
185                                The original MDOD had the type and value separate with path, url,
186                                and other as the types and text for the value.  Here they are a
187                                choice and type is indicated by which element is used.
188                               
189                                Contact was made optional.  If the data being described is local,
190                                the contact could be redundant.
191                               
192                                Is the scope necessary? a path would be local, and any other value
193                                would be external?
194                               
195                                Are locators other than paths or URLs needed?
196                        </xsd:documentation>
197                </xsd:annotation>
198                <xsd:sequence>
199                        <xsd:element name="scope" type="mdod:locatorScopeType"/>
200                        <xsd:choice>
201                                <xsd:element name="locatorPath" type="xsd:string"/>
202                                <xsd:element name="locatorUrl" type="xsd:anyURI"/>
203                                <xsd:element name="locatorOther" type="xsd:string"/>
204                        </xsd:choice>
205                        <xsd:element name="accessMethod" type="xsd:string"/>
206                        <xsd:element name="contact" type="mdod:geniContactType" minOccurs="0"/>
207                </xsd:sequence>
208        </xsd:complexType>
209       
210        <xsd:simpleType name="locatorScopeType">
211                <xsd:annotation>
212                        <xsd:documentation>
213                                The original MDOD schema had three options for the scope of the locator:
214                                global, per_association, and within_holder.  Are these needed? If path
215                                based it's local and if URL based it would be global.  Is there a need
216                                for other alternatives such as association? 
217                        </xsd:documentation>
218                </xsd:annotation>
219                <xsd:restriction base="xsd:string">
220                        <xsd:enumeration value="GLOBAL"/>
221                        <xsd:enumeration value="ASSOCIATION"/>
222                        <xsd:enumeration value="LOCAL"/>
223                </xsd:restriction>
224        </xsd:simpleType>
225       
226        <xsd:element name="dataCollectionTimeRange" type="mdod:dataCollectionTimeRangeType"/>
227       
228        <xsd:complexType name="dataCollectionTimeRangeType">
229                <xsd:sequence>
230                        <xsd:element name="startTime" type="xsd:dateTime"/>
231                        <xsd:element name="endTime" type="xsd:dateTime" minOccurs="0"/>
232                        <xsd:element name="frequency" type="mdod:measuredIntType" minOccurs="0"/>
233                </xsd:sequence>
234        </xsd:complexType>
235       
236
237        <xsd:complexType name="securityType">
238                <xsd:annotation>
239                        <xsd:documentation>
240                                In this draft the policy and method elements are sourced strings.  This
241                                approach would accomodate standardized policies within GENI that could be
242                                specified based on a controlled vocabulary, but the ability to express
243                                more complex policies may be desirable.
244                        </xsd:documentation>
245                </xsd:annotation>
246                <xsd:sequence>
247                        <xsd:element name="dataCollectionPolicy" type="mdod:sourcedStringType" minOccurs="0"/>
248                        <xsd:element name="encryptionMethod" type="mdod:sourcedStringType" minOccurs="0"/>
249                        <xsd:element name="anonymizationMethod" type="mdod:geniPolicyType" minOccurs="0"/>
250                        <xsd:element name="sharingMethod" type="mdod:geniPolicyType" minOccurs="0"/>
251                        <xsd:element name="disposalMethod" type="mdod:geniPolicyType" minOccurs="0"/>
252                </xsd:sequence>
253        </xsd:complexType>
254       
255        <xsd:complexType name="dataDescriptionType">
256                <xsd:annotation>
257                        <xsd:documentation>
258                                The MDOD can describe both measurements and transformations such
259                                as the analysis or a presentation generated from the measurement
260                                data, or even an external publication of the results.
261                                If these different types are described by fundementally different
262                                metadata, the dataDescription should contain additional alternatives
263                                other than the measurement event or analysis event.
264                        </xsd:documentation>
265                </xsd:annotation>
266                <xsd:sequence>
267                        <xsd:choice>
268                                <xsd:element ref="mdod:measurementEvent" />
269                                <xsd:element ref="mdod:analysisEvent" />
270                        </xsd:choice>
271                </xsd:sequence>
272        </xsd:complexType>
273       
274        <xsd:element name="measurementEvent" type="mdod:measurementEventType"/>
275       
276        <xsd:complexType name="measurementEventType">
277                <xsd:annotation>
278                        <xsd:documentation>
279                                A prior version of the measurement event was based on the MDOD
280                                version 0.2.1 draft discussed at GEC11.  There was discussion as
281                                to whether it should be extended to capture different measurement
282                                tools and vendor extentions that allow for future development.
283                               
284                                Initial versions included more detailed elements such as flowrate and size. 
285                                The MDOD and dataDescriptor should describe what was captured, not
286                                the measurements themselves, so do we need that level of detail?
287                               
288                                Interpretation method is included as a string based on a controlled vocabulary
289                                which is the source.  Do we need to extend this further to be machine readable?
290                                Would the interpretation method need to be able to specify configuration parameters?
291                        </xsd:documentation>
292                </xsd:annotation>
293                <xsd:sequence>
294                        <xsd:element name="category" type="mdod:sourcedStringType"/>
295                        <xsd:element name="format" type="mdod:sourcedStringType"/>
296                        <xsd:element name="interpretationMethod" type="mdod:sourcedStringType"/>
297                        <xsd:element ref="mdod:measurementParameter" minOccurs="0" maxOccurs="unbounded"/>
298                </xsd:sequence>
299        </xsd:complexType>
300       
301        <xsd:element name="measurementParameter" type="mdod:measurementParameterType"/>
302       
303        <xsd:complexType name="measurementParameterType">
304                <xsd:sequence>
305                        <xsd:element name="name" type="mdod:sourcedStringType"/>
306                        <xsd:element name="dataType" type="mdod:sourcedStringType"/>
307                        <xsd:element name="uom" type="mdod:sourcedStringType"/>
308                </xsd:sequence>
309        </xsd:complexType>
310       
311        <xsd:element name="analysisEvent" type="mdod:analysisEventType"/>
312       
313        <xsd:complexType name="analysisEventType">
314                <xsd:annotation>
315                        <xsd:documentation>
316                                The analysis even would capture derived products such as an
317                                analysis based on measurement data or a presentation. 
318                        </xsd:documentation>
319                </xsd:annotation>
320                <xsd:sequence>
321                        <xsd:element name="category" type="mdod:sourcedStringType"/>
322                        <xsd:element name="format" type="mdod:sourcedStringType"/>
323                        <!-- Need to determine what metadata would describe derived data products -->
324                </xsd:sequence>
325        </xsd:complexType>
326       
327       
328        <xsd:complexType name="mdodReferenceType">
329                <xsd:annotation>
330                        <xsd:documentation>
331                                The path based reference should include the ID of the referenced MDOD.
332                        </xsd:documentation>
333                </xsd:annotation>
334                <xsd:sequence>
335                        <xsd:choice>
336                                <xsd:element name="doi" type="doi:doiName"/>
337                                <xsd:element name="mdodId" type="xsd:string"/>
338                                <xsd:element name="mdodPath" type="xsd:string"/>
339                        </xsd:choice>
340                </xsd:sequence>
341        </xsd:complexType>
342       
343        <!-- ********** Reused Types ********** -->
344        <xsd:complexType name="geniContactType">
345                <xsd:annotation>
346                        <xsd:documentation>
347                                Is there an ID available to identify users in GENI?
348                                Should there be a project ID or other association?
349                                Do we need an enum for type of contact (e.g., user, operator, aggregate provider)
350                        </xsd:documentation>
351                </xsd:annotation>
352                <xsd:sequence>
353                        <xsd:element name="userName" type="xsd:string"/>
354                        <xsd:element name="organization" type="mdod:temporallyBoundLabelType"
355                                minOccurs="0" maxOccurs="unbounded"/>
356                        <xsd:element name="phone" type="mdod:temporallyBoundLabelType"
357                                minOccurs="0" maxOccurs="unbounded"/>
358                        <xsd:element name="email" type="mdod:temporallyBoundLabelType"
359                                minOccurs="0" maxOccurs="unbounded"/>
360                </xsd:sequence>
361        </xsd:complexType>
362       
363       
364        <xsd:complexType name="geniPolicyType">
365                <xsd:annotation>
366                        <xsd:documentation>
367                                Policies in the draft MDOD from GEC11 had an enum as to whether there is a policy
368                                and an accommpanying optional description.  Instead of a description, should
369                                users be able to provide a URL where the poliiy is?  The use of a URL to the
370                                policy instead of the policy itself.
371                                </xsd:documentation>
372                </xsd:annotation>
373                <xsd:sequence>
374                        <xsd:element name="policyApplication" type="mdod:policyApplicationType"/>
375                        <xsd:element name="policyDescription" type="xsd:string" minOccurs="0"/>
376                        <xsd:element ref="mdod:policyReference" minOccurs="0"/>
377                </xsd:sequence>
378        </xsd:complexType>
379       
380        <xsd:simpleType name="policyApplicationType">
381                <xsd:restriction base="xsd:string">
382                        <xsd:enumeration value="YES"/>
383                        <xsd:enumeration value="YES_INHERITED"/>
384                        <xsd:enumeration value="NOT_REQUIRED"/>
385                </xsd:restriction>
386        </xsd:simpleType>
387       
388        <xsd:element name="policyReference" type="mdod:policyReferenceType"/>
389       
390        <xsd:complexType name="policyReferenceType">
391                <xsd:annotation>
392                        <xsd:documentation>
393                                A version element is included in case a policy URL only reflects the
394                                current policy, or contains multiple versions of a policy.
395                        </xsd:documentation>
396                </xsd:annotation>       
397                <xsd:sequence>
398                        <xsd:element name="policyUrl" type="xsd:anyURI"/>
399                        <xsd:element name="version" type="xsd:string" minOccurs="0"/>
400                </xsd:sequence>
401        </xsd:complexType>
402       
403        <!-- ********** Basic Reused Types ********** -->
404       
405        <xsd:complexType name="measuredIntType">
406                <xsd:simpleContent>
407                        <xsd:extension base="xsd:int">
408                                <xsd:attribute name="uom" type="xsd:string" use="required"/>
409                        </xsd:extension>
410                </xsd:simpleContent>
411        </xsd:complexType>
412       
413        <xsd:complexType name="measuredDoubleType">
414                <xsd:simpleContent>
415                        <xsd:extension base="xsd:double">
416                                <xsd:attribute name="uom" type="xsd:string" use="required"/>
417                        </xsd:extension>
418                </xsd:simpleContent>
419        </xsd:complexType>
420       
421        <xsd:complexType name="temporallyBoundLabelType">
422                <xsd:simpleContent>
423                        <xsd:extension base="xsd:string">
424                                <xsd:attribute name="startDate" type="xsd:date" use="optional"/>
425                                <xsd:attribute name="endDate" type="xsd:date" use="optional"/>
426                        </xsd:extension>
427                </xsd:simpleContent>
428        </xsd:complexType>
429       
430        <xsd:complexType name="sourcedStringType">
431                <xsd:annotation>
432                        <xsd:documentation>
433                                The sourced string type allows keywords, data types, and
434                                other elements in the schema to be specified based on
435                                cotrolled vocabularies created by communities internal
436                                or external to GENI.  To the extent that a value is based
437                                on a controlled vocabulary, the defining source, prefferably
438                                a resolvable URI, would be included.
439                                If not based on a controlled vocabulary, should the
440                                attribute instead be optional or should the value "NONE" be
441                                used as the source.
442                        </xsd:documentation>
443                </xsd:annotation>       
444                <xsd:simpleContent>
445                        <xsd:extension base="xsd:string">
446                                <xsd:attribute name="source" type="xsd:string" use="required"/>
447                        </xsd:extension>
448                </xsd:simpleContent>
449        </xsd:complexType>
450       
451</xsd:schema>