Skip to topic | Skip to bottom

Provenance Challenge

Challenge
Challenge.VisTrails3

Start of topic | Skip to actions

Provenance Challenge: VisTrails, University of Utah

Participating Team

Team and Project Details

Workflow Representation

Workflow Version 1 | png format
[PNG] | [PDF]

Workflow Version 2 | png format
[PNG] | [PDF]

Workflow Version 3 | png format
[PNG] | [PDF]

Open Provenance Model Output

To generate OPM output, we combined information from the execution log with the workflow specification and the module registry. We have included all pieces of this output as xml files including the original vistrail. The OPM output is generated from a run of version 121 of the workflow, and the wf file contains that version.

We also have versions of the log and OPM translation when the IsMatchTableRowCount method returned False:

Note that the values of processes and artifacts are VisTrails entities. We include 3 different accounts of the workflow in our OPM output. The first shows a Map function that processes a ListOfElements in order to loop over all of the CSVFileEntries; the second shows each iteration, where each iteration is a Group (subworkflow) of modules; and the third shows the iterations with the interior steps.

OPM Account 0 (png format)
[PNG] [PDF]
OPM Account 1 (png format)
[PNG] [PDF]
OPM Account 2 (png format)
[PNG] [PDF]

The same workflow, but halting at the IsMatchTableRowCount check for the second table. Again, we have three accounts:

OPM Account 0 (png format)
[PNG] [PDF]
OPM Account 1 (png format)
[PNG] [PDF]
OPM Account 2 (png format)
[PNG] [PDF]

Query Results

We used XQuery to query the XML specifications. Upstream/downstream queries were implemented using recursive functions as follows:

(: derivedFrom* - artefacts upstream :)
(:
   Returns every artifact that the given artifact(s) were transitively derived
   from: the artifacts used by the processes that generated $a, then the
   artifacts those were derived from, and so on.

   $d - node searched with '//' for the OPM graph (callers pass a document node)
   $a - zero or more starting artifacts; an empty sequence yields an empty result
   Returns the upstream artifacts without duplicates, in document order
   (a consequence of the union operator).

   No cycle detection is performed: a graph containing a derivation cycle
   makes the recursion run without end.
:)
declare function local:derivedFrom($d, $a as element(opm:artifact)*)
   as element(opm:artifact)* {
   (: processes that generated any of the starting artifacts :)
   let $generators := $d//opm:process[@id = $d//opm:wasGeneratedBy[opm:effect/@id = $a/@id]/opm:cause/@id]
   (: 'used' edges whose consuming process (the effect) is one of those generators :)
   let $inputs := $d//opm:used[opm:effect/@id = $generators/@id]
   (: direct parents; selecting them with a path expression removes duplicates,
      so a parent shared by several edges is expanded only once :)
   let $a1 := $d//opm:artifact[@id = $inputs/opm:cause/@id]
   (: stop when nothing lies further upstream, otherwise climb one more level
      for the whole set of parents at once :)
   return if (empty($a1)) then () else $a1 | local:derivedFrom($d, $a1)
};

(: triggeredBy* - processes upstream :)
(:
   Returns every process that transitively triggered the given process(es):
   the processes that generated an artifact used by $p, then the processes
   that triggered those, and so on.

   $d - node searched with '//' for the OPM graph (callers pass a document node)
   $p - zero or more starting processes; an empty sequence yields an empty result
   Returns the upstream processes without duplicates, in document order
   (a consequence of the union operator).

   No cycle detection is performed: a graph containing a cycle makes the
   recursion run without end.
:)
declare function local:triggeredBy($d, $p as element(opm:process)*)
   as element(opm:process)* {
   (: artifacts consumed by any of the starting processes :)
   let $consumed := $d//opm:artifact[@id = $d//opm:used[opm:effect/@id = $p/@id]/opm:cause/@id]
   (: 'wasGeneratedBy' edges that produced those artifacts :)
   let $producedBy := $d//opm:wasGeneratedBy[opm:effect/@id = $consumed/@id]
   (: direct upstream processes; selecting them with a path expression removes
      duplicates, so each one is expanded only once :)
   let $p1 := $d//opm:process[@id = $producedBy/opm:cause/@id]
   (: stop when nothing lies further upstream, otherwise climb one more level
      for the whole set at once :)
   return if (empty($p1)) then () else $p1 | local:triggeredBy($d, $p1)
};

(: triggers* - processes downstream :)
(:
   Returns every process that the given process(es) transitively triggered:
   the processes that used an artifact generated by $p, then the processes
   those triggered, and so on.

   $d - node searched with '//' for the OPM graph (callers pass a document node)
   $p - zero or more starting processes; an empty sequence yields an empty result
   Returns the downstream processes without duplicates, in document order
   (a consequence of the union operator).

   No cycle detection is performed: a graph containing a cycle makes the
   recursion run without end.
:)
declare function local:triggers($d, $p as element(opm:process)*)
   as element(opm:process)* {
   (: artifacts generated by any of the starting processes :)
   let $produced := $d//opm:artifact[@id = $d//opm:wasGeneratedBy[opm:cause/@id = $p/@id]/opm:effect/@id]
   (: 'used' edges that consume those artifacts :)
   let $consumers := $d//opm:used[opm:cause/@id = $produced/@id]
   (: direct downstream processes; selecting them with a path expression removes
      duplicates, so each one is expanded only once :)
   let $p1 := $d//opm:process[@id = $consumers/opm:effect/@id]
   (: stop when nothing lies further downstream, otherwise descend one more
      level for the whole set at once :)
   return if (empty($p1)) then () else $p1 | local:triggers($d, $p1)
};

Vistrails

Query 1

declare namespace opm='http://openprovenance.org/model/v1.01.a';
let $d := doc('workflow_opm2.xml')

(: The user must first find the detection value in the DB; the name of the table
   holding it ('P2Detection') is then used to find the artifact. :)
(: NOTE(review): 'artifact' is selected without the opm: prefix here, while
   local:derivedFrom is declared over opm:-prefixed elements - confirm which
   namespace workflow_opm2.xml actually uses. :)
let $a := $d//artifact[value/function/parameter/@val = 'P2Detection']

(: Return all upstream artifacts that reference a P2Detection.csv file.
   'some ... satisfies' tests each parameter value on its own: fn:ends-with
   accepts at most one string, so handing it the whole @val sequence raises a
   type error for any artifact that carries more than one parameter. :)
return local:derivedFrom($d, $a)[some $v in value/function/parameter/@val
                                 satisfies ends-with($v, 'P2Detection.csv')]
            
(:
result:
<?xml version="1.0" encoding="UTF-8"?>
<artifact id="a34">
      <value>
        <function id="-1" name="file" pos="0">
          <parameter alias="" id="-1" name="" pos="0" type="edu.utah.sci.vistrails.basic:File"
                    val="/vistrails/pc3/SampleData/J062945/P2_J062945_B001_P2fits0_20081115_P2Detection.csv"/>
        </function>
      </value>
      <account id="acct2"/>
    </artifact>
:)

Query 2

let $graph := doc('workflow_opm2.xml')

(: Artifacts whose parameters name the P2Detection database entry :)
let $dbEntries := $graph//artifact[value/function/parameter/@val = 'P2Detection']

(: Executions of the LoadCSVFileIntoDB module :)
let $loaders := $graph//process[value/moduleExec/@moduleName='LoadCSVFileIntoDB']

(: The loader that generated such an artifact: follow each wasGeneratedBy edge
   from the artifact (effect) back to the process (cause) :)
let $loadProcess :=
    for $entry in $dbEntries,
        $edge in $graph//wasGeneratedBy,
        $loader in $loaders
    where $edge/effect/@id = $entry/@id
      and $edge/cause/@id = $loader/@id
    return $loader

(: Everything that runs downstream of the loader :)
let $downstream := local:triggers($graph, $loadProcess)

(: Keep only ComputeColumns executions; executions that failed
   (non-empty error attribute) are excluded :)
let $found := $downstream[value/moduleExec/@moduleName = "ComputeColumns"
                          and value/moduleExec/@error = ""]

return if (exists($found)) then "yes" else "no"

(:
result:
<?xml version="1.0" encoding="UTF-8"?>yes
:)

Query 3

let $graph := doc('workflow_opm2.xml')

(: Artifacts holding the image-metadata database entry: one parameter names the
   table P2ImageMeta and one carries the specific value J062945_LoadDB :)
let $imageEntries := $graph//artifact[value/function/parameter/@val = 'P2ImageMeta']
                                     [value/function/parameter/@val = 'J062945_LoadDB']

(: The process that generated such an artifact: follow each wasGeneratedBy edge
   from the artifact (effect) back to the process (cause) :)
let $loadProcess :=
    for $entry in $imageEntries,
        $edge in $graph//wasGeneratedBy,
        $producer in $graph//process
    where $edge/effect/@id = $entry/@id
      and $edge/cause/@id = $producer/@id
    return $producer

(: All processes upstream of that producer :)
return local:triggeredBy($graph, $loadProcess)
            
(:
Result: (there is no way of removing results that do not affect the value)

<?xml version="1.0" encoding="UTF-8"?>
<process id="p0">
      <value>
        <moduleExec cached="0" completed="1" error="" id="1" machine_id="1" moduleId="12"
                  moduleName="String"
                  tsEnd="2009-05-14 16:51:46"
                  tsStart="2009-05-14 16:51:46"/>
      </value>
      <account id="acct0"/>
      <account id="acct1"/>
      <account id="acct2"/>
    </process>
<process id="p1">
      <value>
        <moduleExec cached="0" completed="1" error="" id="2" machine_id="1" moduleId="13"
                  moduleName="ConcatenateString"
                  tsEnd="2009-05-14 16:51:46"
                  tsStart="2009-05-14 16:51:46"/>
      </value>
      <account id="acct0"/>
      <account id="acct1"/>
      <account id="acct2"/>
    </process>
<process id="p2">
      <value>
        <moduleExec cached="0" completed="1" error="" id="3" machine_id="1" moduleId="2"
                  moduleName="GetCSVFiles"
                  tsEnd="2009-05-14 16:51:46"
                  tsStart="2009-05-14 16:51:46">
          <annotation id="1" key="used_files"
                     value="['/vistrails/pc3/SampleData/J062945/csv_ready.csv']"/>
        </moduleExec>
      </value>
      <account id="acct0"/>
      <account id="acct1"/>
      <account id="acct2"/>
    </process>
<process id="p3">
      <value>
        <moduleExec cached="0" completed="1" error="" id="4" machine_id="1" moduleId="4"
                  moduleName="CreateEmptyLoadDB"
                  tsEnd="2009-05-14 16:51:46"
                  tsStart="2009-05-14 16:51:46"/>
      </value>
      <account id="acct0"/>
      <account id="acct1"/>
      <account id="acct2"/>
    </process>
<process id="p11">
      <value>
        <moduleExec cached="" completed="1" error="" id="-1" machine_id="" moduleId="-1"
                  moduleName="Split"
                  tsEnd=""
                  tsStart=""/>
      </value>
      <account id="acct1"/>
      <account id="acct2"/>
    </process>
<process id="p21">
      <value>
        <moduleExec cached="0" completed="1" error="" id="16" machine_id="1" moduleId="5"
                  moduleName="InputPort"
                  tsEnd="2009-05-14 16:51:46"
                  tsStart="2009-05-14 16:51:46"/>
      </value>
      <account id="acct2"/>
    </process>
<process id="p22">
      <value>
        <moduleExec cached="0" completed="1" error="" id="17" machine_id="1" moduleId="4"
                  moduleName="InputPort"
                  tsEnd="2009-05-14 16:51:46"
                  tsStart="2009-05-14 16:51:46"/>
      </value>
      <account id="acct2"/>
    </process>
<process id="p23">
      <value>
        <moduleExec cached="0" completed="1" error="" id="18" machine_id="1" moduleId="3"
                  moduleName="ReadCSVFile"
                  tsEnd="2009-05-14 16:51:46"
                  tsStart="2009-05-14 16:51:46">
          <annotation id="6" key="used_files"
                     value="['/vistrails/pc3/SampleData/J062945/P2_J062945_B001_P2fits0_20081115_P2ImageMeta.csv.hdr']"/>
        </moduleExec>
      </value>
      <account id="acct2"/>
    </process>
:)

SDSC

Query 1

let $graph := doc('sdsc.xml')

(: The detection value being traced :)
let $detectionValue := '261887437030025141'

(: The artifact whose text contains that value :)
let $holder := $graph//opm:artifact[contains(., $detectionValue)]

(: 'used' edges on which the artifact plays the FileEntry role :)
let $fileEntryEdges := $graph//opm:used[opm:role/@value = "FileEntry"]

(: Upstream artifacts that are consumed as a FileEntry, i.e. the CSV file entries :)
let $result :=
    for $candidate in local:derivedFrom($graph, $holder),
        $edge in $fileEntryEdges
    where $edge/opm:cause/@id = $candidate/@id
    return $candidate

(: Re-select by id so each matching artifact is returned once :)
return $graph//opm:artifact[@id = $result/@id]

(:
result:
<artifact xmlns="http://openprovenance.org/model/v1.01.a" id="_a54">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">
        {Checksum = "f8f9d70711cb3a1cb8b359d99d98fa63", ColumnNames =
        {"objID", "detectID", "ippObjID", "ippDetectID", "filterID",
        "imageID", "obsTime", "xPos", "yPos", "xPosErr", "yPosErr",
        "instFlux", "instFluxErr", "psfWidMajor", "psfWidMinor", "psfTheta",
        "psfLikelihood", "psfCf", "infoFlag", "htmID", "zoneID",
        "assocDate", "modNum", "ra", "dec", "raErr", "decErr", "cx", "cy",
        "cz", "peakFlux", "calMag", "calMagErr", "calFlux", "calFluxErr",
        "calColor", "calColorErr", "sky", "skyErr", "sgSep", "dataRelease"},
        FilePath = "pc3/workflows/data/J062941/P2_J062941_B001_P2fits0_20081115_P2Detection.csv",
        HeaderPath = "pc3/workflows/data/J062941/P2_J062941_B001_P2fits0_20081115_P2Detection.csv.hdr",
        RowCount = 20, TargetTable = "P2Detection"}
  </value>
</artifact>
:)

Query 2

let $graph := doc('sdsc.xml')

(: Artifacts whose text names P2Detection as the target table :)
let $tableEntries := $graph//opm:artifact[contains(.,'TargetTable = "P2Detection"')]

(: Processes whose text mentions LoadCSVFileIntoTable :)
let $loaders := $graph//opm:process[contains(.,'LoadCSVFileIntoTable')]

(: The load process that uses such an artifact: follow each 'used' edge
   from the artifact (cause) to the process (effect) :)
let $loadProcess :=
    for $entry in $tableEntries,
        $edge in $graph//opm:used,
        $loader in $loaders
    where $edge/opm:cause/@id = $entry/@id
      and $edge/opm:effect/@id = $loader/@id
    return $loader

(: Everything that runs downstream of the loader :)
let $downstream := local:triggers($graph, $loadProcess)

(: Downstream processes whose text mentions IsMatchTableColumnRanges :)
let $found := $downstream[contains(., "IsMatchTableColumnRanges")]

return if (exists($found)) then "yes" else "no"

(:
result:
<?xml version="1.0" encoding="UTF-8"?>yes
:)

Query 3

let $graph := doc('sdsc.xml')

(: Artifacts whose text names P2ImageMeta as the target table :)
let $tableEntries := $graph//opm:artifact[contains(.,'TargetTable = "P2ImageMeta"')]

(: Processes whose text mentions LoadCSVFileIntoTable :)
let $loaders := $graph//opm:process[contains(.,'LoadCSVFileIntoTable')]

(: The load process that uses such an artifact: follow each 'used' edge
   from the artifact (cause) to the process (effect) :)
let $loadProcess :=
    for $entry in $tableEntries,
        $edge in $graph//opm:used,
        $loader in $loaders
    where $edge/opm:cause/@id = $entry/@id
      and $edge/opm:effect/@id = $loader/@id
    return $loader

(: All processes upstream of the loader :)
return local:triggeredBy($graph, $loadProcess)
            
(:
result: (not consistent with SDSC results!) (there is no way of removing results that do not affect the value)

<?xml version="1.0" encoding="UTF-8"?>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p0">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.IsCSVReadyFileExists fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p1">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.StopOnFalse fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p2">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ReadCSVReadyFile fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p3">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.IsMatchCSVFileTables fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p4">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.StopOnFalse2 fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p5">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.CreateEmptyLoadDB fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p6">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.Array Permute fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p8">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.in fire 0</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p27">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.CompositeActor.in fire 3</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p28">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.CompositeActor.Record Disassembler fire 1</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p29">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.CompositeActor.IsExistsCSVFile fire 1</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p30">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.CompositeActor.StopOnFalse fire 1</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p31">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.CompositeActor.ReadCSVFileColumnNames fire 1</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p32">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.CompositeActor.IsMatchCSVFileColumnNames fire 1</value>
        </process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p33">
            <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:xs="http://www.w3.org/2001/XMLSchema"
          xsi:type="xs:string">.load.ForEach.CompositeActor.StopOnFalse2 fire 1</value>
        </process>
:)

Suggested Workflow Variants

Suggested Queries

Suggestions for Modification of the Open Provenance Model

Conclusions

-- JulianaFreire - 02 Apr 2009
to top


End of topic
Skip to action links | Back to top

I Attachment sort Action Size Date Who Comment
wf_clean.png manage 63.9 K 08 Apr 2009 - 23:08 JulianaFreire  
wf_ifs.png manage 178.6 K 08 Apr 2009 - 23:09 JulianaFreire  
wf_clean.pdf manage 285.0 K 08 Apr 2009 - 23:09 JulianaFreire  
wf_ifs.pdf manage 370.4 K 08 Apr 2009 - 23:09 JulianaFreire  
workflow_vt.xml manage 75.2 K 15 May 2009 - 06:01 JulianaFreire  
workflow_wf.xml manage 12.5 K 11 May 2009 - 21:44 JulianaFreire  
workflow_log.xml manage 6.5 K 11 May 2009 - 21:44 JulianaFreire  
workflow_reg.xml manage 34.6 K 11 May 2009 - 21:44 JulianaFreire  
workflow_opm.xml manage 55.9 K 29 May 2009 - 15:24 JulianaFreire  
pc3acct0.pdf manage 36.0 K 15 May 2009 - 16:12 JulianaFreire  
pc3acct0.png manage 74.7 K 15 May 2009 - 20:38 JulianaFreire  
pc3acct1.pdf manage 37.2 K 15 May 2009 - 16:13 JulianaFreire  
pc3acct1.png manage 86.0 K 15 May 2009 - 20:38 JulianaFreire  
pc3acct2.pdf manage 41.8 K 15 May 2009 - 16:13 JulianaFreire  
pc3acct2.png manage 172.0 K 15 May 2009 - 20:38 JulianaFreire  
workflow_opm2.xml manage 55.9 K 29 May 2009 - 15:26 JulianaFreire  
workflow_log2.xml manage 10.5 K 15 May 2009 - 06:02 JulianaFreire  
workflow_opm_err.xml manage 32.0 K 28 May 2009 - 17:39 JulianaFreire  
workflow_log_err.xml manage 6.2 K 15 May 2009 - 06:03 JulianaFreire  
pc3err_acct0.pdf manage 30.3 K 15 May 2009 - 16:15 JulianaFreire  
pc3err_acct0.png manage 26.8 K 15 May 2009 - 20:39 JulianaFreire  
pc3err_acct1.pdf manage 31.0 K 15 May 2009 - 16:15 JulianaFreire  
pc3err_acct1.png manage 34.9 K 15 May 2009 - 20:39 JulianaFreire  
pc3err_acct2.pdf manage 34.9 K 15 May 2009 - 16:16 JulianaFreire  
pc3err_acct2.png manage 87.0 K 15 May 2009 - 20:40 JulianaFreire  
wf_hist_full.pdf manage 99.4 K 15 May 2009 - 19:46 JulianaFreire  
wf_hist_full.png manage 57.0 K 15 May 2009 - 19:47 JulianaFreire  
presntation.pdf manage 827.9 K 27 Jul 2009 - 21:19 JulianaFreire  
presentation.pdf manage 827.9 K 27 Jul 2009 - 21:20 JulianaFreire  

You are here: Challenge > ThirdProvenanceChallenge > ParticipatingTeams3 > VisTrails3

to top

Copyright © 1999-2012 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.