nosql marklogic triples rdf-xml

Graph data model to transform XML to RDF


I am engineering a semantic graph system.

Raw /projects.xml

  <!--
    Raw input sample: a <projects> root wrapping repeated <project> records.
    Only the first record is spelled out. The ==== line and the second
    <project> holding ++++ are literal text in this snippet; presumably they
    are placeholders for further records (confirm with the data owner).
    Note that the monetary fields (totalAwarded, totalInvestment,
    disbursementsToDate) carry thousands separators, and several text fields
    have leading or trailing spaces.
  -->
  <projects>
     <project>
        <projectID>123859</projectID>
        <program> Economic Development Fund</program>
        <recipient>ABC, Inc</recipient>
        <projectName> ABC, Inc Innovation Cluster </projectName>
        <projectDescription>Grant and Credit; Scientific, and Technical Services;G13; ABC, Inc </projectDescription>
        <postalCode>60311</postalCode>
        <region>G13</region>
        <industry>Scientific, and Technical Services</industry>
        <startDate>2021-08-17T00:00:00</startDate>
        <incentiveType>Grant and Credit</incentiveType>
        <totalAwarded>13,315,259</totalAwarded>
        <totalInvestment>175,991,971</totalInvestment>
        <disbursementsToDate>7,000,000</disbursementsToDate>
 </project>
======================================
<project>
++++++++++++++++++++++++++++++++++++++
</project>
 
  </projects>

Is it possible to transform such XML into a graph document? If so, I need a workable RDF model and an example of the transformed triples. Thanks in advance!


Solution

  • Please review these helpful references:

    Valid RDF triples can be stored in any RDF-compliant graph database (triple store).

    Example of a MarkLogic-supported RDF/XML data model:

    <!--
      RDF/XML description of one project record, keyed by its projectID.
      Subject IRI: http://my.semantics/projects/incentive/{projectID}.
      Property attributes (sbase:*) and untyped property elements (spi:*)
      yield plain string literals; typed elements carry rdf:datatype.

      Monetary amounts are typed xsd:decimal, whose lexical form allows only
      digits, an optional sign and an optional "." as decimal point. The
      thousands separators of the raw input are therefore stripped here.

      Literal whitespace is significant in RDF, so leading and trailing
      spaces of the raw values are trimmed before they are emitted.

      NOTE(review): the onto prefix is declared but not used, and the
      sbase:about target lies under http://my.semantics/ontology/ and not
      under the onto namespace. Confirm which IRI is intended.
    -->
    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
          xmlns:spi="http://my.semantics/projects/incentive/"
          xmlns:sbase="http://my.semantics/projects/"
          xmlns:onto="http://my.semantics/projects/ontology/"
          xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
      <rdf:Description rdf:about="http://my.semantics/projects/incentive/123859"
              sbase:program="Economic Development Fund"
              sbase:projectID="123859"
              sbase:projectName="ABC, Inc Innovation Cluster"
              sbase:recipient="ABC, Inc">
        <sbase:about rdf:resource="http://my.semantics/ontology/economy"/>
        <spi:postalCode>60311</spi:postalCode>
        <spi:region>G13</spi:region>
        <spi:industry>Scientific, and Technical Services</spi:industry>
        <spi:incentiveType>Grant and Credit</spi:incentiveType>
        <spi:startDate rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2021-08-17T00:00:00</spi:startDate>
        <spi:totalAwarded rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">13315259</spi:totalAwarded>
        <spi:totalInvestment rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">175991971</spi:totalInvestment>
        <spi:disbursementsToDate rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">7000000</spi:disbursementsToDate>
        <rdfs:comment xml:lang="en">Grant and Credit; Scientific, and Technical Services;G13; ABC, Inc</rdfs:comment>
      </rdf:Description>
    </rdf:RDF>
    

    Once the RDF data model is correctly constructed, the triples transformation is straightforward.

    Transformed and ingested MarkLogic triples based on the above model:

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      MarkLogic sem:triples serialization of one project record.
      Every triple shares the subject
      http://my.semantics/projects/incentive/123859. Objects with a datatype
      attribute are typed literals, the object with xml:lang is a
      language-tagged literal, and the object with neither (the "about"
      triple) holds an IRI.

      Objects typed xsd:decimal must use the xsd:decimal lexical form
      (digits, optional sign, optional "." decimal point), so the amounts
      are written without thousands separators.
    -->
    <sem:triples xmlns:sem="http://marklogic.com/semantics">
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/program</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">Economic Development Fund</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/projectID</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">123859</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/projectName</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">ABC, Inc Innovation Cluster</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/recipient</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">ABC, Inc</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/about</sem:predicate>
          <sem:object>http://my.semantics/ontology/economy</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/postalCode</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">60311</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/region</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">G13</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/industry</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">Scientific, and Technical Services</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/incentiveType</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#string">Grant and Credit</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/startDate</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#dateTime">2021-08-17T00:00:00</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/totalAwarded</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#decimal">13315259</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/totalInvestment</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#decimal">175991971</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://my.semantics/projects/incentive/disbursementsToDate</sem:predicate>
          <sem:object datatype="http://www.w3.org/2001/XMLSchema#decimal">7000000</sem:object>
       </sem:triple>
       <sem:triple>
          <sem:subject>http://my.semantics/projects/incentive/123859</sem:subject>
          <sem:predicate>http://www.w3.org/2000/01/rdf-schema#comment</sem:predicate>
          <sem:object xml:lang="en">Grant and Credit; Scientific, and Technical Services;G13; ABC, Inc</sem:object>
       </sem:triple>
    </sem:triples>