Fedora-Proai Integration

By default, Proai periodically polls Fedora for new content. This is not very efficient, so I disabled the polling and instead write directly to Proai's internal database.

<camelContext id="proai-routes" xmlns="http://camel.apache.org/schema/spring"
    xmlns:sparql="http://www.w3.org/2001/sw/DataAccess/rf1/result">
 
  <route id="proai-delete">
    <description>Delete an item from Proai</description>
    <from uri="vm:oai.delete"/>
    <log message="removing ${header.pid} from proai database"/>
    <setBody>
      <simple>
        DELETE r, i, m
        FROM rcItem AS i
        INNER JOIN rcRecord AS r
          ON i.itemKey = r.itemKey
        INNER JOIN rcMembership AS m
          ON r.recordKey = m.recordKey
        WHERE i.identifier = 'oai:acdc.amherst.edu:${header.pid}'
      </simple>
    </setBody>
    <to uri="jdbc:proaiDS"/>
  </route>
 
  <route id="proai-update">
    <description>Retrieve the relevant collections for this object</description>
    <from uri="vm:oai.update"/>
    <setHeader headerName="Exchange.HTTP_METHOD">
      <constant>GET</constant>
    </setHeader>
    <setHeader headerName="Exchange.HTTP_QUERY">
       <simple>type=tuples&amp;lang=Sparql&amp;format=Sparql&amp;query=SELECT%20%3Fspec%20WHERE%20%7B%3Cfedora%3A${header.pid}%3E%20%3Cfedora-rels-ext%3AisMemberOfCollection%3E%20%3Fcoll%20.%20%3Fcoll%20%3Chttp%3A%2F%2Fwww.openarchives.org%2FOAI%2F2.0%2FsetSpec%3E%20%3Fspec%20.%7D</simple>
    </setHeader>
    <setHeader headerName="Exchange.HTTP_PATH">
      <constant>/fedora/risearch</constant>
    </setHeader>
    <to uri="http4://fedora-host:8080/?authUsername=...&amp;authPassword=..."/>
    <convertBodyTo type="org.w3c.dom.Document"/>
    <split>
      <xpath>/sparql:sparql/sparql:results/sparql:result</xpath>
      <setHeader headerName="collection">
        <xpath>/sparql:result/sparql:spec/text()</xpath>
      </setHeader>
      <multicast>
        <to uri="seda:proai.oai_dc"/>
        <to uri="seda:proai.mods"/>
      </multicast>
    </split>
  </route>
 
  <route id="proai-oai_dc">
    <description>Format a message for OAI-DC metadata</description>
    <from uri="seda:proai.oai_dc"/>
    <setHeader headerName="mdPrefix">
      <constant>oai_dc</constant>
    </setHeader>
    <setHeader headerName="sourceInfo">
      <simple>info:fedora/${header.pid}/amherst:objectSDef/getDC null false ${date:now:yyyy-MM-dd'T'HH:mm:ss'Z'} ${header.collection}</simple>
    </setHeader>
    <to uri="seda:proai.sql"/>
  </route>
 
  <route id="proai-mods">
    <description>Format a message for MODS metadata</description>
    <from uri="seda:proai.mods"/>
    <setHeader headerName="mdPrefix">
      <constant>mods</constant>
    </setHeader>
    <setHeader headerName="sourceInfo">
      <simple>info:fedora/${header.pid}/MODS null false ${date:now:yyyy-MM-dd'T'HH:mm:ss'Z'} ${header.collection}</simple>
    </setHeader>
    <to uri="seda:proai.sql"/>
  </route>
 
  <route id="proai-sql">
    <description>Send a message to the Proai queue</description>
    <from uri="seda:proai.sql"/>
    <setBody>
      <simple>
        INSERT INTO rcQueue (identifier, mdPrefix, sourceInfo, queueSource)
        VALUES ('${header.pid}', '${header.mdPrefix}', '${header.sourceInfo}', 'R')
      </simple>
    </setBody>
    <to uri="jdbc:proaiDS"/>
    <log message="inserting into proai database for ${header.pid}/${header.mdPrefix}/${header.collection}"/>
  </route>
</camelContext>
 
<bean id="proaiDS" class="com.mysql.jdbc.jdbc2.optional.MysqlConnectionPoolDataSource">
  <property name="url" value="jdbc:mysql://proai-host:3306/proai"/>
  <property name="user" value="..."/>
  <property name="password" value="..."/>
</bean>
proai-routes.txt · Last modified: 2013/03/26 12:48 by acoburn@amherst.edu
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Share Alike 4.0 International