20 September 2009

Xalan part 3: BerkeleyDB+XSLT+pubmed

In my previous post I showed how to call MySQL from the XALAN XSLT engine. In the current post, I'll show how a custom function for XALAN can return a new DOM/XML document that will later be used by the XSLT stylesheet. To get a source of data, I'm going to create a key-value database with BerkeleyDB (Java Edition) storing strings (as the keys) and XML documents (as the values).

The Database XMLStore


In the constructor, a DOM parser to parse the XML is created, as well as a Transformer to serialize this XML to a String, and the BerkeleyDB environment is opened.
// DOM parser: no validation, no namespaces, comments dropped, CDATA merged into text.
DocumentBuilderFactory builderFactory= DocumentBuilderFactory.newInstance();
builderFactory.setCoalescing(true);
builderFactory.setExpandEntityReferences(true);
builderFactory.setIgnoringComments(true);
builderFactory.setNamespaceAware(false);
builderFactory.setValidating(false);
builderFactory.setIgnoringElementContentWhitespace(true);
this.docBuilder= builderFactory.newDocumentBuilder();

// Identity transformer, used to turn a DOM tree back into text.
TransformerFactory transformerFactory=TransformerFactory.newInstance();
this.xmlSerializer=transformerFactory.newTransformer();

// BerkeleyDB environment, created on first use and opened read/write.
EnvironmentConfig environmentConfig= new EnvironmentConfig();
environmentConfig.setAllowCreate(true);
environmentConfig.setReadOnly(false);
this.env=new Environment(new File(envHome), environmentConfig);

// The "id2xml" database, created on first use and opened read/write.
DatabaseConfig databaseConfig= new DatabaseConfig();
databaseConfig.setAllowCreate(true);
databaseConfig.setReadOnly(false);
this.id2xml= env.openDatabase(null, "id2xml", databaseConfig);

The class XMLStore contains a method to PUT the XML/DOM document in the database.
/**
 * Stores an XML document under the given identifier.
 *
 * @param id  the key of the record
 * @param dom the document to serialize and store as the record's value
 * @return the status reported by the database for the write
 * @throws DatabaseException if the write fails, or wrapping the
 *         TransformerException raised while serializing the document
 */
public OperationStatus put(String id,Document dom) throws DatabaseException
{
    // Turn the DOM tree into its textual XML form.
    StringWriter xmlText= new StringWriter();
    try
    {
        DOMSource source= new DOMSource(dom);
        StreamResult target= new StreamResult(xmlText);
        this.xmlSerializer.transform(source, target);
    }
    catch (TransformerException e)
    {
        throw new DatabaseException(e);
    }

    // Encode the key and the serialized document, then write the record.
    DatabaseEntry keyEntry=new DatabaseEntry();
    StringBinding.stringToEntry(id, keyEntry);
    DatabaseEntry valueEntry=new DatabaseEntry();
    StringBinding.stringToEntry(xmlText.toString(), valueEntry);
    return this.id2xml.put(null, keyEntry, valueEntry);
}

We also need a GET method to retrieve an XML document for a given key. This will be the new document processed by the XSLT stylesheet.
/**
 * Looks up the XML document stored under the given identifier.
 *
 * The answer is always a new document whose root element is
 * &lt;Query key="..." status="..."&gt;. On success the status is "success"
 * and the stored document's root element is imported as a child of
 * &lt;Query&gt;; otherwise the status is "failure" and &lt;Query&gt; holds a
 * short message ("key is null" or "key not found").
 *
 * @param id the key to look up; may be null
 * @return the &lt;Query&gt; document described above
 * @throws DatabaseException if the read fails, or wrapping any exception
 *         raised while parsing or importing the stored XML
 */
public Document get(String id) throws DatabaseException
{
    final Document answer= this.docBuilder.newDocument();
    final Element query= answer.createElement("Query");
    query.setAttribute("key", String.valueOf(id));
    answer.appendChild(query);

    // Work out whether the lookup failed, and why.
    final DatabaseEntry value=new DatabaseEntry();
    String failure= null;
    if(id==null)
    {
        failure= "key is null";
    }
    else
    {
        DatabaseEntry key=new DatabaseEntry();
        StringBinding.stringToEntry(id, key);
        OperationStatus status= this.id2xml.get(null, key, value, LockMode.DEFAULT);
        if(status!=OperationStatus.SUCCESS)
        {
            failure= "key not found";
        }
    }

    if(failure!=null)
    {
        query.setAttribute("status", "failure");
        query.appendChild(answer.createTextNode(failure));
        return answer;
    }

    try
    {
        // Parse the stored text and graft its root element under <Query>.
        String xml= StringBinding.entryToString(value);
        Document stored= this.docBuilder.parse(new InputSource(new StringReader(xml)));
        query.setAttribute("status", "success");
        query.appendChild(answer.importNode(stored.getDocumentElement(),true));
        return answer;
    }
    catch (Exception e)
    {
        throw new DatabaseException(e);
    }
}

Full Source code of XMLStore.java

package test;
import java.io.File;
import java.io.StringReader;
import java.io.StringWriter;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;


import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;


import com.sleepycat.bind.tuple.StringBinding;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;


/**
 * Key/value store mapping a String identifier to an XML document.
 *
 * Records live in a BerkeleyDB (Java Edition) database named "id2xml";
 * documents are stored as their serialized text. The class also has a small
 * command line front end (see {@link #main(String[])}) offering the commands
 * "put", "put-file" and "get".
 */
public class XMLStore
{
    /** BerkeleyDB environment */
    private Environment env;
    /** Database mapping String to DOM */
    private Database id2xml;
    /** DOM builder */
    private DocumentBuilder docBuilder;
    /** DOM to String serializer */
    private Transformer xmlSerializer;
    /** True once {@link #close()} has run, so that a second call does nothing. */
    private boolean closed= false;

    /**
     * Creates the XML parser and serializer, then opens (creating them if
     * needed) the BerkeleyDB environment and its "id2xml" database.
     *
     * @param envHome directory holding the BerkeleyDB environment
     * @throws Exception if the XML machinery cannot be created or the
     *         environment/database cannot be opened
     */
    public XMLStore(String envHome) throws Exception
    {
        DocumentBuilderFactory domFactory= DocumentBuilderFactory.newInstance();
        domFactory.setCoalescing(true);
        domFactory.setExpandEntityReferences(true);
        domFactory.setIgnoringComments(true);
        domFactory.setNamespaceAware(false);
        domFactory.setValidating(false);
        domFactory.setIgnoringElementContentWhitespace(true);
        this.docBuilder= domFactory.newDocumentBuilder();

        TransformerFactory tFactory=TransformerFactory.newInstance();
        this.xmlSerializer=tFactory.newTransformer();

        boolean opened= false;
        try
        {
            EnvironmentConfig envCfg= new EnvironmentConfig();
            envCfg.setAllowCreate(true);
            envCfg.setReadOnly(false);
            this.env=new Environment(new File(envHome), envCfg);

            DatabaseConfig cfg= new DatabaseConfig();
            cfg.setAllowCreate(true);
            cfg.setReadOnly(false);
            this.id2xml= this.env.openDatabase(null, "id2xml", cfg);
            opened= true;
        }
        finally
        {
            // If the database could not be opened, do not leave the
            // environment open behind a half-built object.
            if(!opened) releaseResources();
        }
    }

    /**
     * Closes the database and then the environment. Failures are reported on
     * standard error instead of being thrown, and calling this method more
     * than once is harmless.
     */
    public void close()
    {
        if(this.closed) return;
        this.closed= true;
        releaseResources();
    }

    /**
     * Closes whatever was opened: the database first, then the environment.
     * The environment is closed even when closing the database fails.
     */
    private void releaseResources()
    {
        try
        {
            if(this.id2xml!=null) this.id2xml.close();
        }
        catch(DatabaseException err)
        {
            System.err.println("XMLStore: cannot close database: "+err);
        }
        finally
        {
            try
            {
                if(this.env!=null) this.env.close();
            }
            catch(DatabaseException err)
            {
                System.err.println("XMLStore: cannot close environment: "+err);
            }
        }
    }

    /**
     * Looks up the XML document stored under the given identifier.
     *
     * The answer is always a new document whose root element is
     * &lt;Query key="..." status="..."&gt;. On success the status is
     * "success" and the stored document's root element is imported as a
     * child of &lt;Query&gt;; otherwise the status is "failure" and
     * &lt;Query&gt; holds a short message.
     *
     * @param id the key to look up; may be null
     * @return the &lt;Query&gt; document described above
     * @throws DatabaseException if the read fails, or wrapping any exception
     *         raised while parsing or importing the stored XML
     */
    public Document get(String id) throws DatabaseException
    {
        Document dom= this.docBuilder.newDocument();
        Element root= dom.createElement("Query");
        root.setAttribute("key", String.valueOf(id));
        dom.appendChild(root);

        if(id==null)
        {
            return failure(dom, root, "key is null");
        }

        DatabaseEntry key=new DatabaseEntry();
        DatabaseEntry data=new DatabaseEntry();
        StringBinding.stringToEntry(id, key);
        if(this.id2xml.get(null, key, data, LockMode.DEFAULT)!=OperationStatus.SUCCESS)
        {
            return failure(dom, root, "key not found");
        }
        try
        {
            Document doc = this.docBuilder.parse(new InputSource(new StringReader(StringBinding.entryToString(data))));
            root.setAttribute("status", "success");
            root.appendChild(dom.importNode(doc.getDocumentElement(),true));
            return dom;
        }
        catch (Exception e)
        {
            throw new DatabaseException(e);
        }
    }

    /** Marks the &lt;Query&gt; element as failed, with the given message as its text. */
    private static Document failure(Document dom, Element root, String message)
    {
        root.setAttribute("status", "failure");
        root.appendChild(dom.createTextNode(message));
        return dom;
    }

    /**
     * Stores an XML document under the given identifier.
     *
     * @param id  the key of the record
     * @param dom the document to serialize and store as the record's value
     * @return the status reported by the database for the write
     * @throws DatabaseException if the write fails, or wrapping the
     *         TransformerException raised while serializing the document
     */
    public OperationStatus put(String id,Document dom) throws DatabaseException
    {
        DatabaseEntry key=new DatabaseEntry();
        DatabaseEntry data=new DatabaseEntry();
        StringBinding.stringToEntry(id, key);
        StringWriter w= new StringWriter();
        try
        {
            this.xmlSerializer.transform(
                new DOMSource(dom),
                new StreamResult(w)
                );
        }
        catch (TransformerException e)
        {
            throw new DatabaseException(e);
        }
        StringBinding.stringToEntry(w.toString(), data);
        return this.id2xml.put(null, key, data);
    }

    /**
     * Command line front end.
     *
     * Options: "-h" prints help, "-D dir" gives the BerkeleyDB home
     * (required), "--" ends the options. Commands:
     * "put key xml-string", "put-file key xml-file", "get key".
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        XMLStore store=null;
        try
        {
            String dbHome=null;
            int optind=0;
            while(optind< args.length)
            {
                if(args[optind].equals("-h"))
                {
                    System.err.println("-D berkeleyDB home");
                    return;
                }
                else if(args[optind].equals("-D"))
                {
                    // "-D" as the very last argument has no value to read.
                    if(optind+1>=args.length)
                    {
                        System.err.println("-D requires a directory argument");
                        return;
                    }
                    dbHome= args[++optind];
                }
                else if(args[optind].equals("--"))
                {
                    optind++;
                    break;
                }
                else if(args[optind].startsWith("-"))
                {
                    // Reported but not fatal: parsing goes on with the next argument.
                    System.err.println("Unknown option "+args[optind]);
                }
                else
                {
                    break;
                }
                ++optind;
            }
            if(dbHome==null)
            {
                System.err.println("-D missing");
                return;
            }
            store= new XMLStore(dbHome);
            int nargs= args.length - optind;
            if(nargs==3 &&
                args[optind].equals("put"))
            {
                Document dom= store.docBuilder.parse(new InputSource(
                    new StringReader(args[optind+2])));
                OperationStatus status=store.put(args[optind+1], dom);
                System.out.println("put \""+args[optind+1]+"\":"+status);
            }
            else if(nargs==3 &&
                args[optind].equals("put-file"))
            {
                Document dom= store.docBuilder.parse(new File(args[optind+2]));
                OperationStatus status=store.put(args[optind+1], dom);
                System.out.println("put-file \""+args[optind+1]+"\":"+status);
            }
            else if(nargs==2 &&
                args[optind].equals("get"))
            {
                Document dom= store.get(args[optind+1]);
                store.xmlSerializer.transform(new DOMSource(dom),
                    new StreamResult(System.out));
            }
            else
            {
                System.err.println("Illegal arguments.");
            }

        }
        catch(Throwable err)
        {
            err.printStackTrace();
        }
        finally
        {
            if(store!=null) store.close();
        }
    }
}

Compile & Package

javac -cp je-3.3.75.jar test/XMLStore.java
jar cvf xmlstore.jar test

Test


The PubMed articles with ids 15677533 and 18398438 were downloaded. Those documents are put in the database.
java -cp je-3.3.75.jar:xmlstore.jar test.XMLStore -D /tmp/bdb put-file 15677533 pubmed_15677533.xml
put-file "15677533":OperationStatus.SUCCESS
java -cp je-3.3.75.jar:xmlstore.jar test.XMLStore -D /tmp/bdb put-file 18398438 pubmed_18398438.xml
put-file "18398438":OperationStatus.SUCCESS

Let's retrieve the document id "18398438"
java -cp je-3.3.75.jar:xmlstore.jar test.XMLStore -D /tmp/bdb get 18398438

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<Query key="18398438" status="success"><PubmedArticleSet>
<PubmedArticle><MedlineCitation Owner="NLM" Status="MEDLINE">
(...)</PubmedArticle></PubmedArticleSet></Query>

The stylesheet


We're going to process the following XML NCBI/ELink document: for one SNP, it contains a list of the PMIDs of the associated papers.
<eLinkResult>
<LinkSet>
<DbFrom>snp</DbFrom>
<IdList>
<Id>1802710</Id>
</IdList>
<LinkSetDb>
<DbTo>pubmed</DbTo>
<LinkName>snp_pubmed</LinkName>
<Link>
<Id>18398438</Id>
</Link>
<Link>
<Id>15677533</Id>
</Link>
<Link>
<Id>15010842</Id>
</Link>
</LinkSetDb>
</LinkSet>
</eLinkResult>

In the header of the stylesheet, the use of XMLStore is declared:
<xsl:stylesheet
xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
version='1.0'
xmlns:xstore="xalan://test.XMLStore"
extension-element-prefixes="xstore"
>
A new XMLStore is created. It stores its data in the directory given by the "directory" parameter ("/tmp/bdb" by default). This database will be closed at the end of the processing.
<!-- Home directory of the BerkeleyDB environment; can be overridden by the caller. -->
<xsl:param name="directory" select="'/tmp/bdb'"/>
<!-- Instance of test.XMLStore created through the Xalan Java extension. -->
<xsl:variable name="db" select="xstore:new($directory)"/>

<!-- Root template: builds the HTML page, then closes the store. -->
<xsl:template match="/eLinkResult">
<html><body>
<xsl:apply-templates select="LinkSet"/>
</body></html>
<!-- Evaluated only for its side effect of closing the store. -->
<xsl:value-of select="xstore:close($db)"/>
</xsl:template>
And each time a PMID is seen, the XMLStore is called, a new XML/Document is returned by XMLStore, and the title of the paper is extracted from this new document.
<xsl:variable name="result" select="xstore:get($db,.)"/>
<li>
<b>Pubmed Id <xsl:value-of select="."/></b>:
<xsl:choose>
<xsl:when test="$result/Query/@status='success'">
<xsl:value-of select="$result/Query/PubmedArticleSet/PubmedArticle/MedlineCitation/Article/ArticleTitle"/>.
</xsl:when>
<xsl:otherwise>
<span style="color:red;"><xsl:value-of select="$result/Query"/></span>
</xsl:otherwise>
</xsl:choose>
</li>

Full source code of the stylesheet

<xsl:stylesheet version="1.0" extension-element-prefixes="xstore"
xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
xmlns:xstore="xalan://test.XMLStore"
>
<!--
  Turns an NCBI ELink result into an HTML page. For every linked PubMed id,
  the article is looked up in a test.XMLStore database (reached through the
  "xstore" Xalan Java extension namespace) and its title is printed.
-->

<xsl:output method="xml" indent="yes"/>
<!-- Home directory of the BerkeleyDB environment; can be overridden by the caller. -->
<xsl:param name="directory" select="'/tmp/bdb'"/>
<!-- Instance of test.XMLStore shared by all the templates. -->
<xsl:variable name="db" select="xstore:new($directory)"/>

<!-- Root template: builds the HTML page, then closes the store. -->
<xsl:template match="/eLinkResult">
<html><body>
<xsl:apply-templates select="LinkSet"/>
</body></html>
<!-- Evaluated only for its side effect of closing the store. -->
<xsl:value-of select="xstore:close($db)"/>
</xsl:template>

<!-- One LinkSet: source database name, source ids, then the related PubMed articles. -->
<xsl:template match="LinkSet">
<h1><xsl:value-of select="DbFrom"/></h1>
<ul>
<xsl:for-each select="IdList/Id">
<li><xsl:value-of select="."/></li>
</xsl:for-each>
</ul>
<h2>Related Pubmed</h2>
<ul>
<xsl:for-each select="LinkSetDb[DbTo='pubmed']/Link/Id">
<!-- $result is the Query document returned by XMLStore.get for this PMID. -->
<xsl:variable name="result" select="xstore:get($db,.)"/>
<li>
<b>Pubmed Id <xsl:value-of select="."/></b>:
<xsl:choose>
<xsl:when test="$result/Query/@status='success'">
<xsl:value-of select="$result/Query/PubmedArticleSet/PubmedArticle/MedlineCitation/Article/ArticleTitle"/>.
</xsl:when>
<xsl:otherwise>
<span style="color:red;"><xsl:value-of select="$result/Query"/></span>
</xsl:otherwise>
</xsl:choose>
</li>
</xsl:for-each>
</ul>
</xsl:template>
</xsl:stylesheet>

Running the stylesheet


java -cp ${XALAN}/org.apache.xalan_2.7.1.v200905122109.jar:\
${XALAN}/org.apache.xml.serializer_2.7.1.v200902170519.jar:\
je-3.3.75.jar:\
xmlstore.jar \
org.apache.xalan.xslt.Process -IN elink.fcgi.xml -XSL elink2html.xsl

Result



snp

  • 1802710

Related Pubmed


  • Pubmed Id 18398438:Preferential reciprocal transfer of paternal/maternal DLK1 alleles to obese children: first evidence of polar overdominance in humans..
  • Pubmed Id 15677533:Imprinting, expression, and localisation of DLK1 in Wilms tumours.
  • Pubmed Id 15010842:key not found



That's it

Pierre

No comments: