diff options
author | Benjamin Franzke <bfr@qbus.de> | 2015-02-18 15:48:18 +0100 |
---|---|---|
committer | Benjamin Franzke <bfr@qbus.de> | 2015-02-18 15:54:24 +0100 |
commit | aceed71c01b9a25ee4fc70074d3edbd7c95042ad (patch) | |
tree | 04c1409cfc7449a03b37e47559acd5c3baf52c44 | |
download | endnote-import-aceed71c01b9a25ee4fc70074d3edbd7c95042ad.tar.gz endnote-import-aceed71c01b9a25ee4fc70074d3edbd7c95042ad.tar.bz2 endnote-import-aceed71c01b9a25ee4fc70074d3edbd7c95042ad.zip |
Implement simple xml import using xsl and load xml
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | convert-style-to-html.xsl | 127 | ||||
-rw-r--r-- | endnote-to-dbxml.xsl | 86 | ||||
-rw-r--r-- | fix-empty-tags.sed | 5 | ||||
-rwxr-xr-x | update.sh | 27 | ||||
-rw-r--r-- | update.sql | 9 | ||||
-rw-r--r-- | util.xsl | 61 | ||||
7 files changed, 317 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b9f3884
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+LIKAT Publications.xml
+dbxml.xml
diff --git a/convert-style-to-html.xsl b/convert-style-to-html.xsl
new file mode 100644
--- /dev/null
+++ b/convert-style-to-html.xsl
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+  <xsl:output method="xml" indent="yes" encoding="UTF-8" />
+
+  <!--
+    Convert EndNote style declarations (<style face="...">) to HTML tags.
+
+    Each call handles one keyword found in the face string: it emits the
+    matching HTML element and calls itself with that keyword excluded, so a
+    face listing several keywords yields nested tags. Once no known keyword
+    is left, the children of the style element are processed.
+
+    Parameters:
+      oldface: face string still to be handled (defaults to @face)
+      exclude: keyword the caller already handled; removed from oldface
+
+    TODO: <author>Editors,</author> or <author>Editor,</author>
+  -->
+  <xsl:template match="style" name="style">
+
+    <xsl:param name="oldface" select="@face"/>
+    <xsl:param name="exclude" select="''" />
+
+    <xsl:variable name="face">
+      <xsl:call-template name="string-replace-all">
+        <xsl:with-param name="text" select="$oldface" />
+        <xsl:with-param name="replace" select="$exclude" />
+        <xsl:with-param name="by" select="''" />
+      </xsl:call-template>
+    </xsl:variable>
+
+    <xsl:choose>
+      <xsl:when test="contains($face, 'subscript')">
+        <sub>
+          <xsl:call-template name="style">
+            <xsl:with-param name="oldface" select="$face"/>
+            <xsl:with-param name="exclude" select="'subscript'"/>
+          </xsl:call-template>
+        </sub>
+      </xsl:when>
+      <xsl:when test="contains($face, 'superscript')">
+        <!-- Superscript text is rendered with <sup> -->
+        <sup>
+          <xsl:call-template name="style">
+            <xsl:with-param name="oldface" select="$face"/>
+            <xsl:with-param name="exclude" select="'superscript'"/>
+          </xsl:call-template>
+        </sup>
+      </xsl:when>
+      <xsl:when test="contains($face, 'italic')">
+        <em>
+          <xsl:call-template name="style">
+            <xsl:with-param name="oldface" select="$face"/>
+            <xsl:with-param name="exclude" select="'italic'"/>
+          </xsl:call-template>
+        </em>
+      </xsl:when>
+      <!-- Matching 'bold' covers both "bold" and "boldface" -->
+      <xsl:when test="contains($face, 'bold')">
+        <strong>
+          <xsl:call-template name="style">
+            <xsl:with-param name="oldface" select="$face"/>
+            <xsl:with-param name="exclude" select="'bold'"/>
+          </xsl:call-template>
+        </strong>
+      </xsl:when>
+      <!-- 'normal' adds no markup; it is only stripped from the face string -->
+      <xsl:when test="contains($face, 'normal')">
+        <xsl:call-template name="style">
+          <xsl:with-param name="oldface" select="$face"/>
+          <xsl:with-param name="exclude" select="'normal'"/>
+        </xsl:call-template>
+      </xsl:when>
+
+      <xsl:otherwise>
+        <xsl:apply-templates />
+      </xsl:otherwise>
+
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- Drop style definitions from author-tags -->
+  <xsl:template match="author/style">
+    <xsl:apply-templates />
+  </xsl:template>
+
+  <!-- Copy every other element with its attributes; children are processed in turn -->
+  <xsl:template match="*">
+    <xsl:copy>
+      <xsl:copy-of select="@*"/>
+      <xsl:apply-templates />
+    </xsl:copy>
+  </xsl:template>
+
+  <!--
+    Output $text with every occurrence of $replace substituted by $by.
+    An empty $replace outputs $text unchanged.
+  -->
+  <xsl:template name="string-replace-all">
+    <xsl:param name="text" />
+    <xsl:param name="replace" />
+    <xsl:param name="by" />
+    <xsl:choose>
+      <xsl:when test="$replace = ''">
+        <xsl:value-of select="$text" />
+      </xsl:when>
+      <xsl:when test="contains($text, $replace)">
+        <xsl:value-of select="substring-before($text,$replace)" />
+        <xsl:value-of select="$by" />
+        <xsl:call-template name="string-replace-all">
+          <xsl:with-param name="text" select="substring-after($text,$replace)" />
+          <xsl:with-param name="replace" select="$replace" />
+          <xsl:with-param name="by" select="$by" />
+        </xsl:call-template>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="$text" />
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <xsl:template match="/">
+    <xsl:apply-templates />
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/endnote-to-dbxml.xsl b/endnote-to-dbxml.xsl
new file mode 100644
--- /dev/null
+++ b/endnote-to-dbxml.xsl
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+  <xsl:output method="xml" indent="yes" encoding="UTF-8" />
+
+  <xsl:include href="util.xsl"/>
+
+  <!--
+    Map one EndNote record to a <publication> row element. The child element
+    names are presumably the column names of the target table (verify against
+    the schema); titles keep their inline markup as escaped HTML.
+  -->
+  <xsl:template match="record">
+    <publication>
+      <uid>
+        <xsl:value-of select="rec-number" />
+      </uid>
+      <title>
+        <xsl:apply-templates select="titles/title" mode="copy-html" />
+      </title>
+      <secondary_title>
+        <xsl:apply-templates select="titles/secondary-title" mode="copy-html" />
+        <xsl:text></xsl:text>
+      </secondary_title>
+      <full_title>
+        <xsl:apply-templates select="periodical/full-title" mode="copy-html" />
+        <xsl:text></xsl:text>
+      </full_title>
+      <type>
+        <xsl:value-of select="ref-type/@name" />
+      </type>
+      <year>
+        <!-- No template uses mode "content" here, so the built-in rules output the text -->
+        <xsl:apply-templates select="dates/year" mode="content" />
+      </year>
+
+      <authors>
+        <!--<xsl:apply-templates select="authors" mode="references" />-->
+        <!-- Join all author names with "|" -->
+        <xsl:for-each select="contributors/authors/author">
+          <xsl:if test="position() &gt; 1">
+            <xsl:text>|</xsl:text>
+          </xsl:if>
+          <xsl:value-of select="." />
+        </xsl:for-each>
+      </authors>
+    </publication>
+  </xsl:template>
+
+  <!-- Wrap all records in a root element named after the target table -->
+  <xsl:template match="/">
+    <tx_likat_pubs_domain_model_publications>
+      <xsl:apply-templates select="xml/records/record" />
+    </tx_likat_pubs_domain_model_publications>
+  </xsl:template>
+
+  <!--
+    Output $text with every occurrence of $replace substituted by $by.
+    An empty $replace outputs $text unchanged.
+    Same template as in convert-style-to-html.xsl; nothing in this
+    stylesheet or util.xsl calls it.
+  -->
+  <xsl:template name="string-replace-all">
+    <xsl:param name="text" />
+    <xsl:param name="replace" />
+    <xsl:param name="by" />
+    <xsl:choose>
+      <xsl:when test="$replace = ''">
+        <xsl:value-of select="$text" />
+      </xsl:when>
+      <xsl:when test="contains($text, $replace)">
+        <xsl:value-of select="substring-before($text,$replace)" />
+        <xsl:value-of select="$by" />
+        <xsl:call-template name="string-replace-all">
+          <xsl:with-param name="text" select="substring-after($text,$replace)" />
+          <xsl:with-param name="replace" select="$replace" />
+          <xsl:with-param name="by" select="$by" />
+        </xsl:call-template>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="$text" />
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/fix-empty-tags.sed b/fix-empty-tags.sed
new file mode 100644
--- /dev/null
+++ b/fix-empty-tags.sed
@@ -0,0 +1,5 @@
+# LOAD XML has bugs (NULL columns) when there are empty, self-closing tags like
+# <title/>. Replace those with <title></title>.
+# Only the element name (\1) is repeated in the closing tag; attributes and
+# whitespace (\2) stay in the opening tag, so <a x="1"/> becomes <a x="1"></a>.
+s|<\([^ /<>]\{1,\}\)\([^>]*\)/>|<\1\2></\1>|g
diff --git a/update.sh b/update.sh
new file mode 100755
--- /dev/null
+++ b/update.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# Convert the EndNote export "LIKAT Publications.xml" to dbxml.xml and load it
+# into the database via update.sql.
+
+# Abort on the first failing command or unset variable, so update.sql (which
+# empties the table first) never runs after a failed conversion.
+set -eu
+
+cd "$(dirname "$0")"
+
+# Each stage writes to a file instead of a pipe: in a pipeline only the exit
+# status of the last command is checked, so a failing xsltproc would go unnoticed.
+tmpdir=$(mktemp -d)
+trap 'rm -rf "$tmpdir"' EXIT
+
+xsltproc convert-style-to-html.xsl "LIKAT Publications.xml" > "$tmpdir/styled.xml"
+xsltproc endnote-to-dbxml.xsl "$tmpdir/styled.xml" > "$tmpdir/dbxml.xml"
+sed -f fix-empty-tags.sed "$tmpdir/dbxml.xml" > dbxml.xml
+
+# Disabled optional steps:
+# sed -f cdata.sed | \
+# xmllint --pretty 1 - \
+
+# NOTE(review): credentials are hard-coded and visible in the process list;
+# consider a MySQL option file instead.
+mysql -v -utypo3 -ptypo3 typo3 < update.sql
diff --git a/update.sql b/update.sql
new file mode 100644
--- /dev/null
+++ b/update.sql
@@ -0,0 +1,9 @@
+-- Replace the entire publications table with the rows from dbxml.xml.
+-- NOTE(review): the DELETE is only undone on failure if the table uses a
+-- transactional storage engine - confirm.
+START TRANSACTION;
+DELETE FROM tx_likat_pubs_domain_model_publications;
+LOAD XML LOCAL INFILE './dbxml.xml'
+  INTO TABLE tx_likat_pubs_domain_model_publications
+  ROWS IDENTIFIED BY '<publication>';
+COMMIT;
diff --git a/util.xsl b/util.xsl
new file mode 100644
--- /dev/null
+++ b/util.xsl
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+  <!-- Wrap the output produced for the element's children in a <cdata> element -->
+  <xsl:template match="*" mode="copy-cdata">
+    <cdata>
+      <xsl:apply-templates />
+    </cdata>
+  </xsl:template>
+
+  <!-- Output the element's content (child elements and text) as escaped markup -->
+  <xsl:template match="*" mode="copy-html">
+    <xsl:apply-templates select="*|text()" mode="verb" />
+  </xsl:template>
+
+  <!--
+    Copy nodeset as escaped XML
+    http://logit.yudichev.net/2007/11/xslt-output-xml-escaped-copy-of-source.html
+
+    Tags are written as text, so the markup characters must be given as
+    entities (&lt; and &gt;) inside xsl:text; the serializer then escapes
+    them in the result document.
+  -->
+  <xsl:template match="*|@*|text()" mode="verb">
+    <xsl:variable name="node-type">
+      <xsl:call-template name="node-type"/>
+    </xsl:variable>
+    <xsl:choose>
+      <xsl:when test="$node-type='element'"> <!-- element -->
+        <xsl:text>&lt;</xsl:text>
+        <xsl:value-of select="name()"/>
+        <xsl:apply-templates select="@*" mode="verb"/>
+        <xsl:text>&gt;</xsl:text>
+        <xsl:apply-templates mode="verb"/>
+        <xsl:text>&lt;/</xsl:text>
+        <xsl:value-of select="name()"/>
+        <xsl:text>&gt;</xsl:text>
+      </xsl:when>
+      <xsl:when test="$node-type='text'"> <!-- text -->
+        <xsl:value-of select="self::text()"/>
+      </xsl:when>
+      <xsl:when test="$node-type='attribute'"> <!--any attribute-->
+        <xsl:text> </xsl:text>
+        <xsl:value-of select="name()"/>
+        <xsl:text>="</xsl:text>
+        <xsl:value-of select="."/>
+        <xsl:text>"</xsl:text>
+      </xsl:when>
+    </xsl:choose>
+  </xsl:template>
+
+  <!-- Output the kind of $node (default: context node): element, attribute or text -->
+  <xsl:template name="node-type">
+    <xsl:param name="node" select="."/>
+    <xsl:apply-templates mode="nodetype" select="$node"/>
+  </xsl:template>
+  <xsl:template mode="nodetype" match="*">element</xsl:template>
+  <xsl:template mode="nodetype" match="@*">attribute</xsl:template>
+  <xsl:template mode="nodetype" match="text()">text</xsl:template>
+
+</xsl:stylesheet>