summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Franzke <bfr@qbus.de>2015-02-18 15:48:18 +0100
committerBenjamin Franzke <bfr@qbus.de>2015-02-18 15:54:24 +0100
commitaceed71c01b9a25ee4fc70074d3edbd7c95042ad (patch)
tree04c1409cfc7449a03b37e47559acd5c3baf52c44
downloadendnote-import-aceed71c01b9a25ee4fc70074d3edbd7c95042ad.zip
endnote-import-aceed71c01b9a25ee4fc70074d3edbd7c95042ad.tar.gz
endnote-import-aceed71c01b9a25ee4fc70074d3edbd7c95042ad.tar.bz2
Implement simple xml import using xsl and load xml
-rw-r--r--.gitignore2
-rw-r--r--convert-style-to-html.xsl107
-rw-r--r--endnote-to-dbxml.xsl72
-rw-r--r--fix-empty-tags.sed3
-rwxr-xr-xupdate.sh13
-rw-r--r--update.sql6
-rw-r--r--util.xsl54
7 files changed, 257 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b9f3884
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+LIKAT Publications.xml
+dbxml.xml
diff --git a/convert-style-to-html.xsl b/convert-style-to-html.xsl
new file mode 100644
index 0000000..9858964
--- /dev/null
+++ b/convert-style-to-html.xsl
@@ -0,0 +1,107 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+ <xsl:output method="xml" indent="yes" encoding="UTF-8" />
+
+ <!-- Convert EndNote <style face="..."> declarations to HTML tags -->
+ <!-- TODO: <author>Editors,</author> or <author>Editor,</author> -->
+ <xsl:template match="style" name="style">
+   <!-- oldface: face keywords still to translate; exclude: keyword the caller has just emitted. -->
+   <xsl:param name="oldface" select="@face"/>
+   <xsl:param name="exclude" select="''" />
+   <!-- Strip the handled keyword so the recursion below ends once no known keyword is left. -->
+   <xsl:variable name="face">
+     <xsl:call-template name="string-replace-all">
+       <xsl:with-param name="text" select="$oldface" />
+       <xsl:with-param name="replace" select="$exclude" />
+       <xsl:with-param name="by" select="''" />
+     </xsl:call-template>
+   </xsl:variable>
+   <!-- One wrapper per level: emit the tag for the first keyword found, then recurse for the rest. -->
+   <xsl:choose>
+     <xsl:when test="contains($face, 'subscript')">
+       <sub>
+         <xsl:call-template name="style">
+           <xsl:with-param name="oldface" select="$face"/>
+           <xsl:with-param name="exclude" select="'subscript'"/>
+         </xsl:call-template>
+       </sub>
+     </xsl:when>
+     <xsl:when test="contains($face, 'superscript')">
+       <sup>
+         <xsl:call-template name="style">
+           <xsl:with-param name="oldface" select="$face"/>
+           <xsl:with-param name="exclude" select="'superscript'"/>
+         </xsl:call-template>
+       </sup>
+     </xsl:when>
+     <xsl:when test="contains($face, 'italic')">
+       <em>
+         <xsl:call-template name="style">
+           <xsl:with-param name="oldface" select="$face"/>
+           <xsl:with-param name="exclude" select="'italic'"/>
+         </xsl:call-template>
+       </em>
+     </xsl:when>
+     <xsl:when test="contains($face, 'boldface')">
+       <strong>
+         <xsl:call-template name="style">
+           <xsl:with-param name="oldface" select="$face"/>
+           <xsl:with-param name="exclude" select="'boldface'"/>
+         </xsl:call-template>
+       </strong>
+     </xsl:when>
+     <!-- "normal" adds no markup; it is only removed so that the recursion can finish. -->
+     <xsl:when test="contains($face, 'normal')">
+       <xsl:call-template name="style">
+         <xsl:with-param name="oldface" select="$face"/>
+         <xsl:with-param name="exclude" select="'normal'"/>
+       </xsl:call-template>
+     </xsl:when>
+     <!-- No known keyword left: process the content of the style element without a wrapper. -->
+     <xsl:otherwise>
+       <xsl:apply-templates />
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+
+ <!-- Drop style definitions from author tags: only the content of <style> is kept, no HTML wrapper is added -->
+ <xsl:template match="author/style">
+ <xsl:apply-templates />
+ </xsl:template>
+ <!-- Fallback for every other element: copy it with its attributes and keep processing its children -->
+ <xsl:template match="*">
+ <xsl:copy>
+ <xsl:copy-of select="@*"/>
+ <xsl:apply-templates />
+ </xsl:copy>
+ </xsl:template>
+
+ <!-- Replace every occurrence of $replace within $text by $by. An empty $replace
+      leaves $text unchanged (contains() is true for the empty string). -->
+ <xsl:template name="string-replace-all">
+   <xsl:param name="text" />
+   <xsl:param name="replace" />
+   <xsl:param name="by" />
+   <xsl:choose>
+     <xsl:when test="not($replace = '') and contains($text, $replace)">
+       <!-- Everything before the first match, followed by the substitute ... -->
+       <xsl:value-of select="concat(substring-before($text, $replace), $by)" />
+       <!-- ... then the same treatment for whatever follows that match. -->
+       <xsl:call-template name="string-replace-all">
+         <xsl:with-param name="text" select="substring-after($text, $replace)" />
+         <xsl:with-param name="replace" select="$replace" />
+         <xsl:with-param name="by" select="$by" />
+       </xsl:call-template>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:value-of select="$text" />
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+ <!-- Entry point: start processing at the document root -->
+ <xsl:template match="/">
+ <xsl:apply-templates />
+ </xsl:template>
+
+</xsl:stylesheet>
diff --git a/endnote-to-dbxml.xsl b/endnote-to-dbxml.xsl
new file mode 100644
index 0000000..24d3be3
--- /dev/null
+++ b/endnote-to-dbxml.xsl
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+ <xsl:output method="xml" indent="yes" encoding="UTF-8" />
+
+ <xsl:include href="util.xsl"/>
+ <!-- Map one EndNote <record> to a flat <publication> element (update.sql loads each <publication> as one row) -->
+ <xsl:template match="record">
+ <publication>
+ <uid>
+ <xsl:value-of select="rec-number" />
+ </uid>
+ <title>
+ <xsl:apply-templates select="titles/title" mode="copy-html" />
+ </title>
+ <secondary_title>
+ <xsl:apply-templates select="titles/secondary-title" mode="copy-html" />
+ <xsl:text></xsl:text> <!-- NOTE(review): an empty xsl:text adds nothing to the output; presumably meant to avoid a self-closing tag - confirm -->
+ </secondary_title>
+ <full_title>
+ <xsl:apply-templates select="periodical/full-title" mode="copy-html" />
+ <xsl:text></xsl:text>
+ </full_title>
+ <type>
+ <xsl:value-of select="ref-type/@name" />
+ </type>
+ <year>
+ <xsl:apply-templates select="dates/year" mode="content" /> <!-- no template for mode "content" is visible here; the built-in rules then copy the text -->
+ </year>
+ <!-- All authors of the record are joined into one string, separated by "|" -->
+ <authors>
+ <!--<xsl:apply-templates select="authors" mode="references" />-->
+ <xsl:for-each select="contributors/authors/author">
+ <xsl:if test="position() &gt; 1">
+ <xsl:text>|</xsl:text>
+ </xsl:if>
+ <xsl:value-of select="." />
+ </xsl:for-each>
+ </authors>
+ </publication>
+ </xsl:template>
+ <!-- Root: wrap all converted records in one element named like the target table in update.sql -->
+ <xsl:template match="/">
+ <tx_likat_pubs_domain_model_publications>
+ <xsl:apply-templates select="xml/records/record" />
+ </tx_likat_pubs_domain_model_publications>
+ </xsl:template>
+
+ <!-- Replace every occurrence of $replace within $text by $by; an empty $replace returns
+      $text unchanged. No call to this template is visible in this stylesheet. -->
+ <xsl:template name="string-replace-all">
+   <xsl:param name="text" />
+   <xsl:param name="replace" />
+   <xsl:param name="by" />
+   <xsl:choose>
+     <xsl:when test="not($replace = '') and contains($text, $replace)">
+       <!-- Everything before the first match, followed by the substitute ... -->
+       <xsl:value-of select="concat(substring-before($text, $replace), $by)" />
+       <!-- ... then the same treatment for whatever follows that match. -->
+       <xsl:call-template name="string-replace-all">
+         <xsl:with-param name="text" select="substring-after($text, $replace)" />
+         <xsl:with-param name="replace" select="$replace" />
+         <xsl:with-param name="by" select="$by" />
+       </xsl:call-template>
+     </xsl:when>
+     <xsl:otherwise>
+       <xsl:value-of select="$text" />
+     </xsl:otherwise>
+   </xsl:choose>
+ </xsl:template>
+
+</xsl:stylesheet>
diff --git a/fix-empty-tags.sed b/fix-empty-tags.sed
new file mode 100644
index 0000000..f958206
--- /dev/null
+++ b/fix-empty-tags.sed
@@ -0,0 +1,3 @@
+# LOAD XML has bugs (NULL columns) when there are empty, self-closing tags like
+# <title/>. Replace those with <title></title>; attributes stay on the start tag only.
+s|<\([^[:space:]/>]\{1,\}\)\([^>]*\)/>|<\1\2></\1>|g
diff --git a/update.sh b/update.sh
new file mode 100755
index 0000000..6ad1214
--- /dev/null
+++ b/update.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+cd $(dirname $0)
+cat "LIKAT Publications.xml" \
+ | xsltproc convert-style-to-html.xsl - \
+ | xsltproc endnote-to-dbxml.xsl - \
+ | sed -f fix-empty-tags.sed \
+ > dbxml.xml
+
+# sed -f cdata.sed | \
+# xmllint --pretty 1 - \
+
+mysql -v -utypo3 -ptypo3 typo3 < update.sql
diff --git a/update.sql b/update.sql
new file mode 100644
index 0000000..c745643
--- /dev/null
+++ b/update.sql
@@ -0,0 +1,6 @@
+START TRANSACTION; -- replace the table contents as one unit (needs a transactional storage engine; TODO confirm)
+DELETE FROM tx_likat_pubs_domain_model_publications;
+LOAD XML LOCAL INFILE './dbxml.xml' -- LOCAL: the file is read on the client side; update.sh writes it before calling mysql
+ INTO TABLE tx_likat_pubs_domain_model_publications
+ ROWS IDENTIFIED BY '<publication>'; -- each <publication> element becomes one row
+COMMIT;
diff --git a/util.xsl b/util.xsl
new file mode 100644
index 0000000..19e6cf9
--- /dev/null
+++ b/util.xsl
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+ <!-- Mode copy-cdata: wrap the result of processing the children (default mode) in a <cdata> element -->
+ <xsl:template match="*" mode="copy-cdata">
+ <cdata>
+ <xsl:apply-templates />
+ </cdata>
+ </xsl:template>
+ <!-- Mode copy-html: write the child elements and text of the matched element as escaped markup (mode "verb") -->
+ <xsl:template match="*" mode="copy-html">
+ <xsl:apply-templates select="*|text()" mode="verb" />
+ </xsl:template>
+
+ <!--
+   Copy a node set as escaped XML: tags are written as text, so the output
+   contains &lt;em&gt; instead of a real em element.
+   http://logit.yudichev.net/2007/11/xslt-output-xml-escaped-copy-of-source.html
+ -->
+
+ <!-- Element: start tag with its attributes, escaped content, end tag. -->
+ <xsl:template match="*" mode="verb">
+   <xsl:variable name="tag" select="name()" />
+   <!-- start tag -->
+   <xsl:text>&lt;</xsl:text>
+   <xsl:value-of select="$tag" />
+   <xsl:apply-templates select="@*" mode="verb" />
+   <xsl:text>&gt;</xsl:text>
+   <!-- Content. Text nodes have no template in this mode; the built-in rule
+        copies them to the output. -->
+   <xsl:apply-templates mode="verb" />
+   <!-- end tag -->
+   <xsl:value-of select="concat('&lt;/', $tag, '&gt;')" />
+ </xsl:template>
+
+ <!-- Attribute: a leading space followed by name="value".
+      NOTE(review): a double quote inside the value is written unchanged and
+      would end the quoted value early; confirm the input never contains one. -->
+ <xsl:template match="@*" mode="verb">
+   <xsl:text> </xsl:text>
+   <xsl:value-of select="concat(name(), '=')" />
+   <xsl:text>"</xsl:text>
+   <xsl:value-of select="." />
+   <xsl:text>"</xsl:text>
+ </xsl:template>
+ <xsl:template name="node-type"> <!-- Yield the kind of $node (default: the context node) as the text "element", "attribute" or "text" -->
+ <xsl:param name="node" select="."/>
+ <xsl:apply-templates mode="nodetype" select="$node"/>
+ </xsl:template>
+ <xsl:template mode="nodetype" match="*">element</xsl:template> <!-- mode "nodetype": one template per node kind, each emitting the name of that kind -->
+ <xsl:template mode="nodetype" match="@*">attribute</xsl:template>
+ <xsl:template mode="nodetype" match="text()">text</xsl:template>
+
+</xsl:stylesheet>