diff options
authorBenjamin Franzke <>2012-06-28 10:09:28 +0200
committerBenjamin Franzke <>2012-06-28 10:09:28 +0200
commit36c0f2f4d8aef72776a337eef05bec8cd0360e83 (patch)
Add scripts to download elberfelder from
The download is done by shell scripts using curl; books and chapters are parsed out of the pages with sed. The HTML is then prepared with sed so that it can be converted to Zefania XML using an XSL stylesheet.
12 files changed, 219 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cf7201c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
diff --git a/ b/
new file mode 100755
index 0000000..e9b203e
--- /dev/null
+++ b/
@@ -0,0 +1,24 @@
+#!/bin/sh
+# Write the complete Zefania XML bible to stdout, built from the book
+# names in book-list and the converted chapters/BOOK/N.xml fragments.
+echo '<?xml version="1.0" encoding="utf-8"?>'
+echo '<xmlbible type="x-bible" biblename="Elberfelder 2006" status="v">'
+echo '<information>'
+echo '<title>Elberfelder 2006</title>'
+echo '<format>Zefania XML Bible Markup Language</format>'
+echo '</information>'
+# Number books from 1 so that the first biblebook gets bnumber="1"
+# instead of an empty attribute.
+j=1
+while read -r buch
+do
+  echo "<biblebook bname=\"$buch\" bnumber=\"$j\">"
+  i=1
+  # Walk chapters 1, 2, 3, ... until the downloaded chapter file is missing.
+  while [ -e "chapters/$buch/$i" ]
+  do
+    # Skip the first line of each fragment (presumably the XML declaration
+    # written by xsltproc) so it is not repeated inside the document.
+    sed 1d "chapters/$buch/$i.xml"
+    i=$((i+1))
+  done
+  j=$((j+1))
+  echo "</biblebook>"
+done < book-list
+echo '</xmlbible>'
diff --git a/convert.sed b/convert.sed
new file mode 100755
index 0000000..2449d1f
--- /dev/null
+++ b/convert.sed
@@ -0,0 +1,31 @@
+#!/bin/sed -f
+# The HTML parser used by xsltproc (option --html) does not understand HTML5.
+# Fix stray < and > characters inside p tags by allowing only
+# known tags to be surrounded by < and >.
diff --git a/convert.xsl b/convert.xsl
new file mode 100644
index 0000000..d8f1d77
--- /dev/null
+++ b/convert.xsl
@@ -0,0 +1,88 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+  <!-- Converts one chapter page (HTML) into a Zefania XML chapter element. -->
+  <xsl:output method="xml" indent="yes" encoding="UTF-8" media-type="text/xml"/>
+  <!-- Section headings become caption elements; vref is the text of the
+       first verse-number span that follows the heading in the document. -->
+  <xsl:template match="h2">
+    <caption vref="{following::span[@class='verse']}">
+      <xsl:apply-templates mode="copy" />
+    </caption>
+  </xsl:template>
+  <!-- Footnote marker inside a verse: emit a note whose content is built
+       from the paragraphs of the div whose id equals the marker's
+       data-param attribute. -->
+  <xsl:template match="span[@class='fussnote']" mode="copy-with-notes">
+    <note n1="x-studynote">
+      <xsl:variable name="id">
+        <xsl:value-of select="@data-param"/>
+      </xsl:variable>
+      <xsl:apply-templates select="//div[@id=$id]/div/p" mode="copy-with-notes"/>
+    </note>
+  </xsl:template>
+  <!-- Copy text nodes, dropping those that contain only whitespace. -->
+  <xsl:template match="text()" mode="copy-with-notes">
+    <xsl:if test="string-length(normalize-space(.)) > 0">
+      <xsl:value-of select="." />
+    </xsl:if>
+  </xsl:template>
+  <!-- Emphasis is mapped to a style element carrying the text content. -->
+  <xsl:template match="em" mode="copy-with-notes">
+    <style fs="emphasize">
+      <xsl:value-of select="." />
+    </style>
+  </xsl:template>
+  <!-- Line breaks are mapped to br elements with art="x-nl". -->
+  <xsl:template match="br" mode="copy-with-notes">
+    <br art="x-nl" />
+  </xsl:template>
+  <!-- Diagnostic mode: stops the transformation with a message for any
+       element that is not one of the kinds listed below. The only
+       visible use is the commented-out call in the verse template. -->
+  <xsl:template match="*" mode="error">
+    <xsl:choose>
+      <xsl:when test="@class='fussnote'"/>
+      <xsl:when test="@class='verse'"/>
+      <xsl:when test="@class='chapter'"/>
+      <xsl:when test="local-name()='em'"/>
+      <xsl:when test="local-name()='br'"/>
+      <xsl:otherwise>
+        <xsl:message terminate="yes">
+          <xsl:value-of select="local-name()" />
+          <xsl:value-of select="@class" />
+          <xsl:apply-templates select="." mode="copy" />
+        </xsl:message>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+  <!-- A paragraph that contains a verse-number span becomes a vers
+       element; vnumber is the text of that span. Only text, footnote
+       markers, em and br children are carried over. -->
+  <xsl:template match="p[span/@class='verse']">
+    <vers>
+      <xsl:attribute name="vnumber">
+        <xsl:value-of select="span[@class='verse']"/>
+      </xsl:attribute>
+      <xsl:apply-templates select="text()|span[@class='fussnote']|em|br" mode="copy-with-notes" />
+      <!--<xsl:apply-templates select="*" mode="error" />-->
+    </vers>
+  </xsl:template>
+  <!-- Annotation divs produce no output of their own. -->
+  <xsl:template match="div[@class='annotation']">
+  </xsl:template>
+  <!-- The markdown div is only a container: process its child elements. -->
+  <xsl:template match="div[@class='markdown']">
+    <xsl:apply-templates select="*" />
+  </xsl:template>
+  <!-- Entry point: wrap everything in a chapter element; cnumber is the
+       text of the first span with class chapter. -->
+  <xsl:template match="/">
+    <chapter cnumber="{//span[@class='chapter']}">
+      <xsl:apply-templates select="//div[@class='markdown']"/>
+    </chapter>
+  </xsl:template>
+  <!-- Copy mode: rebuild elements and attributes under their local names. -->
+  <xsl:template match="*" mode="copy">
+    <xsl:element name="{local-name()}">
+      <xsl:apply-templates mode="copy" select="@*|node()" />
+    </xsl:element>
+  </xsl:template>
+  <xsl:template match="@*" mode="copy">
+    <xsl:attribute name="{local-name()}">
+      <xsl:value-of select="." />
+    </xsl:attribute>
+  </xsl:template>
+</xsl:stylesheet>
diff --git a/ b/
new file mode 100755
index 0000000..125c29c
--- /dev/null
+++ b/
@@ -0,0 +1,17 @@
+#!/bin/sh
+# Download the HTML page of every book into books/ and record the book
+# names, one per line, in book-list.
+rm -rf books/
+mkdir books
+# Truncate the list portably; echo -n is not understood by every sh.
+: > book-list
+# NOTE(review): the curl command below has no URL as shown here; the
+# address of the book index page must be supplied before this can run.
+curl -s | \
+  ./parse-book.sed | \
+  while read -r url
+  do
+    # The sed output is consumed in pairs: a URL line, then a line that
+    # starts with the book name.
+    read -r book
+    # Keep only the part before the first slash.
+    book=${book%%/*}
+    printf '%s\n' "$book"
+    printf '%s\n' "$book" >> book-list
+    curl "$url" > "books/$book"
+  done
diff --git a/ b/
new file mode 100755
index 0000000..ba29d8e
--- /dev/null
+++ b/
@@ -0,0 +1,17 @@
+#!/bin/sh
+# Download every chapter page of every book listed in book-list into
+# chapters/BOOK/NUMBER.
+mkdir -p chapters/
+while read -r buch
+do
+  mkdir -p "chapters/$buch"
+  ./parse-chapter.sed "books/$buch" | \
+    while read -r url
+    do
+      # The sed output is consumed in pairs: a URL line, then the
+      # chapter number line.
+      read -r number
+      printf '%s\n' "$url"
+      printf '%s\n' "$buch $number"
+      curl "$url" > "chapters/$buch/$number"
+    done
+done < book-list
diff --git a/ b/
new file mode 100755
index 0000000..0e80054
--- /dev/null
+++ b/
@@ -0,0 +1,4 @@
diff --git a/ b/
new file mode 100755
index 0000000..c6e56df
--- /dev/null
+++ b/
@@ -0,0 +1,6 @@
+# NOTE(review): the script name is missing after ./ as shown here; it is
+# expected to be the script that prints the assembled bible XML.
+./ > elberfelder2006.xml
+# zip takes the archive name first and the files to add after it.
+zip elberfelder2006.zip elberfelder2006.xml
diff --git a/ b/
new file mode 100755
index 0000000..0964982
--- /dev/null
+++ b/
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Print the path of every downloaded chapter file, one per line, in
+# book-list order.
+while read -r buch
+do
+  i=1
+  # Chapters are numbered from 1; stop at the first number with no file.
+  while [ -e "chapters/$buch/$i" ]
+  do
+    printf '%s\n' "chapters/$buch/$i"
+    i=$((i+1))
+  done
+done < book-list
diff --git a/ b/
new file mode 100755
index 0000000..dace25c
--- /dev/null
+++ b/
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Convert every chapter page to a Zefania XML fragment stored next to it
+# as FILE.xml.
+# NOTE(review): the name of the script that lists the chapter files is
+# missing after ./ as shown here; restore it before running.
+./ | while read -r file
+do
+  printf '%s\n' "$file"
+  # Run the page through convert.sed first, then let xsltproc read the
+  # result from stdin.
+  ./convert.sed "$file" | \
+    xsltproc --encoding utf-8 --html convert.xsl - > "${file}.xml"
+done
diff --git a/parse-book.sed b/parse-book.sed
new file mode 100755
index 0000000..d46191c
--- /dev/null
+++ b/parse-book.sed
@@ -0,0 +1,4 @@
+#!/bin/sed -nf
diff --git a/parse-chapter.sed b/parse-chapter.sed
new file mode 100755
index 0000000..614d2cf
--- /dev/null
+++ b/parse-chapter.sed
@@ -0,0 +1,4 @@
+#!/bin/sed -nf