From 36c0f2f4d8aef72776a337eef05bec8cd0360e83 Mon Sep 17 00:00:00 2001 From: Benjamin Franzke Date: Thu, 28 Jun 2012 10:09:28 +0200 Subject: Add scripts to download elberfelder from die-bibel.de That is: download in shell scripts using curl, parse books and chapters with sed. Then prepare html with sed to be converted to zefania xml using an xsl stylesheet. --- .gitignore | 5 +++ concat.sh | 24 ++++++++++++++ convert.sed | 31 ++++++++++++++++++ convert.xsl | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++ download-books.sh | 17 ++++++++++ download-chapters.sh | 17 ++++++++++ download.sh | 4 +++ gen-bible.sh | 6 ++++ list-chapters.sh | 11 +++++++ make-xml.sh | 8 +++++ parse-book.sed | 4 +++ parse-chapter.sed | 4 +++ 12 files changed, 219 insertions(+) create mode 100644 .gitignore create mode 100755 concat.sh create mode 100755 convert.sed create mode 100644 convert.xsl create mode 100755 download-books.sh create mode 100755 download-chapters.sh create mode 100755 download.sh create mode 100755 gen-bible.sh create mode 100755 list-chapters.sh create mode 100755 make-xml.sh create mode 100755 parse-book.sed create mode 100755 parse-chapter.sed diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cf7201c --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +books/ +chapters/ +book-list +elberfelder2006.zip +elberfelder2006.xml diff --git a/concat.sh b/concat.sh new file mode 100755 index 0000000..e9b203e --- /dev/null +++ b/concat.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +echo '' +echo '' +echo '' +echo 'Elberfelder 2006' +echo 'Zefania XML Bible Markup Language' +echo '' +j=1 +cat book-list | while read buch +do + echo "" + + i=1 + while [ -e "chapters/$buch/$i" ] + do + cat "chapters/$buch/$i.xml" | sed 1d + i=$((i+1)) + done + + j=$((j+1)) + echo "" +done +echo '' diff --git a/convert.sed b/convert.sed new file mode 100755 index 0000000..2449d1f --- /dev/null +++ b/convert.sed @@ -0,0 +1,31 @@ +#!/bin/sed -f + +/data-href/s/&/&/g + +# xsltproc --html 
doesn't understand html5 +s/section/div/g +s/header/h1/g +s/