#!/bin/sed -f
#
# Pre-processes an HTML5 document, one line at a time, so that it can be
# parsed by `xsltproc --html`:
#   1. HTML5 sectioning elements are renamed to <div>.
#   2. Bare ampersands on lines containing "data-href" are escaped.
#   3. Inside a <p>...</p> found on a single line, every '<' and '>' is
#      escaped except for the known inline tags <span ...>, </span>,
#      <em> and </em>.
#
# NOTE: nothing but the interpreter and its single "-f" argument may appear
# on the shebang line; every command below must be on its own line.
# The "\|" alternation used below is a GNU sed extension.

# xsltproc --html doesn't understand HTML5 sectioning elements, so rewrite
# their opening and closing tags ("<name" and "</name") as div tags.
s:\(</*\)\(section\|header\|nav\|footer\|article\):\1div:g

# Escape ampersands on lines carrying a data-href attribute.
# "\&" in the replacement is a literal ampersand.
/data-href/s/&/\&amp;/g

# Fix incorrect < and > inside p tags, by allowing only known tags to be
# surrounded by < and >.
#
# "t a" followed directly by ":a" is a no-op jump whose only purpose is to
# reset the "a substitution succeeded" flag, which the commands above may
# have set.  Without it, "t fix" below could fire even though the <p>
# pattern did not match.
t a
:a

# Strip the first <p> and the last </p> on the line (".*" is greedy).
s/<p>\(.*\)<\/p>/\1/

# Only lines that actually contained <p>...</p> go on to be fixed;
# every other line is printed as it stands.
t fix
b

:fix
# Escape every angle bracket in what was the paragraph ...
s/</\&lt;/g
s/>/\&gt;/g

# ... then turn the known inline tags back into real markup.
# "[^;]*" stops the match at the next ';', so a span's attributes cannot
# run across another escaped entity (nor contain a ';' themselves).
s/&lt;span\([^;]*\)&gt;/<span\1>/g
s/&lt;\/span&gt;/<\/span>/g
s/&lt;em&gt;/<em>/g
s/&lt;\/em&gt;/<\/em>/g

# Finally wrap the whole line in the <p> tags removed above.
s:.*:<p>&</p>: