changelog shortlog tags branches files raw gz bz2 help

Mercurial > hg > werc / changeset: Much better (I hope) get_html_title implementation: first try to find <title>; if that fails, get the first non-tag string in the file.

changeset 564: 7f0505f8e891
parent 563: ccfdb0fb8c2f
child 565: a370464b995b
author: uriel@vm41.cat-v.org
date: Thu, 30 Jul 2009 02:12:06 +0200
files: bin/werclib.rc
description: Much better (I hope) get_html_title implementation: first try to find <title>; if that fails, get the first non-tag string in the file.
     1.1--- a/bin/werclib.rc	Wed Jul 29 01:41:18 2009 +0200
     1.2+++ b/bin/werclib.rc	Thu Jul 30 02:12:06 2009 +0200
     1.3@@ -98,12 +98,13 @@
     1.4 }
     1.5 
     1.6 fn get_html_title {
     1.7-    # H1 is not reliable because htmlroff doesn't use it :(
     1.8-    #desc=`{cat $1 | sed 32q | grep '<[Hh]1>' | sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'}
     1.9-    # Pick the first line of body  instead
    1.10-    desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $1}
    1.11-    if(~ $#desc 0)
    1.12-        desc=`{sed 's/<[^>]*>//g; 1q' < $1}
    1.13+    t=`{sed -n '32q; s/^.*<[Tt][Ii][Tt][Ll][Ee]> *([^<]+) *(<\/[Tt][Ii][Tt][Ll][Ee]>.*)?$/\1/p' < $1}
    1.14+
    1.15+    # As a backup we might want to pick the first 'non-tag' text in the file with:
    1.16+    if(~ $"t '')
    1.17+        t=`{sed -n -e 's/^(<[^>]+>)*([^<]+).*/\2/p; 32q' < $1 | sed 1q}
    1.18+
    1.19+    echo $t
    1.20 }
    1.21 
    1.22 fn get_file_title {