changelog shortlog tags branches changeset files revisions annotate raw help

Mercurial > hg > werc / bin/werc.rc

changeset 656: 50a9b770bb43
parent: 275899147886
child: fb460a671d19
author: Uriel <u@berlinblue.org>
date: Sun, 19 Aug 2012 01:16:10 +0200
permissions: -rwxr-xr-x
description: Be a bit more careful before we redirect to plain files when we get a trailing / and can't find a dir.
1 #!/usr/local/plan9/bin/rc
2 . ./cgilib.rc
3 . ./werclib.rc
4 . ./wercconf.rc
5 . ./corehandlers.rc
6 . ./fltr_cache.rc
7 cd .. # The libraries above are sourced from the starting directory; everything below runs one level up
8 
9 forbidden_uri_chars='[^a-zA-Z0-9_+\-\/\.,:]' # Regex class matching any character outside the allowed URI set
10 difs=$ifs # Used to restore default ifs when needed
11 
12 # Expected input: ls -F style, $sitedir/path/to/files/
13 # The first four sed commands below, in order: <ls -F+x><symlink hack><Useless?><hidden files>
14 dirfilter='s/\*$//; s,/+\./+,/,g; s,^\./,,; /\/[._][^\/]/d; /'$forbidden_uri_chars'/d; /\/sitemap\.xml$/d; /\/index\.(md|html|txt|tpl)$/d; /\/(robots|sitemap)\.txt$/d; /_werc\/?$/d; '
15 dirclean=' s/\.(md|html|txt)$//; ' # sed script: strips a trailing .md/.html/.txt extension
16 
17 # Careful, the proper p9p path might not be set until initrc.local is sourced
18 path=(. $PLAN9/bin ./bin /bin /usr/bin)
19 
20 headers=lib/headers.tpl # First template handed to `template` when rendering the response
21 res_tail='</body></html>' # Echoed after the rendered template
22 http_content_type='text/html' # Value of the Content-Type response header
23 ll_add handlers_bar_left nav_tree
24 werc_apps=( apps/* ) # One entry per match under apps/; each one's app.rc is sourced below
25 werc_root=`{pwd} # The directory reached by the cd .. above; request handling cds back here
26 sitesdir=sites
27 
28  . ./etc/initrc # Sourced unconditionally; initrc.local below is optional
29 
30 if(test -f etc/initrc.local)
31  . ./etc/initrc.local
32 
33 for(a in $werc_apps)
34  . ./$a/app.rc
35 
36 fn werc_exec_request { # Serve one CGI request: sanitize the URI, issue any redirects, source per-directory configs, then print headers and the rendered page
37     site=$SERVER_NAME
38     base_url=http://$site
39     sitedir=$sitesdir/$site
40     master_template=`{get_lib_file default_master.tpl}
41     current_date_time=`{date}
42 
43     # Note: $REQUEST_URI is not officially in CGI 1.1, but seems to be de-facto
44     # Note: We only urldecode %5F->'_' because some sites (stackoverflow.com?) urlencode it in their links,
45     # perhaps we should completely urldecode the whole url.
46     req_path=`{echo -n $REQUEST_URI | sed 's/\?.*//; s!//+!/!g; s/%5[Ff]/_/g; s/'^$forbidden_uri_chars^'//g; s/\.\.*/./g; 1q'}
47     req_url=$base_url^$req_path
48     local_path=$sitedir$req_path
49     local_file=''
50     ifs='/' { args=`{echo -n $req_path} } # Split the request path into its components
51 
52     # Preload post args for templates where cgi's stdin is not accessible
53     if(~ $REQUEST_METHOD POST) {
54         load_post_args
55         login_user
56     }
57 
58     if(~ $req_path */index) # Canonicalize .../index to .../
59         perm_redirect `{echo $req_path | sed 's,/index$,/,'}
60 
61     if(~ $local_path */) {
62         if(test -d $local_path)
63             local_path=$local_path^'index'
64 
65         # If path has a trailing /, and a plain file exists matching that 'name.*'
66         # remove the trailing / and redirect to 'name' .
67         # XXX: This redir might step on apps with synthetic dirs.
68         if not {
69             # TODO: Maybe we should be smarter about how to check for existing files
70             if(ls `{basename -d $local_path}^.* >/dev/null >[2]/dev/null) # The .* must stay unquoted: rc does not expand quoted glob characters
71                 perm_redirect `{echo $req_path|sed 's,/+$,,'}
72         }
73     }
74     if not if(~ $req_path *'.' *',' *';' *':') # Drop stray trailing punctuation
75         perm_redirect `{echo $req_path | sed 's/[.,;:)]$//'}
76     if not if(test -d $local_path) # Directory requested without its trailing /
77         perm_redirect $req_path^'/'
78 
79     if(! ~ $#args 0)
80         ifs=$NEW_LINE { pageTitle=`{ echo $args|sed -e 's/ / - /g' -e 's/([a-z])-([a-z])/\1 \2/g' -e 's/_/ /g' } }
81 
82     cd $sitedir # Walk down the request path, sourcing every _werc/config found on the way
83     req_paths_list='/' # Note: req_paths_list doesn't include 'synthetic' dirs.
84     conf_wd='/' # Used in config files to know where we are in the document tree.
85     if(test -f _werc/config)
86         . _werc/config
87     for(i in $args) {
88         conf_wd=$conf_wd^$i
89         req_paths_list=($req_paths_list $conf_wd)
90         if(test -d $i) {
91             conf_wd=$conf_wd'/'
92             cd $i
93             if(test -f _werc/config)
94                 . _werc/config
95         }
96     }
97     cd $werc_root
98 
99     if(~ $#perm_redir_to 1) # Not set in this function; presumably set by one of the configs sourced above
100         perm_redirect $perm_redir_to
101     for(l in $perm_redir_patterns) {
102         p=$$l # l names a variable holding a (pattern target) pair
103         r=$p(1)
104         # If target is an absolute URL, anchor the pattern at the start of the path
105         if(~ $p(2) http://* https://*)
106             r='^'$r
107         t=`{ echo $req_path | sed 's!'^$r^'!'^$p(2)^'!' } # Malicious danger!
108 
109         if(! ~ $"t '' $req_path) # Redirect only if the substitution changed the path
110             perm_redirect $t
111     }
112 
113     setup_handlers
114 
115 
116     # Set Page title
117     if(! ~ $local_file '') {
118         t=`{get_file_title $local_file}
119         if(! ~ $"t '')
120             pageTitle=$t
121     }
122 
123     # Fallback title. The URL-derived title above is skipped when $args is empty (e.g. a request for /), so this can still trigger.
124     if(~ $"pageTitle '')
125         pageTitle=$"siteTitle' '$"siteSubTitle
126 # if not
127 # pageTitle=$"pageTitle' | '$"siteTitle' '$"siteSubTitle
128 
129     for(h in $extraHttpHeaders)
130         echo $h
131     echo 'Content-Type: '^$http_content_type
132     echo # End of HTTP headers
133 
134     if(! ~ $#debug 0)
135         dprint $"SERVER_NAME^$"REQUEST_URI - $"HTTP_USER_AGENT - $"REQUEST_METHOD - $"handler_body_main - $"master_template
136 
137     if(~ $REQUEST_METHOD HEAD) # HEAD requests get the headers only
138         exit
139 
140     template $headers $master_template | awk_buffer
141     echo $res_tail
142 }
143 
144 werc_exec_request # To capture stderr while debugging, append: >[2]/tmp/wercdebug.log