From a88c35052b18071675bbc759b4290fbc72550455 Mon Sep 17 00:00:00 2001 From: =?utf8?q?cat=20=C3=A6scling?= <11325618-aescling@users.noreply.gitlab.com> Date: Wed, 26 Jul 2023 00:46:08 -0400 Subject: [PATCH] Refactor for eltest and case All tested functionality appears to work as expected grep(1) and sed(1) are no longer used at all! We still require tr(1) and urlencode(1); both would be very difficult to replace with execline * Remove extraneous documentation * Fix improperly unquoted backslashes * Revert * Replace sed usage in httpd.execline * Fix bug in http-header-parse * Fix http-start-line-parse * Fix case-sensitivity in client request header validation * Fix many, many bugs --- README.md | 18 +- .../binaries/clean-dot-directories.execline | 14 ++ .../binaries/http-error-response.execline | 8 +- .../binaries/http-get-extra-headers.execline | 3 +- .../binaries/http-header-parse.execline | 179 +++++++----------- .../binaries/http-print-header-files.execline | 30 +-- .../binaries/http-start-line-parse.execline | 52 ++--- visible-to-httpd/binaries/httpd.execline | 129 +++---------- .../binaries/supported-hostname-test.execline | 13 +- 9 files changed, 165 insertions(+), 281 deletions(-) create mode 100755 visible-to-httpd/binaries/clean-dot-directories.execline diff --git a/README.md b/README.md index a53e7f1..1cdef0f 100644 --- a/README.md +++ b/README.md @@ -60,21 +60,17 @@ feasible configuration is to place statically linked dependencies into `./binaries`: + [s6-portable-utils](https://skaret.org/software/s6-portable-utils/) -`s6-applyuidgid`, `s6-test` +`s6-applyuidgid`, `s6-echo` + [9base](https://tools.suckless.org/9base/): -`tr(1)` `read(1)`, `hoc(1)`, `sed(1)`, `grep(1)`, `urlencode(1)`, -`cleanname(1)`, `cat(1)` +`tr(1)` `read(1)`, `urlencode(1)` + [toybox](http://www.landley.net/toybox/): `wc(1)`, -`date(1p)`, `printenv(1)`, `stat(1)` +`date(1p)`, `printenv(1)`, `stat(1)`, `cat(1)` -we heavily rely on plan 9 regular expression semantics for `sed(1)` and
-`grep(1)`; i expect translating them to coreutils or \*BSD userspace would be an -effort. so long as i am writing this code for myself, i will not perform that -effort for you. + - +note that if you build execline and s6-portable-utils with slashpackage +support, they will expect to find commands in `/package`, and you will +need to somehow ensure it exists in the chroot directory. ### configuration ### diff --git a/visible-to-httpd/binaries/clean-dot-directories.execline b/visible-to-httpd/binaries/clean-dot-directories.execline new file mode 100755 index 0000000..3a3117e --- /dev/null +++ b/visible-to-httpd/binaries/clean-dot-directories.execline @@ -0,0 +1,14 @@ +#!/binaries/execlineb -W +# `clean-dot-directories.execline filename`: clean dot directories out of +# `filename`. +# +# effectively `sed s@/\.\.?/@/@g` + +importas given 1 +case -N -- ${given} { + "(.*)/\\.\\.?(/.*)?" { + elgetpositionals + clean-dot-directories.execline ${1}${2} + } +} +s6-echo -n -- ${given} diff --git a/visible-to-httpd/binaries/http-error-response.execline b/visible-to-httpd/binaries/http-error-response.execline index c388f66..8068aae 100755 --- a/visible-to-httpd/binaries/http-error-response.execline +++ b/visible-to-httpd/binaries/http-error-response.execline @@ -6,7 +6,7 @@ heredoc 0 "" foreground { - if -t { s6-test \${#} = 3 } + if -t { eltest \${#} = 3 } log.execline "fatal: ??"${1}"??: "${3} } @@ -21,14 +21,14 @@ multisubstitute { backtick -x -n error_response_directory { ifelse { - s6-test -d configuration/error_response_pages/${hostname}/${1} + eltest -d configuration/error_response_pages/${hostname}/${1} -a -r configuration/error_response_pages/${hostname}/${1} } { s6-echo -n -- configuration/error_response_pages/${hostname}/${1} } ifelse { - s6-test -d configuration/error_response_pages/-fallback/${1} + eltest -d configuration/error_response_pages/-fallback/${1} -a -r configuration/error_response_pages/-fallback/${1} } { @@ -39,7 +39,7 @@ backtick -x -n 
error_response_directory { if { # if there is an error response page for this status code: - ifelse { s6-test -v error_response_directory } + ifelse { eltest -v error_response_directory } { importas -i -u error_response_directory error_response_directory cd ${error_response_directory} diff --git a/visible-to-httpd/binaries/http-get-extra-headers.execline b/visible-to-httpd/binaries/http-get-extra-headers.execline index 2cacb1f..e45343a 100755 --- a/visible-to-httpd/binaries/http-get-extra-headers.execline +++ b/visible-to-httpd/binaries/http-get-extra-headers.execline @@ -1,6 +1,6 @@ #!/binaries/execlineb -WS0 # -# http-get-particular-headers.execline [status-code] +# http-get-extra-headers.execline [status-code] # # sufficiently annoyingly complex resource-specific HTTP header logic that # gets reused a lot @@ -13,7 +13,6 @@ # user need not bother with editing files containing `\r\n`s # (however, it is almost certainly considerably more annoying to use) # -# TODO: ? status code overrides support?? # TODO: write a GUI?? with like, `yad(1)` or something??? idk multisubstitute { diff --git a/visible-to-httpd/binaries/http-header-parse.execline b/visible-to-httpd/binaries/http-header-parse.execline index 73317c5..66d6be0 100755 --- a/visible-to-httpd/binaries/http-header-parse.execline +++ b/visible-to-httpd/binaries/http-header-parse.execline @@ -4,12 +4,6 @@ # expects a series of http headers from standard input # exits syntax error otherwise # -# current hard dependencies on external ./httpd.execline subscripts: -# -# + ./get-line-from-client.execline -# + ./http-error-response.execline: and thus, -# + ./log.execline -# # the first argument is a program to call when parsing a `Host` header # for the first time (usually the first line, but this convention is not # required); it should exit zero on a hostname the program supports, @@ -37,7 +31,6 @@ # the remaining program is supplied **after an initial argument we must # preserve and use**. 
we have not found a way to handle this without losing # efficiency (see the shebang (the "#!" line, on line 1)) -# importas supported_hostname_test 1 shift elgetpositionals @@ -55,25 +48,15 @@ emptyenv -P # this is a heavy weakness for implementing actual program logic; we expect # the author of execline to never provide a convenient way to circumvent this # problem, as supporting Actual Programming Logic is out of scope for the -# language (for example: see all the use of external tools `grep` and `sed` -# throughout `httpd.execline`, as execline’s string manipulation tools are -# (deliberately) very underpowered) -# +# language -backtick -n current_line { get-line-from-client.execline } +backtick -E -n current_line { get-line-from-client.execline } ### terminating case: empty line -# -# exec(3p) the remaining program -# -ifelse { - pipeline { printenv current_line } - grep -s "^ *$" -} +ifelse { eltest \${current_line} =~ "^ *$" } { # the client MUST send a Host header, halt otherwise - # - ifelse { s6-test ! -v http_header_parse_host } + ifelse { eltest ! -v http_header_parse_host } { http-error-response.execline 400 @@ -91,103 +74,77 @@ ifelse { # # after parsing, exec(3p) this script with the hostname validating # subscript, then the remaing program, as arguments -# -backtick -x -n header_name { - pipeline { printenv current_line } - pipeline { sed -n "s/^([^ :]+):.*/\\1/p" } - pipeline { tr A-Z a-z } - read -} -backtick -x -n header_contents { - pipeline { printenv current_line } - # strip spaces or tabs from end of line - # then print the second token verbatim - # - # whitespace between header name and contents is optional - # - pipeline { sed -n "s/( )*$//; s/^[^ ]+ *([^ ].*)/\\1/p" } - read -} -ifelse { - s6-test ! -v header_name -o - ! 
-v header_contents -} -{ - importas -i current_line current_line - http-error-response.execline - 400 - "syntax error" - "http-header-parse.execline: bad header line: \""${current_line}\" +backtick -E -n current_line_stripped { + case -N -- ${current_line} { + # strip trailing whitespace (capture must end on a non-blank: a bare greedy (.*) would keep it) + "^(.*[^ \t])[ \t]*$" { + importas current_line_stripped 1 + s6-echo -n -- ${current_line_stripped} + } + } } +case -N -- ${current_line_stripped} { + "([^ :]+): *([^ ].*)" { + multisubstitute { + importas -i header_name_anycase 1 + importas -i header_contents 2 + } + backtick -E header_name { + pipeline { s6-echo -n -- ${header_name_anycase} } + tr A-Z a-z + } -multisubstitute { - importas -i -u header_name header_name - importas -i -u header_contents header_contents -} + #### special case: host header + # + # short circuits the program + # TODO: [hard, design problem]: short circuit but exec(3p) into the + # remaining program + ifelse { eltest \${header_name} = host } + { + # we MUST 400 on multiple Host headers + ifelse { eltest -v http_header_parse_host } + { + http-error-response.execline + 400 + "syntax error" + "http-header-parse.execline: multiple Host headers!??"
- } + # 400 on syntactically illegal hostnames (-n: error branch runs when the regex does NOT match) + ifelse -n { + define hexadecimal "([0-9]|[a-f]|[A-F])" + multisubstitute { + # incidentally covers all of ipv4 + define domain_name "(([a-z]|[A-Z]|[0-9])([a-z]|[A-Z]|[0-9]|[\-.])*)" - # validate hostnames, exiting on syntactically illegal ones - # - ifelse { - define hexadecimal "[0-9a-fA-F]" - multisubstitute { - # + dns-resolved hostname - define domain_name "[a-zA-Z0-9\-.]+" + # TODO: incorrect + define approximate_ipv6 "("${hexadecimal}"+?(::"${hexadecimal}")+)" - # + ipv6 address (TODO: handle robustly) - define approximate_ipv6 "("${hexadecimal}"+)?(::"${hexadecimal}")+" + define port ":[0-9]+" + } + eltest \${header_contents} =~ "^ *("${domain_name}"|"${approximate_ipv6}")("${port}")? *$" + } + { + http-error-response.execline + 400 + "syntax error" + "illegal host: "\"${header_contents}\" + } - # + port string - define port ":[0-9]+" + # short circuits on unsupported hostnames + if { ${supported_hostname_test} ${header_contents} } + export http_header_parse_${header_name} ${header_contents} + ${0} + ${supported_hostname_test} + ${@} } - # - # as we understand it, a valid ipv4 address is always a valid - # domain name address, so we do not actually have to handle - # that… - # - pipeline { s6-echo -n -- ${header_contents} } - grep -sv "^ *(("${domain_name}")|("${approximate_ipv6}"))("${port}")?
*$" - } - { - http-error-response.execline - 400 - "syntax error" - "illegal host: "\"${header_contents}\" - } - - # short circuit on unsupported hostnames - # - ifelse -n { ${supported_hostname_test} ${header_contents} } - { - heredoc 0 "" export http_header_parse_${header_name} ${header_contents} - ${@} - + ${0} + ${supported_hostname_test} + ${@} } - export http_header_parse_${header_name} ${header_contents} - ${0} - ${supported_hostname_test} - ${@} } - -export http_header_parse_${header_name} ${header_contents} -${0} - ${supported_hostname_test} - ${@} +http-error-response.execline + 400 + "syntax error" + "http-header-parse.execline: bad header line: \""${current_line}\" diff --git a/visible-to-httpd/binaries/http-print-header-files.execline b/visible-to-httpd/binaries/http-print-header-files.execline index 3b5980a..7105f21 100755 --- a/visible-to-httpd/binaries/http-print-header-files.execline +++ b/visible-to-httpd/binaries/http-print-header-files.execline @@ -14,14 +14,13 @@ # base case: quit if there are no arguments ifelse { importas "#" "#" - s6-test ${#} = 0 + eltest ${#} = 0 } { exit 0 } # recursive case: print the current header - importas header_file 1 shift elgetpositionals @@ -29,9 +28,15 @@ emptyenv -P # performs `basename ${header_file}` backtick -E -n header_name { - pipeline { s6-echo -n -- ${header_file} } - pipeline { sed "s@.*/([^/]*)@\\1@" } - tr -d " \t\r\n" # paranoid + pipeline { + case -N -- ${header_file} { + ".*/([^/]*)$" { + importas basename 1 + s6-echo -n -- ${basename} + } + } + } + tr -d " \t\r\n" # paranoia } # ignore protected headers @@ -40,7 +45,7 @@ ifelse { pipeline { s6-echo -n -- ${header_name} } tr A-Z a-z } - s6-test -v httpd_execline_protected_header_${header_name_lowercase} + eltest -v httpd_execline_protected_header_${header_name_lowercase} } { foreground { log.execline "WARNING: configuration error: ignoring protected header: \""${header_name}\" } @@ -48,25 +53,24 @@ ifelse { } # short circuit on overridden header 
-ifelse { s6-test -v http_print_header_directories_${header_name} } +ifelse { eltest -v http_print_header_directories_${header_name} } { foreground { log.execline "ignoring overridden header_name: "\"${header_name}\" } http-print-header-directories.execline ${@} } # otherwise, print out the header line - multisubstitute { importas -D -no_hostname_parsed hostname http_header_parse_host importas -D -no_resource_parsed requested_resource http_start_line_parse_resource } # wrapper around s6-echo for hostname and resource substitutions in header contents define header_substitution_script -"multisubstitute { - define hostname "${hostname}" - define resource "${requested_resource}" -} -s6-echo -n -- " + "multisubstitute { + define hostname "${hostname}" + define resource "${requested_resource}" + } + s6-echo -n -- " # we’ll strip out `\r`s and `\n`s from file contents, in # case the configuration should ever be made in a mischevious way diff --git a/visible-to-httpd/binaries/http-start-line-parse.execline b/visible-to-httpd/binaries/http-start-line-parse.execline index a358a1a..0fc9c04 100755 --- a/visible-to-httpd/binaries/http-start-line-parse.execline +++ b/visible-to-httpd/binaries/http-start-line-parse.execline @@ -2,49 +2,35 @@ # expects a start line from an http request from standard input # exits syntax error otherwise # -# current hard dependencies on `httpd.execline` subscripts: -# -# + ./get-line-from-client.execline -# + ./http-error-response.execline: and, thus -# + ./log.execline -# # on success, exports # # + http_start_line_parse_method # + http_start_line_parse_resource # + http_start_line_parse_version # -# containing the request's method, requested resource, and http version -# it then exec(3p)s into its command line +# containing the request's method, requested resource, and http version, +# and then exec(3p)s into its command line -backtick -n start_line { get-line-from-client.execline } -backtick -x -n http_start_line_parse_method { - pipeline { 
printenv start_line } - pipeline { sed -n "s@^(CONNECT|DELETE|GET|HEAD|OPTIONS|PATCH|POST|PUT|TRACE) +.*@\\1@p" } - read -} -backtick -x -n http_start_line_parse_resource { - pipeline { printenv start_line } - pipeline { sed -n "s@^[^ ]+ +(/[^ ]*) +.*@\\1@p" } - read -} -backtick -x -n http_start_line_parse_version { - pipeline { printenv start_line } - pipeline { sed -n "s@.*HTTP/([0-9]\.[0-9]) *@\\1@p" } - read -} +backtick -E -n start_line { get-line-from-client.execline } +case -N -- ${start_line} { + "^(CONNECT|DELETE|GET|HEAD|OPTIONS|PATCH|POST|PUT|TRACE) +(/[^ ]*) +HTTP/([0-9]\.[0-9]) *$" { + # the -S1 flag to execlineb will auto-substitute $1 and the like, + # we need this workaround. the names are chosen so as to not cause + # accidental substitutions in ${@} + multisubstitute { + importas _http_start_line_parse_method 1 + importas _http_start_line_parse_resource 2 + importas _http_start_line_parse_version 3 + } + export http_start_line_parse_method ${_http_start_line_parse_method} + export http_start_line_parse_resource ${_http_start_line_parse_resource} + export http_start_line_parse_version ${_http_start_line_parse_version} -importas -i -u start_line start_line -ifelse { - s6-test ! -v http_start_line_parse_method -o - ! -v http_start_line_parse_resource -o - ! -v http_start_line_parse_version + emptyenv -P + ${@} + } } -{ http-error-response.execline 400 "syntax error" "http-start-line-parse.execline: (bad) start line: \""${start_line}\" -} - -${@} diff --git a/visible-to-httpd/binaries/httpd.execline b/visible-to-httpd/binaries/httpd.execline index 3134664..9be16f1 100755 --- a/visible-to-httpd/binaries/httpd.execline +++ b/visible-to-httpd/binaries/httpd.execline @@ -1,58 +1,13 @@ #!/usr/local/bin/execlineb -WP ## `httpd.execline`: a simple static web server ### -# -# i would like to note that simplicity is relative; the *implementation* of -# this simple functionality is not exactly simple. 
there are several subscripts -# with sufficently complex and (in all but one case) reusable functionality -# that we separate them out. -# unfortunately, many themselves are (currently )dependent on other subscripts. - -### the http/1.1 protocol, oversimplified ### -# -# a client sends a request that normally looks something like -# -# ``` -# > [http_method] [resource] [http version]\r -# > Host: [hostname]\r -# > [quite possibly many other headers]\r -# > \r -# ``` -# -# (note the `\r`s before newlines. -# (also: the Host header does not *have* to be the second line) -# -# we respond to the client appropriately, using to the following template: -# -# ``` -# < HTTP/1.1 [status code] [status message]\r -# < Content-Type: [MIME type of the message body]\r -# < Content-Length: [size of message body in bytes]\r -# < Date: [the time as of this response]\r -# < [Last-Modified: [date of the resource’s last revision]]\r -# < \r -# < [content, sent verbatim] -# ``` -# -# we do not follow the http/1.1 protocol precisely, but it is enough to satisfy -# web browsers and tools like `curl(1)`, and to handle misbehaving clients. -# - ### brief httpd.execline overview ## # # 1. sandboxing (paranoia?) # 2. read, validate the start line and Host header sent by the client # 3. find resource, determine its filetype # 4. send response to client -# #### 1. sandboxing ### -# -# this recreates a security measure we picked up from `publicfile`: if this -# server should somehow be hijacked, it will not be able to escape the -# directory it runs in, and it will be running as an unpriveleged user -# in the setup of this server, the user `httpd` owns no files or directories in -# the change-rooted directory, nor does it have any write permissions for those -# files and directories, so a hijacked process will not be able to do very much export PATH /binaries chroot . 
s6-applyuidgid -U -z @@ -60,7 +15,7 @@ s6-applyuidgid -U -z # see `./log.execline` export program_name httpd.execline -# see end of script: handle crashes (or syntax errors in this script,) cleanly +# see end of script: handle crashes cleanly if -X -n -t { #### 2. read from client, with interspersed validation ### ##### 2.1. start line ### @@ -69,10 +24,7 @@ if -X -n -t { importas -i method http_start_line_parse_method importas -i requested_resource http_start_line_parse_resource } - ifelse -n { - s6-test \${method} = HEAD -o - \${method} = GET - } + ifelse -n { eltest \${method} =~ HEAD|GET } { http-error-response.execline 501 @@ -106,30 +58,27 @@ if -X -n -t { backtick -n resource { cd supported_domains - backtick -n candidate_resource { - backtick -n with_dot_and_dot_dot { - pipeline { s6-echo -n -- ${requested_resource} } - # - # strip query string, or resource location - # - pipeline { sed "s/[?#].*//; s@/\\.\\.?/@/@g" } - # decode url-encodings, if any + backtick -E -n candidate_resource { + backtick -E -n decoded_resource { + backtick -E -n without_query_string { + case -N -- ${requested_resource} { + "([^?#]*)[?#].*" { + elgetpositionals + s6-echo -n -- ${1} + } + } + s6-echo -n -- ${requested_resource} + } + pipeline { s6-echo -n -- ${without_query_string} } urlencode -d } - importas -i -u with_dot_and_dot_dot with_dot_and_dot_dot + # include the hostname in the final resource name - # - if { s6-echo -n -- ${hostname} } - # handle dot and dot-dot directory semantics - # we prepend the hostname to the result, ensuring - # `${resource}` will route to somewhere inside the - # subdirectory named after the host - cleanname ${with_dot_and_dot_dot} + backtick -E -n without_dot_and_dot_dot { clean-dot-directories.execline ${decoded_resource} } s6-echo -n -- ${hostname}/${without_dot_and_dot_dot} } - importas -i -u candidate_resource candidate_resource # `${directory}` -> `${directory}/index.xhtml` - ifelse { s6-test -d \${candidate_resource} } + ifelse { eltest -d \${candidate_resource} } { s6-echo -n --
${candidate_resource}/index.xhtml } @@ -137,7 +86,7 @@ if -X -n -t { } importas -i resource resource - ifelse { s6-test ! -r supported_domains/${resource} } + ifelse { eltest ! -r supported_domains/${resource} } { http-error-response.execline 404 @@ -154,13 +103,13 @@ if -X -n -t { cat ${Content_Type_override_file} } - backtick -D "no.extension" -n extension { - pipeline { printenv resource } - # strip everything up to the non-periods after the final - # period in the string - # - pipeline { sed -n "s/.+\\.([^.]+)$/\\1/p" } - read + backtick -E -D "no.extension" -n extension { + case -N -- ${resource} { + ".+\\.([^.]+)$" { + elgetpositionals + s6-echo -n -- ${1} + } + } } # publicfile-style custom filetypes: `file.{1}={2}` is served @@ -168,25 +117,16 @@ if -X -n -t { # transformed into periods, allowing files like # `index.text=x:market` being served as `text/x.market` ifelse { - pipeline { printenv extension } # this regex matches exactly what `publicfile` does - grep -s "[a-zA-Z0-9]+=[^=]+$" + eltest \${extension} =~ "[a-zA-Z0-9]+=[^=]+$" } { - pipeline { printenv extension } + pipeline { s6-echo -n -- ${extension} } tr := ./ } - # use `./configuration/Content-Type_table` as a key-value store: files with - # the name ${extension} map to the `Content-Type` embedded in - # their contents. for example, `./configuration/Content-Type_table/xhtml` - # contains the text “application/xhtml+xml” (with no newline) - # (it is fine if the file contains a single newline at the end) - # - # if no key exists with the extension’s name, we fall back on - # “application/octet-stream”, as we should - importas -i -u extension extension - ifelse { s6-test -r configuration/Content-Type_table/${extension} } + # use `./configuration/Content-Type_table` as a key-value store + ifelse { eltest -r configuration/Content-Type_table/${extension} } { cat configuration/Content-Type_table/${extension} } @@ -212,9 +152,7 @@ if -X -n -t { # current time of response: SHOULD be provided (why?)
backtick -n Date { date -u ${date_format} } - # allow for arbitrary HTTP header and HTTP status code overrides. - # for an example where the former might be useful, consider Content - # Security Policy; for the latter, consider HTTP 301 redirects + # allow for arbitrary HTTP header and HTTP status code overrides # # be warned!! we do not validate these overrides! backtick -n extra_headers { @@ -250,7 +188,7 @@ Date: "${Date}${extra_headers}" " } foreground { - if -t { s6-test \${method} = GET } + if -t { eltest \${method} = GET } cat supported_domains/${resource} } # hack: write(3p) does not guarantee that all the @@ -261,11 +199,6 @@ Date: "${Date}${extra_headers}" s6-sleep -m 512 # TODO: (?) persistent connections? (recursion??) } - ##### end of script - # catches crashes (and syntax errors,,), and other unexpected things - # useful for debugging! otherwise, clients might do strange things - # - # probably a bad sign this is still left in lol http-error-response.execline 500 "internal server error" diff --git a/visible-to-httpd/binaries/supported-hostname-test.execline b/visible-to-httpd/binaries/supported-hostname-test.execline index 060537e..ed767b3 100755 --- a/visible-to-httpd/binaries/supported-hostname-test.execline +++ b/visible-to-httpd/binaries/supported-hostname-test.execline @@ -4,16 +4,11 @@ # tests if `hostname` is supported by this server, by checking if # a directory by that exact name exists in the current working directory # immediately 404s otherwise -# -# hard depends on these external `httpd.execline` subscripts: -# -# + ./http-error-response.execline: and thus, -# + ./log.execline -# -# reject unsupported hostnames -# -ifelse { s6-test ! -d \supported_domains/${1} } +ifelse { + eltest ! -d \supported_domains/${1} -o + ! -r \supported_domains/${1} +} { if { http-error-response.execline -- 2.47.3