#!/bin/bash # url-query - 2024-03-24 # # query websites by key with text selection or given query-text # # open URL(s) for given key(s), substituting the current text selection # or open URL for given key and substitution part(s) # (see config file for details) # todo: scan for new sources - https://gitlab.com/surfraw/Surfraw # todo: create default config # default config file config=${XDG_CONFIG_HOME:-$HOME/.config}/url-query.tsv # don't quote substitution which includes spaces (1 = true, 0 = false) noquote=1 # default menu format - "{k}" = key, "{n}" = name defmfmt='{n} - {k}' # command to open each URL opencmd=xdg-open # command to print text selection printselcmd=xsel # command to run awk script (mawk is *fast* and very well maintained) awkcmd=awk # file to store command statistics ("" = disabled) statsfile=${XDG_DATA_HOME:-$HOME/.local/share}/$(basename -- "$(readlink -f "$0")")-stats.tsv ############################################################ usage() { cat <<- sss $(sed -n '3s/^..//p;5s/^../ /p;6q' "$0") usage: $(basename -- "$0") [opt...] key query-text... options -s use text selection as query-text and interpret arg(s) as key(s) -p print URL(s) to stdout instead of opening -q toggle quoting of query-text which include whitespace (default: noquote=$noquote) -m [f] menu: print list for a selector/menu, with optional format f (default: $defmfmt) -c f use config file f instead of default ($config) -n k n t create new record for (k)ey (n)ame (t)arget -v verify config (and report any problems) -b print config in bookmarks.html format to stdout note - option -[nb] causes other options to be ignored except -c - option -b reads up to 3 arguments if given: prefix tags placeholder (defaults: prefix="" ; tags="url-queries" ; placeholder="%s") - option -m format is a string with these substitutions: {k} = key, {n} = name e.g. "{k} - {n}" -> "ia - Internet Archive" config file - format (tsv): key name target - target: url | key | key-set - key-set: key key[ ...] - blank lines and comments (i.e. lines beginning with #) are ignored sss } function warn { typeset r z [[ -t 2 && -n $(type -p tput) ]] && (( $(tput colors) > 1 )) && r=$(tput setaf 1) z=$(tput sgr0) (( $# )) && printf "%s%s%s\n" "$r" "$*" "$z" >&2 } die() { [[ -n $2 ]] && warn "${*:2}" [[ $1 == 2 && -t 2 ]] && usage >&2 exit "${1:-1}" } updatestats() { [[ ! -w $statsfile || -z $1 ]] && return awks=' BEGIN { OFS = FS = "\t" now = strftime("%F", systime()) # ENVIRON prevents backslash interpretation x = ENVIRON["x"] } $3 == x { $2++ $1 = now m = 1 } 1 END { if(m != 1) print now, 1, x } ' tmp=$(mktemp) [[ -f $statsfile ]] || touch -- "$statsfile" x="$1" $awkcmd "$awks" "$statsfile" | sort -t$'\t' -k 2nr,2 -k 1r,1 > "$tmp" mv -- "$tmp" "$statsfile" } urlencode() { # ENVIRON avoids pipe/stdin (and \ interpretation) # explicit character set (avoids locale trouble with ranges) awks=' BEGIN { unreserved = "[ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.~-]" for (i = 0; i <= 255; i++) ord[sprintf("%c", i)] = i len = length(ENVIRON["t"]) for (i = 1; i <= len; i++) { c = substr(ENVIRON["t"], i, 1) if (c !~ unreserved) c = sprintf("%%%02X", ord[c]) r = r c } print r } ' t=$1 $awkcmd "$awks" } gettarget() { awks=' # read the whole file to build array[key]=target ! /^([# \t]|$)/ { kk[$1] = $3 } END { # follow key (recur until URL/key-set/null/recursive-key) while (k !~ /(:\/\/|[^ ] [^ ]|^$)/) { if (k == kk[k]) { print "0 0" # recursive-key exit } k=kk[k] } # also return c count of placeholders (to avoid a subshell call later) c = gsub(/%[sS]/, "&", k) print c, k } ' $awkcmd -v k="$1" -F'\t' "$awks" "$config" } urlsub() { (( $# )) || die 2 "argument(s) expected; none given" # this is faster than loading once into shell's associative array (for at laest 6 iterations) # c - count of placeholders k=$1 url=$(gettarget "$k") c=${url%% *} url=${url#* } case $url in *'://'* ) ;; *[!\ ]' '[!\ ]* ) shift # recur for key set (i.e. a key to multiple keys) for k in $url ; do urlsub "$k" "$@" done return ;; '' | 0 ) case $url in '' ) w='URL not found for key' ;; 0 ) w='recursive key' ;; esac warn "warning - %s: %s\n" "$w" "$1" return 1 ;; esac shift case $c:$# in 0:* ) ;; *:0 ) # just load base domain scheme=${url%%://*} url=${url#*://} url=$scheme://${url%%/*} ;; *:* ) if (( c < $# )) ; then # merge extra args to cth placeholder z=${*:c} # set -- is expensive; create array instead pp=( "${@:1:c - 1}" "$z" ) else (( c > $# )) && warn 'warning - placeholders exceed substitution parts: %s\n' "$url" pp=( "$@" ) fi # substitute placeholder(s) with substitution part(s) (optionally URL-encoded) for p in "${pp[@]}" ; do p=${p//$'\n'/ } # multiple parts: (re)quote all parts which include spaces, and are unquoted # single part: only quote if noquote=0, it has spaces, and it is unquoted (( ${#pp[@]} > noquote )) && [[ $p == *[$' \t']* && $p != \"*\" ]] && p=\"$p\" s=${url%%\%[sS]*} s=${url:${#s} + 1:1} case $s in s ) p=$(urlencode "$p") ;; S ) ;; * ) die 1 "placeholder parse failed: $s -> $p" esac url=${url/\%$s/$p} done ;; esac case $print in 1 ) printf '%s\n' "$url" ;; * ) ${opencmd:?} "$url" || die 3 "opencmd failed: $opencmd" # update stats for: key (name) updatestats "$k ($($awkcmd -v k="$k" -F'\t' '$1 == k {print $2}' "$config"))" ;; esac } urlsubsel() { sel=$(${printselcmd:?}) || die 3 "printselcmd failed: $printselcmd" [[ -n $sel ]] || die 1 'text selection is empty' for k ; do urlsub "$k" "$sel" done } newrecord() { (( $# == 3 )) || die 2 "3 arguments expected; $# given" [[ $1 != *[^[:alnum:]]* ]] || die 1 "key contains bad character(s) (allowed: [[:alnum:]]): $1" [[ $2 != *[^[:print:]]* ]] || die 1 "name contains bad character(s) (allowed: [[:print:]]): $2" [[ $3 != *[^[:print:]]* ]] || die 1 "target contains bad character(s) (allowed: [[:print:]]): $3" [[ -w $config ]] || die 1 "config file not writable: $config" ! $awkcmd -F'\t' -v t="$3" '$3 == t {print; exit 1}' "$config" && { echo 'target already exists! continue anyway? [yN] ' read yn && [[ $yn != [yY] ]] && exit 3 } [[ $3 == *://* && $3 != *%[sS]* ]] && warn 'warning - url does not contain a placeholder' k=$1 newk= # test with awk instead of grep to avoid escaping regex metachars until $awkcmd -F'\t' -v k="$k" '$1 == k {print; exit 1}' "$config" ; do echo 'key already exists! enter new key (or the same one to replace it): ' read -r newk case $newk in "$k" ) echo 'replace existing record? [yN] ' read yn [[ $yn == [yY] ]] && { tmp=$(mktemp /tmp/"$(basename -- "$config")".XXXXXX) # ENVIRON avoids awk's backslash interpretation awks=' BEGIN { OFS = FS = "\t" } $1 == ENVIRON["k"] { $2 = ENVIRON["n"]; $3 = ENVIRON["t"] } 1 ' k=$k n=$2 t=$3 $awkcmd "$awks" "$config" > "$tmp" && mv "$tmp" "$config" && echo 'existing record updated' return } ;; ?* ) k=$newk ;; esac done printf '%s\t%s\t%s\n' "$k" "$2" "$3" >> "$config" && echo 'new record appended' } verifyconfig() { awks=' /^([# \t]|$)/ { next } $1 ~ /[^[:alnum:]]/ { printf("line %s contains bad character in key: %s\n", NR, $1) } { if (kk[$1]) { print kk[$1] print $0 } else { kk[$1] = $0 } } ' $awkcmd -F'\t' "$awks" "$config" } printbookmarks() { awks=' BEGIN { print "\n\n
" } NF == 3 && $1 !~ /^(#.*|[ \t]*)$/ { keys[++i] = $1 names[$1] = $2 targets[$1] = $3 } END { len = i for (i = 1; i <= len; i++) { k = keys[i] n = names[k] u = targets[k] while (u !~ /^([a-z]+:.+)?$/ && u != k) { u = targets[u] } if (u !~ /^[a-z]+:.+$/) continue gsub(/%[sS]/, ENVIRON["s"], u) printf("
\n