[systemd-devel] [PATCH 1/2] tools: add script to detect repeating words in docs

Karel Zak kzak at redhat.com
Wed Jul 23 03:40:06 PDT 2014


 - all <programlisting> sections from input files are ignored
 - wanted repeats can be white-listed via the KNOWN_REPEATS[] array in the script
 - the script is based on checkmans.sh from util-linux project
 - it's integrated into the build system; just type "make check-repwords", for example:

$ make check-repwords
  GEN      check-repwords
warning: man/coredump.conf.xml has repeating words: on
warning: man/sd_bus_message_append_array.xml has repeating words: of
warning: man/systemctl.xml has repeating words: on
warning: man/systemd.journal-fields.xml has repeating words: with
warning: man/systemd-journal-remote.xml has repeating words: is
warning: man/sysusers.d.xml has repeating words: be
---
 Makefile.am             |  7 ++++
 tools/check-repwords.sh | 97 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100755 tools/check-repwords.sh

diff --git a/Makefile.am b/Makefile.am
index 3fb3703..7ee0264 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5918,6 +5918,13 @@ check-includes: $(top_srcdir)/tools/check-includes.pl
 EXTRA_DIST += \
 	$(top_srcdir)/tools/check-includes.pl
 
+.PHONY: check-repwords
+check-repwords: $(top_srcdir)/tools/check-repwords.sh
+	$(AM_V_GEN) $(top_srcdir)/tools/check-repwords.sh $(top_srcdir)/man/*.xml
+
+EXTRA_DIST += \
+	$(top_srcdir)/tools/check-repwords.sh
+
 # Stupid test that everything purported to be exported really is
 define generate-sym-test
 	$(AM_V_at)$(MKDIR_P) $(dir $@)
diff --git a/tools/check-repwords.sh b/tools/check-repwords.sh
new file mode 100755
index 0000000..f2aa327
--- /dev/null
+++ b/tools/check-repwords.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+#
+# Check all files specified on command line for repeating words
+#
+# Copyright (C) 2014 Karel Zak <kzak at redhat.com>
+# based on util-linux checkmans.sh from Sami Kerola <kerolasa at iki.fi>
+#
+
+set -e		# exit on errors
+set -o pipefail	# exit if pipe writer fails
+set -u		# disallow usage of unset variables
+set -C		# disallow redirection file overwriting
+SCRIPT_INVOCATION_SHORT_NAME=$(basename "${0}")	# quoted: the script path may contain spaces
+trap 'echo "${SCRIPT_INVOCATION_SHORT_NAME}: exit on error"; exit 1' ERR
+
+usage() {	# print the option summary to stdout; only -v and -h are accepted by getopts
+	echo "Usage: ${0} [-vh] <file ...>"
+	echo " -v  verbose messaging"
+	echo " -h  print this help and exit"
+}
+
+VERBOSE='false'	# holds the command name 'true' or 'false'; it is executed directly as a condition
+while getopts vh OPTIONS; do
+	case ${OPTIONS} in
+		v)
+			VERBOSE='true'
+			;;
+		h)
+			usage
+			exit 0
+			;;
+		*)	# anything else, e.g. the '?' getopts reports for an unknown option
+			usage
+			exit 1
+	esac
+done
+
+shift $(( OPTIND - 1 ))	# drop the parsed options; the remaining arguments are the files to check
+
+declare -a REPEATS	# indexed array: repeated words found in the file currently being checked
+declare -A KNOWN_REPEATS	# associative array: file basename -> white-listed repeated word(s)
+
+### white list
+# Note that all text between <programlisting> </programlisting> tags is ignored.
+#
+# For example, to ignore 'bar bar' in the file foo.xml define:
+# KNOWN_REPEATS[foo.xml]='bar'
+
+
+remove_repeats()	# $1: index into REPEATS; the element is unset if white-listed for file ${I}
+{
+	set +u	# the file may have no KNOWN_REPEATS entry, and REPEATS[$1] may already be unset
+	for KN in ${KNOWN_REPEATS[${I##*/}]}; do	# looked up by basename; word-split, presumably a space-separated list
+		if [ "${KN}" = "${REPEATS[$1]}" ]; then
+			if $VERBOSE; then
+				echo "info: ${I} ignore repeat: ${REPEATS[$1]}"
+			fi
+			unset "REPEATS[$1]"	# quoted so the [...] subscript is not subject to pathname expansion
+		fi
+	done
+	set -u
+}
+
+COUNT_ERRORS=0	# sum of the per-file I_ERR values; decides the exit status at the end
+
+for I in "$@"; do	# "$@" keeps file names with whitespace or glob characters intact
+	I_ERR=0	# NOTE(review): never set to non-zero below, so the script always exits 0 - confirm intended
+	if ${VERBOSE}; then
+		echo "testing: ${I}"
+	fi
+	REPEATS=( $( col -b < "${I}" | \
+		sed  -e 's/\s\+/\n/g;
+	                 /^$/d;
+			 /<programlisting/,/\/programlisting>/d' | \
+		awk 'BEGIN { p="" } { if (0 < length($0)) { if (p == $0) { print } } p = $0 }') )
+
+	if [ 0 -lt "${#REPEATS[@]}" ]; then
+		ITER=${#REPEATS[@]}
+		while [ 0 -lt ${ITER} ]; do
+			# Visit the indexes from last to first. A $(( )) assignment always
+			# succeeds, whereas 'let' returns 1 when its result is 0 (documented).
+			ITER=$(( ITER - 1 ))
+			remove_repeats ${ITER}
+		done
+		if [ 0 -lt "${#REPEATS[@]}" ]; then
+			echo "warning: ${I} has repeating words: ${REPEATS[*]}"
+		fi
+	fi
+
+	COUNT_ERRORS=$(( COUNT_ERRORS + I_ERR ))
+done
+
+if [ "${COUNT_ERRORS}" -ne 0 ]; then
+	exit 1
+fi
+
+exit 0
-- 
1.9.3



More information about the systemd-devel mailing list