col - 1) edition, 2) work, 3) isbn
1) cd -> folder with editions.txt
2) cat editions.txt | cut -f5 | jq -r '[.key, .works[].key, .isbn_10[]] | @csv' > isbn10.txt
3) cat editions.txt | cut -f5 | jq -r '[.key, .works[].key, .isbn_13[]] | @csv' > isbn13.txt
4) create count: sort -k3 isbn10.txt | uniq -c -f2 > histo10.txt
   - -c - prefix each line with its occurrence count
   - use -d instead to print only the duplicated lines
   - -f2 - skip the first 2 whitespace-separated fields when comparing (fields, not character columns)
5) find dups (drop lines whose count is 1): awk -F" " '{ if ($1==1) {next} {print}}' histo10.txt > nodups10.txt
   - sort -r -k1 nodups10.txt > end10.txt
   - -r - reverse (descending)
   - -k1 - sort by column 1 (the count)
6) sum (all editions with a dup isbn): awk '{n += $1}; END{print n}' end10.txt > sum10.txt
7) dup lines: sort -k3 isbn10.txt | uniq -D -f2 > dup10.txt
8) total distinct duplicated isbns: wc -l end10.txt > tot10.txt
Note: build the count histogram from the CSV version: "sort -k3 dups10u.csv | uniq -c -f3 > histo10.csv"