cut, paste & join — Column Tools#
cut#
Extract specific fields or byte/character ranges from each line.
Syntax#
cut OPTION... [FILE...]
By delimiter field (-d / -f)#
cut -d: -f1 /etc/passwd # first field (username)
cut -d: -f1,7 /etc/passwd # fields 1 and 7
cut -d: -f1-3 /etc/passwd # fields 1 through 3
cut -d: -f3- /etc/passwd # field 3 to end
cut -d, -f2 data.csv # CSV second column
cut -d$'\t' -f1,3 data.tsv # tab-delimited fields 1 and 3
cut -d' ' -f2- sentence.txt # all words except first
# Change output delimiter (POSIX cut cannot; GNU cut has --output-delimiter=STR, otherwise use tr or awk)
cut -d: -f1,3 /etc/passwd | tr ':' '\t'
By character position (-c)#
cut -c1 file.txt # first character of each line
cut -c1-10 file.txt # characters 1–10
cut -c5- file.txt # character 5 to end
cut -c1,5,10 file.txt # characters 1, 5, and 10
cut -c-80 file.txt # max 80 chars (truncate long lines)
By byte position (-b)#
cut -b1-4 binary.dat # bytes 1–4 (differs from -c for multibyte chars)
Suppress undelimited lines#
cut -d: -f1 -s /etc/passwd # -s: skip lines without the delimiter
paste#
Merge files horizontally (column-by-column).
Syntax#
paste [OPTIONS] [FILE...]
paste file1.txt file2.txt # merge side by side (tab-delimited)
paste -d, file1.txt file2.txt # use comma as delimiter
paste -d'\t' names.txt emails.txt # explicit tab
# Serial mode (-s): transpose — each file becomes one tab-joined line
paste -s file.txt
paste -s -d, file.txt # comma-separated
# Combine N columns from same file
paste - - < file.txt # 2 lines → 1 row (2 columns)
paste - - - < file.txt # 3 lines → 1 row (3 columns)
Practical paste examples#
# Create a CSV from two column files
paste -d, ids.txt names.txt > combined.csv
# Add line numbers to a file
seq 1 $(wc -l < file.txt) | paste -d'\t' - file.txt
# Interleave lines from two files
paste -d'\n' file1.txt file2.txt
# Recreate a CSV from a column of values
paste -s -d, values.txt
join#
Join lines from two files on a common key field (like SQL inner join).
Syntax#
join [OPTIONS] FILE1 FILE2
Both files must be sorted on the join key first.
# Join on field 1 (default)
join sorted1.txt sorted2.txt
# Join on specific fields
join -1 2 -2 1 file1.txt file2.txt # field 2 of f1, field 1 of f2
# Use a custom field separator (-t applies to both input and output)
join -t, file1.csv file2.csv
# Include unmatched lines (outer join)
join -a 1 file1.txt file2.txt # + unmatched from file1
join -a 2 file1.txt file2.txt # + unmatched from file2
join -a 1 -a 2 file1.txt file2.txt # full outer join
# Fill missing fields
join -a 1 -e 'N/A' -o 0,1.2,2.2 file1.txt file2.txt
# Suppress matched lines (anti-join)
join -v 1 file1.txt file2.txt # lines in f1 not in f2
join -v 2 file1.txt file2.txt # lines in f2 not in f1
join example#
# employees.txt (sorted by ID):
# 101 Alice
# 102 Bob
# 103 Carol
# salaries.txt (sorted by ID):
# 101 75000
# 102 82000
# 104 91000
join employees.txt salaries.txt
# 101 Alice 75000
# 102 Bob 82000
join -a 1 employees.txt salaries.txt
# 101 Alice 75000
# 102 Bob 82000
# 103 Carol ← Carol has no salary record
Practical pipelines#
# Extract second column from CSV, remove header, sort, count unique
tail -n +2 data.csv | cut -d, -f2 | sort | uniq -c | sort -rn
# Get all usernames from /etc/passwd
cut -d: -f1 /etc/passwd | sort
# Get the home directories of users with /bin/bash shell
grep '/bin/bash$' /etc/passwd | cut -d: -f6
# Compare two lists (IDs in file1 not in file2)
join -v 1 <(sort ids1.txt) <(sort ids2.txt)
# Build a quick lookup from key=value file
cut -d= -f1,2 config.env | tr '=' '\t'
# Transpose a whitespace-delimited matrix (fields separated by single spaces)
# (for small matrices — use awk for larger ones)
# Each input column is extracted with cut and folded into one tab-joined
# output row by paste -s. A loop is used instead of handing <(cut ...) paths
# out of $(...): process substitutions opened inside a command-substitution
# subshell are already closed by the time an outer paste would read them.
ncols=$(awk '{print NF; exit}' matrix.txt)   # column count, taken from the first row
for ((i = 1; i <= ncols; i++)); do
  cut -d' ' -f"$i" matrix.txt | paste -s -
done
[!TIP]
`cut` always outputs fields in the order they appear in the input, regardless of the order specified with `-f`. To reorder fields, use `awk '{print $3, $1}'` instead.
[!TIP]
`join` requires sorted input. Use `join <(sort f1) <(sort f2)`, or pre-sort with `sort -k N,N` when joining on field N rather than the first field.