21 changes: 21 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/1.sh
@@ -0,0 +1,21 @@
#!/bin/bash

## Initialize the necessary temporary files
file1=$(mktemp)

cat >"$file1"

printf 'File type:\t'
file - <"$file1"

printf 'Original size:\t'
wc -c <"$file1"

printf 'xz:\t\t'
xz -c <"$file1" | wc -c

printf 'bzip2:\t\t'
bzip2 -c <"$file1" | wc -c

printf 'gzip:\t\t'
gzip -c <"$file1" | wc -c
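The script reads the data to compress from standard input. A quick smoke test, assuming xz, bzip2, and gzip are on the PATH and /etc/services exists on the host:

    # feed any reasonably sized text file on stdin
    bash evaluation/benchmarks/dgsh/sequential/1.sh < /etc/services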
112 changes: 112 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/11.sh
@@ -0,0 +1,112 @@
#!/bin/bash

## Initialize the necessary temporary files
file1=$(mktemp)
file2=$(mktemp)
file3=$(mktemp)
file4=$(mktemp)
file5=$(mktemp)
file6=$(mktemp)
file7=$(mktemp)
file8=$(mktemp)
file9=$(mktemp)

export LC_ALL=C

# Commit history in the form of ascending Unix timestamps, emails
git log --pretty=tformat:'%at %ae' |
awk 'NF == 2 && $1 > 100000 && $1 < '"$(date +%s)" |
sort -n > "$file1"

# Calculate number of committers
awk '{print $2}' "$file1" |
sort -u |
wc -l > "$file2"
cp "$file2" "$file3"
cp "$file2" "$file4"

# Calculate last commit timestamp in seconds
tail -1 "$file1" |
awk '{print $1}' > "$file5"

# Calculate first commit timestamp in seconds
head -1 "$file1" |
awk '{print $1}' >> "$file5"

# Gather last and first commit timestamp and compute the difference in days
# (compute into a variable first so the redirection does not clobber the file being read)
days=$(tr '\n' ' ' <"$file5" |
awk '{print int(($1 - $2) / 60 / 60 / 24)}')
echo "$days" > "$file5"

sort -k2 "$file1" > "$file6"

# Place committers left/right of the median according to the number of their commits
awk '{print $2}' "$file1" |
sort |
uniq -c |
sort -n |
awk -v committers1="$file2" '
BEGIN {
while ((getline NCOMMITTERS < committers1) > 0) {}
l = 0; r = NCOMMITTERS;
}
{print NR % 2 ? l++ : --r, $2}' |
sort -k2 > "$file7"

# Join committer positions with commit timestamps based on committer email
# (sort into a scratch file so the join input is not truncated mid-pipeline)
join -j 2 "$file6" "$file7" |
sort -k 2n > "$file8"
mv "$file8" "$file6"

# Create portable bitmap
{
echo 'P1'
{
cat "$file3"
cat "$file5"
} |
tr '\n' ' ' |
awk '{print $1, $2}'

perl -na -e '
BEGIN {
open(my $ncf, "<", "'"$file4"'");
$ncommitters = <$ncf>;
@empty[$ncommitters - 1] = 0; @committers = @empty;
}
sub out {
print join("", map($_ ? "1" : "0", @committers)), "\n";
}

$day = int($F[1] / 60 / 60 / 24);
$pday = $day if (!defined($pday));

while ($day != $pday) {
out();
@committers = @empty;
$pday++;
}

$committers[$F[2]] = 1;

END { out(); }
' "$file6"
} |
pgmmorphconv -erode <(
cat <<EOF
P1
7 7
1 1 1 0 1 1 1
1 1 0 0 0 1 1
1 0 0 0 0 0 1
0 0 0 0 0 0 0
1 0 0 0 0 0 1
1 1 0 0 0 1 1
1 1 1 0 1 1 1
EOF
) > "$file9"

# Full-scale image
pnmtopng <"$file9" >large.png
# A smaller image
pamscale -width 640 <"$file9" |
pnmtopng >small.png
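This variant has to run inside a git working tree and needs perl plus the netpbm tools (pgmmorphconv, pamscale, pnmtopng). A hedged smoke test; the repository paths are only illustrative:

    # run from the root of some sizable git clone (paths are hypothetical)
    cd /path/to/some/git/repo
    bash /path/to/evaluation/benchmarks/dgsh/sequential/11.sh
    ls -l large.png small.png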
25 changes: 25 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/17.sh
@@ -0,0 +1,25 @@
#!/bin/bash

# Initialize the necessary temporary files
file1=$(mktemp)
file2=$(mktemp)
file3=$(mktemp)
file4=$(mktemp)

# Save the ls output to a temporary file
ls -n > "$file1"

# Reorder fields in DIR-like way
awk '!/^total/ {print $6, $7, $8, $1, sprintf("%8d", $5), $9}' "$file1" > "$file2"

# Count number of files
wc -l "$file1" | tr -d \\n > "$file3"
echo -n ' File(s) ' >> "$file3"
awk '{s += $5} END {printf("%d bytes\n", s)}' "$file1" >> "$file3"

# Count number of directories and print label for number of dirs and calculate free bytes
grep -c '^d' "$file1" | tr -d \\n > "$file4"
df -h . | awk '!/Use%/{print " Dir(s) " $4 " bytes free"}' >> "$file4"

# Display the results
cat "$file2" "$file3" "$file4"
16 changes: 16 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/18.sh
@@ -0,0 +1,16 @@
#!/bin/bash

# Initialize the necessary temporary files
file1=$(mktemp)
file2=$(mktemp)
file3=$(mktemp)

# Read the input stream and save to a temporary file
cat "$INPUT_FILE" > "$file1"

# Process the input in two different ways
cut -d , -f 5-6 "$file1" > "$file2"
cut -d , -f 2-4 "$file1" > "$file3"

# Merge the processed results
paste -d , "$file2" "$file3"
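The CSV to rearrange is named by the INPUT_FILE environment variable. A self-contained example with a throwaway file (path and data are made up for illustration):

    printf 'a,b,c,d,e,f\n1,2,3,4,5,6\n' > /tmp/sample.csv
    INPUT_FILE=/tmp/sample.csv bash evaluation/benchmarks/dgsh/sequential/18.sh
    # expected output: e,f,b,c,d and 5,6,2,3,4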
24 changes: 24 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/2.sh
@@ -0,0 +1,24 @@
#!/bin/bash

## Note: Needs to be run on a big git repository to make sense (maybe linux)

## Initialize the necessary temporary files
file1=$(mktemp)

forder()
{
sort |
uniq -c |
sort -rn
}


git log --format="%an:%ad" --date=default "$@" >"$file1"

echo "Authors ordered by number of commits"
# Order by frequency
awk -F: '{print $1}' <"$file1" | forder

echo "Days ordered by number of commits"
# Order by frequency
awk -F: '{print substr($2, 1, 3)}' <"$file1" | forder
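Extra arguments are passed straight through to git log, so the history can be narrowed; a hedged example against a hypothetical large clone:

    # the clone path is illustrative; any big repository will do
    cd ~/src/linux
    bash /path/to/evaluation/benchmarks/dgsh/sequential/2.sh --since='1 year ago'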
132 changes: 132 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/3.sh
@@ -0,0 +1,132 @@
#!/bin/bash

## Note: Needs to be run on a big git repository to make sense (maybe linux)

## Initialize the necessary temporary files
file1=$(mktemp)
file2=$(mktemp)
file3=$(mktemp)
file4=$(mktemp)

find "$@" \( -name \*.c -or -name \*.h \) -type f -print0 >"$file1"

echo -n 'FNAMELEN: '

tr \\0 \\n <"$file1" |
# Remove path
sed 's|^.*/||' |
# Maintain average
awk '{s += length($1); n++} END {
if (n>0)
print s / n;
else
print 0; }'

xargs -0 /bin/cat <"$file1" >"$file2"

sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file2" |
cpp -P >"$file3"

# Structure definitions
echo -n 'NSTRUCT: '

egrep -c 'struct[ ]*{|struct[ ]*[a-zA-Z_][a-zA-Z0-9_]*[ ]*{' <"$file3"
#}} (match preceding openings)

# Type definitions
echo -n 'NTYPEDEF: '
grep -cw typedef <"$file3"

# Use of void
echo -n 'NVOID: '
grep -cw void <"$file3"

# Use of gets
echo -n 'NGETS: '
grep -cw gets <"$file3"

# Average identifier length
echo -n 'IDLEN: '

tr -cs 'A-Za-z0-9_' '\n' <"$file3" |
sort -u |
awk '/^[A-Za-z]/ { len += length($1); n++ } END {
if (n>0)
print len / n;
else
print 0; }'

echo -n 'CHLINESCHAR: '
wc -lc <"$file2" |
awk '{OFS=":"; print $1, $2}'

echo -n 'NCCHAR: '
sed 's/#/@/g' <"$file2" |
cpp -traditional -P |
wc -c |
awk '{OFMT = "%.0f"; print $1/1000}'

# Number of comments
echo -n 'NCOMMENT: '
egrep -c '/\*|//' <"$file2"

# Occurrences of the word Copyright
echo -n 'NCOPYRIGHT: '
grep -ci copyright <"$file2"

# C files
find "$@" -name \*.c -type f -print0 >"$file2"

# Convert to newline separation for counting
tr \\0 \\n <"$file2" >"$file3"

# Number of C files
echo -n 'NCFILE: '
wc -l <"$file3"

# Number of directories containing C files
echo -n 'NCDIR: '
sed 's,/[^/]*$,,;s,^.*/,,' <"$file3" |
sort -u |
wc -l

# C code
xargs -0 /bin/cat <"$file2" >"$file3"

# Lines and characters
echo -n 'CLINESCHAR: '
wc -lc <"$file3" |
awk '{OFS=":"; print $1, $2}'

# C code without comments and strings
sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file3" |
cpp -P >"$file4"

# Number of functions
echo -n 'NFUNCTION: '
grep -c '^{' <"$file4"

# Number of gotos
echo -n 'NGOTO: '
grep -cw goto <"$file4"

# Occurrences of the register keyword
echo -n 'NREGISTER: '
grep -cw register <"$file4"

# Number of macro definitions
echo -n 'NMACRO: '
grep -c '@[ ]*define[ ][ ]*[a-zA-Z_][a-zA-Z0-9_]*(' <"$file4"
# Number of include directives
echo -n 'NINCLUDE: '
grep -c '@[ ]*include' <"$file4"

# Number of constants
echo -n 'NCONST: '
grep -ohw '[0-9][x0-9][0-9a-f]*' <"$file4" | wc -l


# Header files
echo -n 'NHFILE: '
find "$@" -name \*.h -type f |
wc -l
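The script walks the directories given as arguments and needs cpp available for the comment- and string-stripping steps; a smoke test against a hypothetical C source tree:

    # the source path is illustrative
    bash evaluation/benchmarks/dgsh/sequential/3.sh ~/src/linux/kernel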
35 changes: 35 additions & 0 deletions evaluation/benchmarks/dgsh/sequential/4.sh
@@ -0,0 +1,35 @@
#!/bin/bash

## Initialize the necessary temporary files
file1=$(mktemp)
file2=$(mktemp)
file3=$(mktemp)

# Create list of files
find "$@" -type f |

# Produce lines of the form
# MD5(filename)= 811bfd4b5974f39e986ddc037e1899e7
xargs openssl md5 |

# Convert each line into a "filename md5sum" pair
sed 's/^MD5(//;s/)= / /' |

# Sort by MD5 sum
sort -k2 > "$file1"

# Print an MD5 sum for each file that appears more than once
awk '{print $2}' < "$file1" | uniq -d > "$file2"


# Join the repeated MD5 sums with the corresponding file names
# Join expects two inputs, second will come from scatter
# XXX make streaming input identifiers transparent to users
join -2 2 "$file2" "$file1" |

# Output same files on a single line
awk '
BEGIN {ORS=""}
$1 != prev && prev {print "\n"}
END {if (prev) print "\n"}
{if ($1 == prev) print " "; prev = $1; print $2}'
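A self-contained check using a throwaway directory with two identical files (paths are purely illustrative); the duplicates should come back on a single line:

    mkdir -p /tmp/dupdemo
    echo hello > /tmp/dupdemo/a
    cp /tmp/dupdemo/a /tmp/dupdemo/b
    bash evaluation/benchmarks/dgsh/sequential/4.sh /tmp/dupdemo
    # expected output: /tmp/dupdemo/a /tmp/dupdemo/b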