Skip to content

Commit

Permalink
artifact stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
grahamstark committed Nov 28, 2024
1 parent d242ad2 commit 54b919b
Showing 1 changed file with 50 additions and 0 deletions.
50 changes: 50 additions & 0 deletions scripts/parse-scottish-census.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
using CSV, DataFrames

function readc(filename::String)::Tuple
d = (CSV.File( filename; normalizenames=true, header=10, skipto=12)|>DataFrame)
if ismissing(d[1,2])
delete!( )
end
label = names(d)[1]
actuald = d[1:33,2:end]
nms = names(actuald)
rename!(actuald,1=>"Authority")
actuald, label, nms
end

function read_all()
fs = sort(readdir("."))
n = 0
allfs = nothing
rows = 0
cols = 0
nfs = length(fs)
labels = DataFrame( filename=fill("",nfs), label=fill("",nfs), start=zeros(Int,nfs) )
for f in fs
if ! isnothing(match(r"^table.*.csv$",f))
n += 1
println( "on $f")
data, label, nms = readc(f)
println(nms)
println(label)
println(data)
labels.filename[n] = f
labels.label[n]=label
labels.start[n]=cols+2
if n == 1
allfs = deepcopy( data )
else
n1 = String.(data[:,1])[1:8] # skip "Na hEileanan Siar", since it's sometimes edited
n2 = String.(allfs[:,1])[1:8]
@assert n1 == n2 "$(n1) !== $(n2)" # check in sync
allfs = hcat( allfs, data; makeunique=true )
rows,cols = size(allfs)
end
# println( "label=$label")
end
end
allfs,labels[1:n,:]
end

CSV.write( "labels.tab", labels; delim='\t')
CSV.write( "allfs.tab", allfs; delim='\t' )

0 comments on commit 54b919b

Please sign in to comment.