Skip to content

Commit 1cfa68d

Browse files
committed
Add support for building GNU archives (issue #10)
1 parent 0362a68 commit 1cfa68d

File tree

1 file changed

+254
-19
lines changed

1 file changed

+254
-19
lines changed

src/lib.rs

Lines changed: 254 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@
2424
//! stores filenames in a slightly different, incompatible way, and has its
2525
//! own strategy for supporting long filenames.
2626
//!
27-
//! Currently, this crate supports reading all three of these variants, but
28-
//! only supports writing the BSD/common variant.
27+
//! This crate supports reading and writing all three of these variants.
2928
//!
3029
//! # Example usage
3130
//!
@@ -71,6 +70,7 @@ extern crate byteorder;
7170

7271
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
7372
use std::cmp;
73+
use std::collections::{HashMap, HashSet};
7474
use std::ffi::OsStr;
7575
use std::fs::{File, Metadata};
7676
use std::io::{self, BufRead, BufReader, Error, ErrorKind, Read, Result, Seek,
@@ -97,7 +97,7 @@ const ENTRY_HEADER_LEN: usize = 60;
9797
const BSD_SYMBOL_LOOKUP_TABLE_ID: &[u8] = b"__.SYMDEF";
9898
const BSD_SORTED_SYMBOL_LOOKUP_TABLE_ID: &[u8] = b"__.SYMDEF SORTED";
9999

100-
const GNU_NAME_TABLE_ID: &[u8] = b"//";
100+
const GNU_NAME_TABLE_ID: &str = "//";
101101
const GNU_SYMBOL_LOOKUP_TABLE_ID: &[u8] = b"/";
102102

103103
// ========================================================================= //
@@ -130,12 +130,12 @@ impl Header {
130130
/// other fields set to zero.
131131
pub fn new(identifier: Vec<u8>, size: u64) -> Header {
132132
Header {
133-
identifier: identifier,
133+
identifier,
134134
mtime: 0,
135135
uid: 0,
136136
gid: 0,
137137
mode: 0,
138-
size: size,
138+
size,
139139
}
140140
}
141141

@@ -144,7 +144,7 @@ impl Header {
144144
#[cfg(unix)]
145145
pub fn from_metadata(identifier: Vec<u8>, meta: &Metadata) -> Header {
146146
Header {
147-
identifier: identifier,
147+
identifier,
148148
mtime: meta.mtime() as u64,
149149
uid: meta.uid(),
150150
gid: meta.gid(),
@@ -230,7 +230,7 @@ impl Header {
230230
if identifier == GNU_SYMBOL_LOOKUP_TABLE_ID {
231231
io::copy(&mut reader.by_ref().take(size), &mut io::sink())?;
232232
return Ok(Some((Header::new(identifier, size), header_len)));
233-
} else if identifier == GNU_NAME_TABLE_ID {
233+
} else if identifier == GNU_NAME_TABLE_ID.as_bytes() {
234234
*name_table = vec![0; size as usize];
235235
reader.read_exact(name_table as &mut [u8]).map_err(|err| {
236236
annotate(err, "failed to read name table")
@@ -308,12 +308,12 @@ impl Header {
308308
}
309309
Ok(Some((
310310
Header {
311-
identifier: identifier,
312-
mtime: mtime,
313-
uid: uid,
314-
gid: gid,
315-
mode: mode,
316-
size: size,
311+
identifier,
312+
mtime,
313+
uid,
314+
gid,
315+
mode,
316+
size,
317317
},
318318
header_len,
319319
)))
@@ -350,6 +350,31 @@ impl Header {
350350
}
351351
Ok(())
352352
}
353+
354+
fn write_gnu<W>(&self, writer: &mut W, names: &HashMap<Vec<u8>, usize>)
355+
-> Result<()>
356+
where
357+
W: Write,
358+
{
359+
if self.identifier.len() > 15 {
360+
let offset = names[&self.identifier];
361+
write!(writer, "/{:<15}", offset)?;
362+
} else {
363+
writer.write_all(&self.identifier)?;
364+
writer.write_all(b"/")?;
365+
writer.write_all(&vec![b' '; 15 - self.identifier.len()])?;
366+
}
367+
write!(
368+
writer,
369+
"{:<12}{:<6}{:<6}{:<8o}{:<10}`\n",
370+
self.mtime,
371+
self.uid,
372+
self.gid,
373+
self.mode,
374+
self.size
375+
)?;
376+
Ok(())
377+
}
353378
}
354379

355380
fn parse_number(field_name: &str, bytes: &[u8], radix: u32) -> Result<u64> {
@@ -419,7 +444,7 @@ impl<R: Read> Archive<R> {
419444
/// source of all data read.
420445
pub fn new(reader: R) -> Archive<R> {
421446
Archive {
422-
reader: reader,
447+
reader,
423448
variant: Variant::Common,
424449
name_table: Vec::new(),
425450
entry_headers: Vec::new(),
@@ -446,7 +471,8 @@ impl<R: Read> Archive<R> {
446471
pub fn into_inner(self) -> Result<R> {
    // Hand the wrapped reader back to the caller; this never fails.
    let inner = self.reader;
    Ok(inner)
}
447472

448473
fn is_name_table_id(&self, identifier: &[u8]) -> bool {
449-
self.variant == Variant::GNU && identifier == GNU_NAME_TABLE_ID
474+
self.variant == Variant::GNU &&
475+
identifier == GNU_NAME_TABLE_ID.as_bytes()
450476
}
451477

452478
fn is_symbol_lookup_table_id(&self, identifier: &[u8]) -> bool {
@@ -650,7 +676,7 @@ impl<R: Read + Seek> Archive<R> {
650676
}
651677
self.next_entry_index = index + 1;
652678
Ok(Entry {
653-
header: header,
679+
header,
654680
reader: self.reader.by_ref(),
655681
length: size,
656682
position: 0,
@@ -851,7 +877,8 @@ impl<'a, R: Read> ExactSizeIterator for Symbols<'a, R> {}
851877

852878
// ========================================================================= //
853879

854-
/// A structure for building archives.
880+
/// A structure for building Common or BSD-variant archives (the archive format
881+
/// typically used on e.g. BSD and Mac OS X systems).
855882
///
856883
/// This structure has methods for building up an archive from scratch into any
857884
/// arbitrary writer.
@@ -865,7 +892,7 @@ impl<W: Write> Builder<W> {
865892
/// destination of all data written.
866893
pub fn new(writer: W) -> Builder<W> {
867894
Builder {
868-
writer: writer,
895+
writer,
869896
started: false,
870897
}
871898
}
@@ -921,6 +948,135 @@ impl<W: Write> Builder<W> {
921948
}
922949
}
923950

951+
// ========================================================================= //
952+
953+
/// A structure for building GNU-variant archives (the archive format typically
954+
/// used on e.g. GNU/Linux and Windows systems).
955+
///
956+
/// This structure has methods for building up an archive from scratch into any
957+
/// arbitrary writer.
958+
pub struct GnuBuilder<W: Write> {
959+
writer: W,
960+
short_names: HashSet<Vec<u8>>,
961+
long_names: HashMap<Vec<u8>, usize>,
962+
name_table_size: usize,
963+
started: bool,
964+
}
965+
966+
impl<W: Write> GnuBuilder<W> {
967+
/// Create a new archive builder with the underlying writer object as the
968+
/// destination of all data written. The `identifiers` parameter must give
969+
/// the complete list of entry identifiers that will be included in this
970+
/// archive.
971+
pub fn new(writer: W, identifiers: Vec<Vec<u8>>) -> GnuBuilder<W> {
972+
let mut short_names = HashSet::<Vec<u8>>::new();
973+
let mut long_names = HashMap::<Vec<u8>, usize>::new();
974+
let mut name_table_size: usize = 0;
975+
for identifier in identifiers.into_iter() {
976+
let length = identifier.len();
977+
if length > 15 {
978+
long_names.insert(identifier, name_table_size);
979+
name_table_size += length + 2;
980+
} else {
981+
short_names.insert(identifier);
982+
}
983+
}
984+
GnuBuilder {
985+
writer,
986+
short_names,
987+
long_names,
988+
name_table_size,
989+
started: false,
990+
}
991+
}
992+
993+
/// Unwrap this archive builder, returning the underlying writer object.
994+
pub fn into_inner(self) -> Result<W> { Ok(self.writer) }
995+
996+
/// Adds a new entry to this archive.
997+
pub fn append<R: Read>(&mut self, header: &Header, mut data: R)
998+
-> Result<()> {
999+
let is_long_name = header.identifier().len() > 15;
1000+
let has_name = if is_long_name {
1001+
self.long_names.contains_key(header.identifier())
1002+
} else {
1003+
self.short_names.contains(header.identifier())
1004+
};
1005+
if !has_name {
1006+
let msg = format!(
1007+
"Identifier {:?} was not in the list of \
1008+
identifiers passed to GnuBuilder::new()",
1009+
String::from_utf8_lossy(header.identifier())
1010+
);
1011+
return Err(Error::new(ErrorKind::InvalidInput, msg));
1012+
}
1013+
1014+
if !self.started {
1015+
self.writer.write_all(GLOBAL_HEADER)?;
1016+
if !self.long_names.is_empty() {
1017+
write!(
1018+
self.writer,
1019+
"{:<48}{:<10}`\n",
1020+
GNU_NAME_TABLE_ID,
1021+
self.name_table_size
1022+
)?;
1023+
let mut entries: Vec<(usize, &[u8])> = self.long_names
1024+
.iter()
1025+
.map(|(id, &start)| (start, id.as_slice()))
1026+
.collect();
1027+
entries.sort();
1028+
for (_, id) in entries {
1029+
self.writer.write_all(id)?;
1030+
self.writer.write_all(b"/\n")?;
1031+
}
1032+
}
1033+
self.started = true;
1034+
}
1035+
1036+
header.write_gnu(&mut self.writer, &self.long_names)?;
1037+
let actual_size = io::copy(&mut data, &mut self.writer)?;
1038+
if actual_size != header.size() {
1039+
let msg = format!(
1040+
"Wrong file size (header.size() = {}, actual \
1041+
size was {})",
1042+
header.size(),
1043+
actual_size
1044+
);
1045+
return Err(Error::new(ErrorKind::InvalidData, msg));
1046+
}
1047+
if actual_size % 2 != 0 {
1048+
self.writer.write_all(&['\n' as u8])?;
1049+
}
1050+
1051+
Ok(())
1052+
}
1053+
1054+
/// Adds a file on the local filesystem to this archive, using the file
1055+
/// name as its identifier.
1056+
pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
1057+
let name: &OsStr = path.as_ref().file_name().ok_or_else(|| {
1058+
let msg = "Given path doesn't have a file name";
1059+
Error::new(ErrorKind::InvalidInput, msg)
1060+
})?;
1061+
let identifier = osstr_to_bytes(name)?;
1062+
let mut file = File::open(&path)?;
1063+
self.append_file_id(identifier, &mut file)
1064+
}
1065+
1066+
/// Adds a file to this archive, with the given name as its identifier.
1067+
pub fn append_file(&mut self, name: &[u8], file: &mut File) -> Result<()> {
1068+
self.append_file_id(name.to_vec(), file)
1069+
}
1070+
1071+
fn append_file_id(&mut self, id: Vec<u8>, file: &mut File) -> Result<()> {
1072+
let metadata = file.metadata()?;
1073+
let header = Header::from_metadata(id, &metadata);
1074+
self.append(&header, file)
1075+
}
1076+
}
1077+
1078+
// ========================================================================= //
1079+
9241080
#[cfg(unix)]
9251081
fn osstr_to_bytes(string: &OsStr) -> Result<Vec<u8>> {
9261082
Ok(string.as_bytes().to_vec())
@@ -960,7 +1116,7 @@ fn annotate(error: io::Error, msg: &str) -> io::Error {
9601116

9611117
#[cfg(test)]
9621118
mod tests {
963-
use super::{Archive, Builder, Header, Variant};
1119+
use super::{Archive, Builder, GnuBuilder, Header, Variant};
9641120
use std::io::{Cursor, Read, Result, Seek, SeekFrom};
9651121
use std::str;
9661122

@@ -1040,6 +1196,85 @@ mod tests {
10401196
assert_eq!(str::from_utf8(&actual).unwrap(), expected);
10411197
}
10421198

1199+
// Two short identifiers: no name table is emitted, and each entry header
// must be exactly 60 bytes of fixed-width, space-padded columns.
#[test]
fn build_gnu_archive() {
    let names = vec![b"baz.txt".to_vec(), b"foo.txt".to_vec()];
    let mut builder = GnuBuilder::new(Vec::new(), names);
    let mut header1 = Header::new(b"foo.txt".to_vec(), 7);
    header1.set_mtime(1487552916);
    header1.set_uid(501);
    header1.set_gid(20);
    header1.set_mode(0o100644);
    builder.append(&header1, "foobar\n".as_bytes()).unwrap();
    let header2 = Header::new(b"baz.txt".to_vec(), 4);
    builder.append(&header2, "baz\n".as_bytes()).unwrap();
    let actual = builder.into_inner().unwrap();
    // Columns: name/ (16), mtime (12), uid (6), gid (6), mode (8), size (10).
    // The 7-byte first entry is followed by one '\n' of alignment padding.
    let expected = "\
    !<arch>\n\
    foo.txt/        1487552916  501   20    100644  7         `\n\
    foobar\n\n\
    baz.txt/        0           0     0     0       4         `\n\
    baz\n";
    assert_eq!(str::from_utf8(&actual).unwrap(), expected);
}
1220+
1221+
// Identifiers longer than 15 bytes go into the "//" name table and are
// referenced from entry headers as "/<offset>".
#[test]
fn build_gnu_archive_with_long_filenames() {
    let names = vec![
        b"this_is_a_very_long_filename.txt".to_vec(),
        b"and_this_is_another_very_long_filename.txt".to_vec(),
    ];
    let mut builder = GnuBuilder::new(Vec::new(), names);
    // Start from placeholder values to exercise the setters as well.
    let mut header1 = Header::new(b"short".to_vec(), 1);
    header1.set_identifier(b"this_is_a_very_long_filename.txt".to_vec());
    header1.set_mtime(1487552916);
    header1.set_uid(501);
    header1.set_gid(20);
    header1.set_mode(0o100644);
    header1.set_size(7);
    builder.append(&header1, "foobar\n".as_bytes()).unwrap();
    let header2 = Header::new(
        b"and_this_is_another_very_long_filename.txt".to_vec(),
        4,
    );
    builder.append(&header2, "baz\n".as_bytes()).unwrap();
    let actual = builder.into_inner().unwrap();
    // Table rows are "<name>/\n": 34 + 44 = 78 bytes, so the second name
    // starts at offset 34.  The table header pads "//" to 48 columns.
    let expected = "\
    !<arch>\n\
    //                                              78        `\n\
    this_is_a_very_long_filename.txt/\n\
    and_this_is_another_very_long_filename.txt/\n\
    /0              1487552916  501   20    100644  7         `\n\
    foobar\n\n\
    /34             0           0     0     0       4         `\n\
    baz\n";
    assert_eq!(str::from_utf8(&actual).unwrap(), expected);
}
1253+
1254+
// A short identifier containing a space is stored inline, terminated by
// '/', and padded to the 16-byte name column like any other short name.
#[test]
fn build_gnu_archive_with_space_in_filename() {
    let names = vec![b"foo bar".to_vec()];
    let mut builder = GnuBuilder::new(Vec::new(), names);
    let header = Header::new(b"foo bar".to_vec(), 4);
    builder.append(&header, "baz\n".as_bytes()).unwrap();
    let actual = builder.into_inner().unwrap();
    let expected = "\
    !<arch>\n\
    foo bar/        0           0     0     0       4         `\n\
    baz\n";
    assert_eq!(str::from_utf8(&actual).unwrap(), expected);
}
1267+
1268+
// Appending an entry whose identifier was never registered with
// `GnuBuilder::new()` must fail; unwrapping that error panics with the
// message checked below.
#[test]
#[should_panic(expected = "Identifier \\\"bar\\\" was not in the list of \
                           identifiers passed to GnuBuilder::new()")]
fn build_gnu_archive_with_unexpected_identifier() {
    let registered = vec![b"foo".to_vec()];
    let mut gnu_builder = GnuBuilder::new(Vec::new(), registered);
    let unregistered = Header::new(b"bar".to_vec(), 4);
    let outcome = gnu_builder.append(&unregistered, "baz\n".as_bytes());
    outcome.unwrap();
}
1277+
10431278
#[test]
10441279
fn read_common_archive() {
10451280
let input = "\

0 commit comments

Comments
 (0)