Skip to content

Commit a522e0a

Browse files
author
robin
committed
first init
0 parents  commit a522e0a

File tree

6 files changed

+155
-0
lines changed

6 files changed

+155
-0
lines changed

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
target/
2+
.DS_Store
3+
.idea
4+
*.iml
5+
*.log
6+
*.bak
7+
*.lock
8+

Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[package]
2+
name = "fred"
3+
version = "0.1.0"
4+
authors = ["robin <[email protected]>"]
5+
6+
[dependencies]
7+
zip = { version = "0.2", default-features = false }
8+
quick-xml = "0.9.4"

data/filosofi-logo.docx

27.3 KB
Binary file not shown.

data/sample.docx

93.5 KB
Binary file not shown.

examples/readdocx.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
3+
extern crate fred;
4+
5+
use fred::Docx;
6+
use std::io::Read;
7+
8+
fn main(){
9+
let mut file = Docx::open("data/sample.docx").unwrap();
10+
let mut isi = String::new();
11+
let _ = file.read_to_string(&mut isi);
12+
println!("ISI:");
13+
println!("----------BEGIN----------");
14+
println!("{}", isi);
15+
println!("----------EOF----------");
16+
}

src/lib.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
2+
#![allow(unused_imports, dead_code, unused_must_use)]
3+
4+
extern crate zip;
5+
extern crate quick_xml as xml;
6+
7+
use zip::ZipArchive;
8+
9+
use xml::reader::Reader;
10+
use xml::events::Event;
11+
12+
use std::path::{Path, PathBuf};
13+
use std::fs::File;
14+
use std::io::prelude::*;
15+
use std::io;
16+
use std::clone::Clone;
17+
use zip::read::ZipFile;
18+
19+
/// Text content extracted from a `.docx` file, exposed as a byte stream
/// through `std::io::Read`.
#[derive(Debug, Clone)]
pub struct Docx {
    /// Path the document was opened from.
    path: PathBuf,
    /// Extracted text that `read` serves to callers.
    data: String,
    /// Byte position in `data` where the next `read` starts.
    offset: usize,
}
24+
25+
impl Docx {
26+
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Docx> {
27+
let file = File::open(path.as_ref())?;
28+
let mut archive = ZipArchive::new(file)?;
29+
30+
let mut xml_data = String::new();
31+
32+
// let doc_xml_file:File;
33+
34+
for i in 0..archive.len(){
35+
let mut c_file = archive.by_index(i).unwrap();
36+
if c_file.name() == "word/document.xml" {
37+
c_file.read_to_string(&mut xml_data);
38+
break
39+
}
40+
}
41+
42+
let mut xml_reader = Reader::from_str(xml_data.as_ref());
43+
44+
let mut buf = Vec::new();
45+
let mut txt = Vec::new();
46+
47+
if xml_data.len() > 0 {
48+
let mut to_read = false;
49+
loop {
50+
match xml_reader.read_event(&mut buf){
51+
Ok(Event::Start(ref e)) => {
52+
match e.name() {
53+
b"w:p" => {
54+
to_read = true;
55+
txt.push("\n\n".to_string());
56+
},
57+
b"w:t" => to_read = true,
58+
_ => (),
59+
}
60+
},
61+
Ok(Event::Text(e)) => {
62+
if to_read {
63+
txt.push(e.unescape_and_decode(&xml_reader).unwrap());
64+
to_read = false;
65+
}
66+
},
67+
Ok(Event::Eof) => break, // exits the loop when reaching end of file
68+
Err(e) => panic!("Error at position {}: {:?}", xml_reader.buffer_position(), e),
69+
_ => (),
70+
}
71+
}
72+
}
73+
74+
Ok(
75+
Docx {
76+
path: path.as_ref().to_path_buf(),
77+
data: txt.join(""),
78+
offset: 0
79+
}
80+
)
81+
}
82+
83+
}
84+
85+
impl Read for Docx {
86+
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
87+
let bytes = self.data.as_bytes();
88+
let limit = if bytes.len() < self.offset + 10 {
89+
bytes.len()
90+
}else{
91+
self.offset + 10
92+
};
93+
94+
if self.offset > limit {
95+
Ok(0)
96+
}else{
97+
let rv = buf.write(&bytes[self.offset..limit])?;
98+
self.offset = self.offset + 10;
99+
Ok(rv)
100+
}
101+
}
102+
}
103+
104+
105+
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Calling `open` on `./tmp` must not panic; the result itself is ignored.
    #[test]
    fn instantiate() {
        let target = Path::new("./tmp");
        let _ = Docx::open(target);
    }

    /// Opens the bundled sample document, drains it through `Read`, and
    /// prints the byte count together with the text.
    #[test]
    fn read_docx() {
        let sample = Path::new("data/filosofi-logo.docx");
        let mut docx = Docx::open(sample).unwrap();

        let mut text = String::new();
        let byte_count = docx.read_to_string(&mut text).unwrap();
        println!("len: {}, data: {}", byte_count, text);
    }
}

0 commit comments

Comments
 (0)