Merge pull request #36 from epage/bin

feat: Ignore binary files
This commit is contained in:
Ed Page 2019-07-13 22:56:02 -06:00 committed by GitHub
commit 807a4a8a2f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 56 additions and 58 deletions

80
Cargo.lock generated
View file

@ -71,8 +71,18 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bytecount"
version = "0.3.2"
name = "bstr"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "byteorder"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
@ -153,22 +163,6 @@ name = "either"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "encoding_rs"
version = "0.8.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "encoding_rs_io"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "failure"
version = "0.1.5"
@ -237,28 +231,6 @@ dependencies = [
"walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "grep-matcher"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "grep-searcher"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "heck"
version = "0.3.1"
@ -333,15 +305,6 @@ dependencies = [
"libc 0.2.47 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memmap"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.47 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "normalize-line-endings"
version = "0.2.2"
@ -568,6 +531,14 @@ dependencies = [
"utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-automata"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.4"
@ -766,10 +737,10 @@ name = "typos"
version = "0.1.0"
dependencies = [
"assert_fs 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"bstr 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
"csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-searcher 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"ignore 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -885,7 +856,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum backtrace 0.3.13 (registry+https://github.com/rust-lang/crates.io-index)" = "b5b493b66e03090ebc4343eb02f94ff944e0cbc9ac6571491d170ba026741eb5"
"checksum backtrace-sys 0.1.28 (registry+https://github.com/rust-lang/crates.io-index)" = "797c830ac25ccc92a7f8a7b9862bde440715531514594a6154e3d4a54dd769b6"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8"
"checksum bstr 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc0572e02f76cb335f309b19e0a0d585b4f62788f7d26de2a13a836a637385f"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum cc 1.0.28 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4a8b715cb4597106ea87c7c84b2f1d452c7492033765df7f32651e66fcf749"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
@ -896,8 +868,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0"
"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"
"checksum encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "098f6a0ab73a9ba256b71344dc82c6d7e252736ad9db7f4e35345f3a1f8713f5"
"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2"
"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1"
"checksum float-cmp 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "134a8fa843d80a51a5b77d36d42bc2def9edcb0262c914861d08129fd1926600"
@ -906,8 +876,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
"checksum globset 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4743617a7464bbda3c8aec8558ff2f9429047e025771037df561d383337ff865"
"checksum globwalk 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "89fa2e29859da05acd066bd45996f05c271b271d7ec4a781f909682328f65d25"
"checksum grep-matcher 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "820946e0c314510779a8d86c5cd03240e0ae0993dabcdb98733a8f6a9001b607"
"checksum grep-searcher 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6eb23805170ff0e96894a24847019500de11e9baaabe3dafed75b35a897636e1"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
"checksum ignore 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ad03ca67dc12474ecd91fdb94d758cbd20cb4e7a78ebe831df26a9b7511e1162"
"checksum itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5b8467d9c1cebe26feb08c640139247fac215782d35371ade9a2136ed6085358"
@ -917,7 +885,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e1dd4eaac298c32ce07eb6ed9242eda7d82955b9170b7d6db59b2e02cc63fcb8"
"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff"
"checksum normalize-line-endings 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2e0a1a39eab95caf4f5556da9289b9e68f0aafac901b2ce80daaf020d3b733a8"
"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13"
@ -944,6 +911,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum redox_syscall 0.1.51 (registry+https://github.com/rust-lang/crates.io-index)" = "423e376fffca3dfa06c9e9790a9ccd282fafb3cc6e6397d01dbf64f9bacc6b85"
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f"
"checksum regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed09217220c272b29ef237a974ad58515bde75f194e3ffa7e6d0bf0f3b01f86"
"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1"
"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5"
"checksum rustc-demangle 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "adacaae16d02b6ec37fdc7acfcddf365978de76d1983d3ee22afc260e1ca9619"

View file

@ -27,12 +27,12 @@ ignore = "0.4"
phf = { version = "0.7", features = ["unicase"] }
regex = "1.0"
lazy_static = "1.2.0"
grep-searcher = "0.1"
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
itertools = "0.8"
unicase = "1.1"
bstr = "0.2"
[dev-dependencies]
assert_fs = "0.10"

View file

@ -18,6 +18,7 @@ fn process_empty(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
false,
typos::report::print_silent,
)
});
@ -37,6 +38,7 @@ fn process_no_tokens(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
false,
typos::report::print_silent,
)
});
@ -56,6 +58,7 @@ fn process_single_token(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
false,
typos::report::print_silent,
)
});
@ -75,6 +78,7 @@ fn process_sherlock(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
false,
typos::report::print_silent,
)
});
@ -94,6 +98,7 @@ fn process_code(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
false,
typos::report::print_silent,
)
});
@ -113,6 +118,7 @@ fn process_corpus(b: &mut test::Bencher) {
sample_path.path(),
&corrections,
true,
false,
typos::report::print_silent,
)
});

View file

@ -12,15 +12,22 @@ pub use crate::dict::*;
use std::fs::File;
use std::io::Read;
use bstr::ByteSlice;
pub fn process_file(
path: &std::path::Path,
dictionary: &Dictionary,
ignore_hex: bool,
binary: bool,
report: report::Report,
) -> Result<(), failure::Error> {
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
if !binary && buffer.find_byte(b'\0').is_some() {
return Ok(());
}
for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in tokens::Identifier::parse(line) {
if !ignore_hex && is_hex(ident.token()) {

View file

@ -55,6 +55,12 @@ struct Options {
/// The approximate number of threads to use.
threads: usize,
#[structopt(long, raw(overrides_with = r#""no-binary""#))]
/// Search binary files.
binary: bool,
#[structopt(long, raw(overrides_with = r#""binary""#), raw(hidden = "true"))]
no_binary: bool,
#[structopt(long, raw(overrides_with = r#""no-hidden""#))]
/// Search hidden files and directories.
hidden: bool,
@ -118,6 +124,15 @@ impl Options {
}
}
pub fn binary(&self) -> Option<bool> {
match (self.binary, self.no_binary) {
(true, false) => Some(true),
(false, true) => Some(false),
(false, false) => None,
(_, _) => unreachable!("StructOpt should make this impossible"),
}
}
pub fn ignore_hidden(&self) -> Option<bool> {
match (self.hidden, self.no_hidden) {
(true, false) => Some(false),
@ -183,6 +198,7 @@ fn run() -> Result<(), failure::Error> {
let dictionary = typos::Dictionary::new();
let ignore_hex = options.ignore_hex().unwrap_or(true);
let binary = options.binary().unwrap_or(false);
let first_path = &options
.path
@ -207,6 +223,7 @@ fn run() -> Result<(), failure::Error> {
entry.path(),
&dictionary,
ignore_hex,
binary,
options.format.report(),
)?;
}