Commit dfbf694b authored by Wohlgemuth, Jason's avatar Wohlgemuth, Jason
Browse files

wip: Add filter option

parent 067281ae
Loading
Loading
Loading
Loading
Loading
+75 −6
Original line number Diff line number Diff line
use acorn::io::{files_all, files_from_git_branch, files_from_git_commit, files_from_gitlab_merge_request, filter_ignored};
use acorn::prelude::PathBuf;
use acorn::util::constants::{ENV_CACHE_TTL, ENV_DATABASE_BACKEND, ENV_DATABASE_PATH, ENV_NO_LOCAL_DATABASE};
use acorn::util::{regex_inverse, regex_join};
use bon::Builder;
use fancy_regex::Regex;
use clap::builder::{
    styling::{Ansi256Color, AnsiColor},
    Styles,
};
use clap::{ArgAction, Parser, Subcommand, ValueHint};
use clap_verbosity_flag::Verbosity;
use tracing::error;

pub mod arguments;
use arguments::{CheckCategoryArgument, DatabaseBackend, Diagnostic, FileFormat, ReadabilityTypeArgument, Standard};
@@ -29,6 +32,8 @@ pub struct CommandOptions {
    pub branch: Option<String>,
    /// Git commit hash
    pub commit: Option<String>,
    /// Regex pattern of files to include at a given path designated by `path`
    pub filter: Option<String>,
    /// Regex pattern of files to ignore at a given path designated by `path`
    pub ignore: Option<String>,
    /// Path to file or folder to be used for output
@@ -59,34 +64,73 @@ pub struct CommandOptions {
/// If the options specify a commit, the files changed in the commit are returned.
/// If the options specify a branch, the files changed in the branch are returned.
/// If none of the above options are set, the files in the given path are returned.
/// If the options include a filter regex, only matching files are returned.
/// If the options include an ignore regex, it is applied to the files returned.
pub async fn resolve_paths(path: &Option<PathBuf>, options: &CommandOptions) -> Vec<PathBuf> {
    // The same extension list is handed to every file source below.
    let extensions = Some(vec!["JSON", "YAML"]);
    let CommandOptions {
        branch,
        commit,
        filter,
        ignore,
        merge_request,
        ..
    } = options;
    let files = if *merge_request {
        files_from_gitlab_merge_request(extensions).await
    // `local_base` is `Some` only when files are listed from the local path;
    // the merge request, commit, and branch sources all pair their files with `None`.
    let (files, local_base) = if *merge_request {
        (files_from_gitlab_merge_request(extensions).await, None)
    } else {
        match commit {
            | Some(hash) => files_from_git_commit(hash, extensions),
            | Some(hash) => (files_from_git_commit(hash, extensions), None),
            | None => match branch {
                | Some(name) => files_from_git_branch(name, extensions),
                | Some(name) => (files_from_git_branch(name, extensions), None),
                | None => {
                    // Fall back to the current directory when no path was given.
                    let value = match path {
                        | Some(x) => x.clone(),
                        | None => PathBuf::from("."),
                    };
                    files_all(value, extensions)
                    (files_all(value.clone(), extensions), Some(value))
                }
            },
        }
    };
    filter_ignored(files, ignore.clone())
    // Collect the patterns that are present: the ignore pattern as-is and the include
    // filter after `regex_inverse`.
    // NOTE(review): `regex_inverse` presumably negates the filter so that both options can be
    // expressed as one exclusion regex - confirm against `acorn::util`.
    let patterns = vec![ignore.clone(), filter.clone().map(regex_inverse)]
        .into_iter()
        .flatten()
        .collect::<Vec<_>>();
    // `regex_join` yields `None` when there is nothing to apply, in which case the files
    // are returned untouched.
    match regex_join(&patterns) {
        | Some(pattern) => match local_base {
            // Local listings go through `filter_by_pattern`, which is given the listing root.
            | Some(root) => filter_by_pattern(files, pattern, root),
            | None => filter_ignored(files, Some(pattern)),
        },
        | None => files,
    }
}
/// Drop every path whose `root`-relative form matches `pattern`.
///
/// Each path is made relative to `root` (or to the parent directory of `root` when `root`
/// is a file), rendered with forward slashes, and tested against `pattern`. Paths that
/// match are removed; paths that do not match, or whose match attempt errors, are kept.
///
/// The relative form is resolved in this order:
/// 1. canonical path against canonical root,
/// 2. path as given against canonical root,
/// 3. path as given against the root as given,
/// 4. the path as given, unchanged, when none of the prefixes apply.
///
/// Steps 2 and 3 keep matching root-relative when a path cannot be canonicalized
/// (for example because it does not exist) even though the root can.
///
/// Returns an empty list and logs an error when `pattern` is not a valid regular expression.
fn filter_by_pattern(paths: Vec<PathBuf>, pattern: String, root: PathBuf) -> Vec<PathBuf> {
    // A file has no children, so relative paths are computed from its containing directory.
    let root = if root.is_file() {
        root.parent().map(|value| value.to_path_buf()).unwrap_or(root)
    } else {
        root
    };
    let normalized_root = root.canonicalize().unwrap_or_else(|_| root.clone());
    let re = match Regex::new(&pattern) {
        | Ok(re) => re,
        | Err(why) => {
            error!("=> Filter ignored - {why}");
            return vec![];
        }
    };
    paths
        .into_iter()
        .filter(|path| {
            let canonical = path.canonicalize().ok();
            let relative = canonical
                .as_deref()
                .and_then(|resolved| resolved.strip_prefix(&normalized_root).ok())
                .or_else(|| path.strip_prefix(&normalized_root).ok())
                .or_else(|| path.strip_prefix(&root).ok())
                .unwrap_or(path.as_path());
            // Normalize Windows separators so one pattern works on every platform.
            let value = relative.to_string_lossy().replace('\\', "/");
            // A failed match attempt counts as "no match", so the path is kept.
            !re.is_match(&value).unwrap_or(false)
        })
        .collect()
}
/// "Plant an ACORN and grow your science"
///
@@ -236,6 +280,11 @@ pub enum Commands {
        /// Exit on first error
        #[arg(short, long = "exit-on-first-error", value_name = "BOOL", help_heading = "FLAGS")]
        exit_on_first_error: bool,
        /// Regular expression pattern(s) applied to absolute paths of files to include during checks
        ///
        /// Only files matching at least one pattern will be processed
        #[arg(long, value_name = "LIST", value_delimiter = ',', help_heading = "OPTIONS")]
        filter: Vec<String>,
        /// Regular expression pattern(s) applied to absolute paths of files to exclude from checking
        ///
        /// Only applies to `--path` values that point to a directory
@@ -332,6 +381,11 @@ pub enum Commands {
        /// Path to configuration file (alternative to URL argument)
        #[arg(short, long, value_name = "PATH", value_hint = ValueHint::FilePath, conflicts_with = "url", help_heading = "OPTIONS")]
        config: Option<PathBuf>,
        /// Regular expression pattern(s) used to include files while downloading
        ///
        /// Only files matching at least one pattern will be processed
        #[arg(long, value_name = "LIST", value_delimiter = ',', help_heading = "OPTIONS")]
        filter: Vec<String>,
        /// Regular expression pattern(s) to ignore while downloading
        ///
        /// Values augment built-in ignores and can be provided as a comma-separated list
@@ -379,6 +433,11 @@ pub enum Commands {
        /// Export target file format
        #[arg(default_value_t, short, long, value_name = "FORMAT", help_heading = "OPTIONS")]
        format: FileFormat,
        /// Regular expression pattern(s) applied to absolute paths of files to include during export
        ///
        /// Only files matching at least one pattern will be processed
        #[arg(long, value_name = "LIST", value_delimiter = ',', help_heading = "OPTIONS")]
        filter: Vec<String>,
        /// Regular expression pattern(s) applied to absolute paths of files to exclude from export process
        ///
        /// Only applies to `--path` values that point to a directory
@@ -435,6 +494,11 @@ pub enum Commands {
        /// Run format without making changes to target file(s). Will print a diff of changes.
        #[arg(short, long = "dry-run", value_name = "BOOL", help_heading = "FLAGS")]
        dry_run: bool,
        /// Regex pattern applied to absolute paths of files to include in formatting process
        ///
        /// Only files matching the pattern will be processed
        #[arg(long, value_name = "REGEX", help_heading = "OPTIONS")]
        filter: Option<String>,
        /// Regex pattern applied to absolute paths of files that determines whether they should be included in formatting process
        ///
        /// Only applies to path values that point to a directory
@@ -463,6 +527,11 @@ pub enum Commands {
        /// Path to look for files to process
        #[arg(default_value = "./", required = false, value_name = "PATH", value_hint = ValueHint::AnyPath, help_heading = "OPTIONS")]
        path: Option<PathBuf>,
        /// Regex pattern applied to absolute paths of files to include in processing
        ///
        /// Only files matching the pattern will be processed
        #[arg(long, value_name = "REGEX", help_heading = "OPTIONS")]
        filter: Option<String>,
        /// Regex pattern applied to absolute paths of files that determines whether they should be included in processing
        ///
        /// Only applies to path values that point to a directory
+87 −1
Original line number Diff line number Diff line
use crate::cli::Arguments;
use crate::cli::{filter_by_pattern, resolve_paths, Arguments, CommandOptions};
use acorn::prelude::PathBuf;
use clap::{CommandFactory, Parser};
use futures::executor::block_on;

/// Report whether `path`, rendered with forward slashes, ends with `suffix`.
fn has_suffix(path: &PathBuf, suffix: &str) -> bool {
    let normalized = path.to_string_lossy().replace('\\', "/");
    normalized.ends_with(suffix)
}

/// Build the path to the filter fixtures, relative to this crate's manifest directory.
fn fixture_content_root() -> PathBuf {
    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    manifest_dir.join("../tests/fixtures/filter")
}

#[test]
fn test_cli() {
@@ -8,4 +18,80 @@ fn test_cli() {
    assert!(Arguments::try_parse_from(["acorn", "check", "/path/to/file.json"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "check", "/path/to/file.json", "--all"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "check", "/path/to", "--ignore", "[/]valid.json$,[/]draft.json$"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "check", "/path/to", "--filter", "[/]valid.json$,[/]draft.json$"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "check", "/path/to", "--ignore", "[/]draft.json$", "--filter", "[/]valid.json$"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "download", "--filter", "\\.json$"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "download", "--ignore", "\\.png$", "--filter", "\\.json$"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "export", "./", "--format", "pdf", "--filter", "json"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "export", "./", "--format", "pdf", "--ignore", "png", "--filter", "json"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "format", "./", "--filter", "json"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "format", "./", "--ignore", "png", "--filter", "json"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "gather", "./", "--filter", "json"]).is_ok());
    assert!(Arguments::try_parse_from(["acorn", "gather", "./", "--ignore", "png", "--filter", "json"]).is_ok());
}
/// A negative-lookahead pattern removes every path that mentions neither `acorn` nor `sansr`.
#[test]
fn test_filter_paths_by_pattern_keeps_only_matching_relative_paths() {
    let base = fixture_content_root();
    let candidates: Vec<PathBuf> = ["acorn/index.json", "sansr/index.yaml", "other/index.json"]
        .iter()
        .map(|name| base.join(name))
        .collect();
    let expected = vec![base.join("acorn/index.json"), base.join("sansr/index.yaml")];
    let inverted_filter = String::from("^(?!.*(?:(?:acorn)|(?:sansr))).*$");
    let kept = filter_by_pattern(candidates, inverted_filter, base);
    assert_eq!(kept, expected);
}
/// A plain pattern removes the paths it matches and keeps the rest in their original order.
#[test]
fn test_filter_paths_by_pattern_applies_ignore_pattern_to_relative_paths() {
    let base = fixture_content_root();
    let candidates: Vec<PathBuf> = ["acorn/index.json", "sansr/index.yaml", "other/index.json"]
        .iter()
        .map(|name| base.join(name))
        .collect();
    let expected = vec![base.join("sansr/index.yaml"), base.join("other/index.json")];
    let ignore_pattern = String::from("(?:acorn)");
    let kept = filter_by_pattern(candidates, ignore_pattern, base);
    assert_eq!(kept, expected);
}
/// A pattern that fails to compile yields no paths at all.
#[test]
fn test_filter_paths_by_pattern_returns_empty_for_invalid_regex() {
    let base = fixture_content_root();
    let candidates = vec![base.join("acorn/index.json")];
    let malformed = String::from("[");
    assert!(filter_by_pattern(candidates, malformed, base).is_empty());
}
/// With only a filter set, resolving the fixture directory yields the `acorn` and `sansr` files.
#[test]
fn test_resolve_paths_applies_filter_to_relative_local_paths() {
    let options = CommandOptions::init()
        .maybe_filter(Some(String::from("(?:acorn)|(?:sansr)")))
        .build();
    let resolved = block_on(resolve_paths(&Some(fixture_content_root()), &options));
    let found = |suffix: &str| resolved.iter().any(|path| has_suffix(path, suffix));
    assert_eq!(resolved.len(), 2);
    assert!(found("acorn/index.json"));
    assert!(found("sansr/index.yaml"));
    assert!(!found("other/index.json"));
}
/// With only an ignore pattern set, resolving the fixture directory drops the `acorn` file.
#[test]
fn test_resolve_paths_applies_ignore_to_relative_local_paths() {
    let options = CommandOptions::init().maybe_ignore(Some(String::from("(?:acorn)"))).build();
    let resolved = block_on(resolve_paths(&Some(fixture_content_root()), &options));
    let found = |suffix: &str| resolved.iter().any(|path| has_suffix(path, suffix));
    assert_eq!(resolved.len(), 2);
    assert!(found("sansr/index.yaml"));
    assert!(found("other/index.json"));
    assert!(!found("acorn/index.json"));
    assert!(!found("other/notes.txt"));
}
+6 −4
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ pub async fn run(
    path: &Option<PathBuf>,
    branch: &Option<String>,
    commit: &Option<String>,
    filter: &[String],
    ignore: &[String],
    skip: &[CheckCategoryArgument],
    disable_website_checks: &bool,
@@ -45,6 +46,7 @@ pub async fn run(
    let command_options = CommandOptions::init()
        .maybe_branch(branch.clone())
        .maybe_commit(commit.clone())
        .maybe_filter(regex_join(filter))
        .maybe_ignore(regex_join(ignore))
        .merge_request(*merge_request)
        .offline(offline)
@@ -121,7 +123,7 @@ fn handle(issues: &[Check], paths: &[PathBuf], options: &CheckOptions) -> Result
    };
    if !quiet {
        render(issues, terse);
        if !(no_fail || terse) {
        if !(no_fail || terse) && has_failures(issues) {
            print_summary();
        }
    }
@@ -145,19 +147,19 @@ fn handle(issues: &[Check], paths: &[PathBuf], options: &CheckOptions) -> Result
/// Sum `Check::issue_count` over the checks that are failures.
fn failure_count(issues: &[Check]) -> usize {
    issues
        .iter()
        .filter(|Check { severity, .. }| severity.is_failure())
        .filter(|issue| issue.is_failure())
        .map(Check::issue_count)
        .sum::<usize>()
}
/// Clone the checks to keep: every check when `all` is true, otherwise only failures.
fn filter_by_visibility(issues: &[Check], all: bool) -> Vec<Check> {
    issues
        .iter()
        .filter(|issue| all || issue.severity.is_failure())
        .filter(|issue| all || issue.is_failure())
        .cloned()
        .collect::<Vec<Check>>()
}
/// Report whether at least one check is a failure.
fn has_failures(issues: &[Check]) -> bool {
    issues.iter().any(|Check { severity, .. }| severity.is_failure())
    issues.iter().any(Check::is_failure)
}
fn infer_standard(paths: &[PathBuf]) -> Standard {
    let extensions = unique_file_extensions(paths);
+6 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ async fn test_check_valid_project() -> Result<()> {
        &None,
        &[],
        &[],
        &[],
        &true,
        &false,
        &false,
@@ -51,6 +52,7 @@ async fn test_check_valid_highlight() -> Result<()> {
        &None,
        &[],
        &[],
        &[],
        &true,
        &false,
        &false,
@@ -77,6 +79,7 @@ async fn test_check_invalid_project_with_no_fail() -> Result<()> {
        &None,
        &[],
        &[],
        &[],
        &true,
        &false,
        &false,
@@ -102,6 +105,7 @@ async fn test_check_with_skip_categories() -> Result<()> {
        &None,
        &None,
        &[],
        &[],
        &[CheckCategoryArgument::Prose, CheckCategoryArgument::Schema],
        &true,
        &false,
@@ -131,6 +135,7 @@ async fn test_check_with_different_readability_metrics() -> Result<()> {
            &None,
            &[],
            &[],
            &[],
            &true,
            &false,
            &false,
@@ -158,6 +163,7 @@ async fn test_check_terse_mode() -> Result<()> {
        &None,
        &[],
        &[],
        &[],
        &true,
        &false,
        &false,
+7 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ const DEFAULT_CONFIG_FILENAMES: [&str; 3] = [".acorn.json", ".acorn.yaml", ".aco
pub async fn run(
    config: &Option<PathBuf>,
    url: &Option<String>,
    filter: &[String],
    ignore: &[String],
    output: &Option<PathBuf>,
    database_path: &Option<PathBuf>,
@@ -33,7 +34,12 @@ pub async fn run(
        .supported(true)
        .build());
    let database = Database::<Table>::from_path(database_path.clone());
    let options = BucketOptions::init().threads(threads).quiet(quiet).ignore(ignore.to_vec()).build();
    let options = BucketOptions::init()
        .filter(filter.to_vec())
        .ignore(ignore.to_vec())
        .quiet(quiet)
        .threads(threads)
        .build();
    let config = config.clone().or_else(|| resolve_default_config_path(&PathBuf::from(".")));
    if let Some(path) = &config {
        match ApplicationConfiguration::read(path.clone()) {
Loading