Commit f172969b authored by Wohlgemuth, Jason's avatar Wohlgemuth, Jason
Browse files

feat: Initial unproven enhancement

parent 8996762a
Loading
Loading
Loading
Loading
+314 −0
Original line number Diff line number Diff line
//! Error-related types and functions for analyzer module
use crate::analyzer::readability::ReadabilityType;
use crate::analyzer::vale::ValeOutputItem;
use ariadne::{Color, Config, Report, ReportKind, Source};
use core::fmt;
use core::ops::RangeInclusive;
use validator::{ValidationError, ValidationErrorsKind};

/// Error kind
///
/// Payload describing why a check failed. The [`fmt::Display`] implementation in this module
/// renders each variant differently: readability issues as `metric: index`, Vale issues as
/// one `[check] message` line per item, and validator issues as ariadne reports.
#[derive(Clone, Debug)]
pub enum ErrorKind {
    /// Readability issue where calculated index exceeds threshold of associated metric
    ///
    /// The tuple holds the calculated index first, followed by the readability metric type.
    Readability((f64, ReadabilityType)),
    /// Prose issue found by Vale
    ///
    /// Holds every Vale output item reported for the checked content.
    Vale(Vec<ValeOutputItem>),
    /// Schema validation issue found by [validator crate]
    ///
    /// Holds the (possibly nested) validation error tree; see `collect_leaf_errors` for how
    /// it is flattened into individual errors with dotted field paths.
    ///
    /// [validator crate]: https://crates.io/crates/validator
    Validator(ValidationErrorsKind),
}
impl fmt::Display for ErrorKind {
    /// Write a human-readable rendering of the error
    ///
    /// - `Readability` is written as `<metric>: <index>` with no trailing newline
    /// - `Vale` is written as one `[check] message` line per item
    /// - `Validator` is written as one compact ariadne report per leaf validation error
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            | ErrorKind::Readability((index, rtype)) => write!(f, "{rtype}: {index}"),
            | ErrorKind::Vale(items) => items
                .iter()
                .try_for_each(|item| writeln!(f, "[{}] {}", item.check, item.message)),
            | ErrorKind::Validator(kind) => {
                let reports = build_validation_reports(kind, "");
                reports.iter().try_for_each(|(text, report)| {
                    // Render into an in-memory buffer first because ariadne writes bytes,
                    // while the formatter only accepts text
                    let mut rendered = Vec::new();
                    // Rendering is best effort: a failed write leaves whatever was produced so far
                    let _ = report.write(Source::from(text.as_str()), &mut rendered);
                    write!(f, "{}", String::from_utf8_lossy(&rendered))
                })
            }
        }
    }
}
impl ErrorKind {
    /// Render schema validation errors using ariadne with source file context
    ///
    /// Shows the actual line from the JSON/YAML file where each issue occurs,
    /// with one line of context before and after. All errors are consolidated
    /// into a single unified output for better readability.
    pub fn print_with_source(&self, field: &str, source_content: &str, file_path: &str) {
        if let ErrorKind::Validator(kind) = self {
            let errors = collect_leaf_errors(kind, field);
            if errors.is_empty() {
                return;
            }
            let source = Source::from(source_content);
            let mut all_labels = Vec::new();
            // Collect all error information and try to locate them in source
            for (index, (path, err)) in errors.iter().enumerate() {
                let message = match &err.message {
                    | Some(msg) => msg.to_string(),
                    | None => format_error_description(err),
                };
                let span = find_field_in_source(source_content, path).unwrap_or(1..=1);
                let label = ariadne::Label::new((file_path, span))
                    .with_message(format!("[{}] {} — {}", index, message, path))
                    .with_color(Color::Red);
                all_labels.push(label);
            }
            // Build a single consolidated report with all errors
            if !all_labels.is_empty() {
                let mut builder = Report::build(ReportKind::Error, (file_path, 1..=1))
                    .with_config(Config::default().with_compact(true))
                    .with_message(format!(
                        "Schema validation failed ({} issue{})",
                        errors.len(),
                        if errors.len() == 1 { "" } else { "s" }
                    ));

                for label in all_labels {
                    builder = builder.with_label(label);
                }
                let _ = builder.finish().print((file_path, source.clone()));
            }
        }
    }
}
/// Build ariadne [`Report`] values from a [`ValidationErrorsKind`]
fn build_validation_reports<'a>(kind: &ValidationErrorsKind, prefix: &str) -> Vec<(String, Report<'a, RangeInclusive<usize>>)> {
    match kind {
        | ValidationErrorsKind::Field(errors) => errors.iter().map(|err| build_field_report(err, prefix)).collect(),
        | ValidationErrorsKind::Struct(nested) => nested
            .errors()
            .iter()
            .flat_map(|(field, kind)| {
                let path = if prefix.is_empty() {
                    field.to_string()
                } else {
                    format!("{prefix}.{field}")
                };
                build_validation_reports(kind, &path)
            })
            .collect(),
        | ValidationErrorsKind::List(map) => map
            .iter()
            .flat_map(|(index, nested)| {
                let path = format!("{prefix}[{index}]");
                nested
                    .errors()
                    .iter()
                    .flat_map(|(field, kind)| {
                        let inner = format!("{path}.{field}");
                        build_validation_reports(kind, &inner)
                    })
                    .collect::<Vec<_>>()
            })
            .collect(),
    }
}
/// Flatten a validation error tree into its leaf [`ValidationError`] values
///
/// Each leaf is paired with its full path: struct fields are joined with `.` and list
/// elements are written as `[index]` (for example `meta.websites[1].url`).
///
/// `prefix` is the path of `kind` itself; an empty prefix means struct fields at this
/// level are emitted without a leading `.`.
pub(crate) fn collect_leaf_errors<'a>(kind: &'a ValidationErrorsKind, prefix: &str) -> Vec<(String, &'a ValidationError)> {
    /// Depth-first walk that appends every leaf below `node` to `leaves`
    fn walk<'e>(node: &'e ValidationErrorsKind, path: &str, leaves: &mut Vec<(String, &'e ValidationError)>) {
        match node {
            | ValidationErrorsKind::Field(errors) => {
                leaves.extend(errors.iter().map(|err| (path.to_string(), err)));
            }
            | ValidationErrorsKind::Struct(nested) => {
                for (field, child) in nested.errors() {
                    let child_path = if path.is_empty() {
                        field.to_string()
                    } else {
                        format!("{path}.{field}")
                    };
                    walk(child, &child_path, leaves);
                }
            }
            | ValidationErrorsKind::List(elements) => {
                for (index, nested) in elements {
                    let element_path = format!("{path}[{index}]");
                    for (field, child) in nested.errors() {
                        walk(child, &format!("{element_path}.{field}"), leaves);
                    }
                }
            }
        }
    }
    let mut leaves = Vec::new();
    walk(kind, prefix, &mut leaves);
    leaves
}
/// Find the byte offset range of a field key in JSON source content
///
/// `field_path` is a dotted path such as `meta.websites[1].url`. Segments are resolved
/// left to right: each segment's key is searched for (as a double-quoted string) starting
/// after the previous match, and an `[index]` suffix moves the search position to the
/// start of that array element before the next segment is resolved.
///
/// Returns the inclusive byte range of the last key that was found, quotes included.
/// If a segment cannot be found, the range of the previous segment is returned, which is
/// `None` when nothing matched at all.
///
/// Limitations: this is a textual search, not a parser. Only double-quoted keys are
/// matched (unquoted YAML keys are not found), and a string *value* equal to the key text
/// can be matched in place of the key.
fn find_field_in_source(source: &str, field_path: &str) -> Option<RangeInclusive<usize>> {
    let mut search_from = 0;
    let mut result = None;
    for segment in field_path.split('.') {
        let (field_name, array_idx) = parse_path_segment(segment);
        // A segment without a name (empty path, or a bare "[1]" for a root-level list) has
        // no key to look for; searching for `""` would highlight an unrelated empty string
        if !field_name.is_empty() {
            // Candidates are tried in priority order; the first one present in the
            // remaining source wins, even if another candidate occurs earlier
            let located = field_name_candidates(field_name).into_iter().find_map(|candidate| {
                let json_key = format!("\"{candidate}\"");
                source[search_from..].find(&json_key).map(|pos| (search_from + pos, json_key.len()))
            });
            match located {
                | Some((offset, key_len)) => {
                    search_from = offset + key_len;
                    result = Some(offset..=(offset + key_len - 1));
                }
                | None => return result,
            }
        }
        // Navigate into the correct array element when an index is specified
        if let Some(idx) = array_idx {
            if let Some(bracket_start) = source[search_from..].find('[') {
                let array_start = search_from + bracket_start + 1;
                // When the array has fewer elements than `idx`, keep searching from the key
                if let Some(element_start) = array_element_offset(source, array_start, idx) {
                    search_from = element_start;
                }
            }
        }
    }
    result
}
/// Split a path segment like `websites[1]` into its field name and optional array index
///
/// Segments without a well-formed `[digits]` suffix yield no index; malformed input such
/// as `name[` never panics.
fn parse_path_segment(segment: &str) -> (&str, Option<usize>) {
    match segment.split_once('[') {
        | Some((name, rest)) => (name, rest.strip_suffix(']').and_then(|digits| digits.parse::<usize>().ok())),
        | None => (segment, None),
    }
}
/// Return the byte offset at which element `idx` of a JSON array begins
///
/// `array_start` is the offset just past the array's opening `[`. Elements are separated
/// by commas at nesting depth zero; commas, brackets and braces inside double-quoted
/// strings (including strings with backslash escapes) are ignored. Returns `None` when the
/// array closes, or the source ends, before element `idx` is reached.
fn array_element_offset(source: &str, array_start: usize, idx: usize) -> Option<usize> {
    if idx == 0 {
        return Some(array_start);
    }
    let mut current_idx = 0;
    let mut depth: usize = 0;
    let mut in_string = false;
    let mut escaped = false;
    for (i, byte) in source.as_bytes()[array_start..].iter().enumerate() {
        if in_string {
            // Inside a string only the closing quote matters; a backslash shields the next byte
            if escaped {
                escaped = false;
            } else if *byte == b'\\' {
                escaped = true;
            } else if *byte == b'"' {
                in_string = false;
            }
            continue;
        }
        match byte {
            | b'"' => in_string = true,
            | b'{' | b'[' => depth += 1,
            | b'}' | b']' => {
                // A closer at depth zero ends the array we are scanning
                if depth == 0 {
                    return None;
                }
                depth -= 1;
            }
            | b',' if depth == 0 => {
                current_idx += 1;
                if current_idx == idx {
                    // The separator is ASCII, so the next byte is a valid `str` boundary
                    return Some(array_start + i + 1);
                }
            }
            | _ => {}
        }
    }
    None
}
/// Generate candidate JSON key names for a Rust struct field name
///
/// Candidates are returned in priority order: the name as given, then its camelCase form
/// (only when the name contains an underscore), then any known serde aliases.
fn field_name_candidates(field_name: &str) -> Vec<String> {
    let mut candidates = vec![field_name.to_string()];
    // Try camelCase for snake_case field names
    if field_name.contains('_') {
        let mut camel = String::with_capacity(field_name.len());
        let mut capitalize_next = false;
        for ch in field_name.chars() {
            if ch == '_' {
                capitalize_next = true;
            } else if capitalize_next {
                camel.push(ch.to_ascii_uppercase());
                capitalize_next = false;
            } else {
                camel.push(ch);
            }
        }
        candidates.push(camel);
    }
    // Known serde aliases from the schema
    match field_name {
        | "telephone" => candidates.push("phone".to_string()),
        | "media" => candidates.push("graphics".to_string()),
        | "content_url" => {
            candidates.push("url".to_string());
            candidates.push("href".to_string());
        }
        | "job_title" => candidates.push("title".to_string()),
        | "given_name" => candidates.push("first".to_string()),
        | "family_name" => candidates.push("last".to_string()),
        | _ => {}
    }
    candidates
}

/// Build an ariadne [`Report`] from a single [`ValidationError`]
fn build_field_report<'a>(err: &ValidationError, field: &str) -> (String, Report<'a, RangeInclusive<usize>>) {
    let message = match &err.message {
        | Some(msg) => msg.to_string(),
        | None => format_error_description(err),
    };
    let source_text = format!("{field}: {message}");
    let span = 0..=field.len().saturating_sub(1);
    let report = Report::build(ReportKind::Error, span.clone())
        .with_config(Config::default().with_compact(true))
        .with_message(format!("Schema validation failed for `{field}`"))
        .with_label(ariadne::Label::new(span).with_message(&message).with_color(Color::Red))
        .with_note(format!("validation code: {}", err.code))
        .finish();
    (source_text, report)
}
/// Convert a 1-based line number and a `[start, end]` column pair into an offset range
///
/// The start offset is the summed length of every line before `line_number` plus
/// `span[0] - 1`; the range extends `span[1] - span[0]` positions past the start.
///
/// Test-only helper. `line_number` and `span[0]` must be at least 1 and `span[1]` must not
/// be smaller than `span[0]`, otherwise the subtractions underflow.
#[cfg(test)]
pub(crate) fn hightlighted_span(source: Source, span: &[u32], line_number: u32) -> RangeInclusive<usize> {
    let preceding_lines = (line_number as usize) - 1;
    let column_offset = (span[0] - 1) as usize;
    let preceding_len: usize = source.lines().take(preceding_lines).map(|line| line.len()).sum();
    let begin = column_offset + preceding_len;
    let width = (span[1] as usize) - (span[0] as usize);
    begin..=(begin + width)
}
/// Format a human-readable description from a [`ValidationError`] code and params
///
/// Used as the fallback text when a validation error carries no explicit message.
/// Recognized codes are `length`, `range`, `email`, `url`, `required`, `regex` and
/// `custom`; any other code yields `validation failed (<code>)`.
///
/// For `length`, the `equal` parameter takes precedence over `min`/`max` because the
/// validator crate's length check accepts an exact length as an alternative to bounds.
/// Parameter values are rendered with their `Display` form.
fn format_error_description(err: &ValidationError) -> String {
    let code = err.code.as_ref();
    let params = &err.params;
    // Look up a parameter by name and render it as text when present
    let param = |name: &str| params.get(name).map(|value| value.to_string());
    match code {
        | "length" => match (param("equal"), param("min"), param("max")) {
            | (Some(equal), _, _) => format!("length must be exactly {equal}"),
            | (None, Some(min), Some(max)) => format!("length must be between {min} and {max}"),
            | (None, Some(min), None) => format!("length must be at least {min}"),
            | (None, None, Some(max)) => format!("length must be at most {max}"),
            | (None, None, None) => "invalid length".to_string(),
        },
        | "range" => match (param("min"), param("max")) {
            | (Some(min), Some(max)) => format!("value must be between {min} and {max}"),
            | (Some(min), None) => format!("value must be at least {min}"),
            | (None, Some(max)) => format!("value must be at most {max}"),
            | (None, None) => "value out of range".to_string(),
        },
        | "email" => "invalid email address".to_string(),
        | "url" => "invalid URL".to_string(),
        | "required" => "field is required".to_string(),
        | "regex" => "value does not match expected pattern".to_string(),
        | "custom" => params
            .get("message")
            .and_then(|v| v.as_str())
            .unwrap_or("custom validation failed")
            .to_string(),
        | _ => format!("validation failed ({code})"),
    }
}
+61 −58
Original line number Diff line number Diff line
@@ -2,13 +2,15 @@
//!
//! This is where we keep functions and interfaces necessary to execute ACORN's automated editorial style guide as well as content readability analyzer.
//!
use crate::analyzer::errors::{collect_leaf_errors, ErrorKind};
use crate::analyzer::vale::{ValeOutput, ValeOutputItem};
use crate::io::read_file;
use crate::io::{command_exists, download_binary, extract_zip, file_checksum, make_executable, standard_project_folder};
#[cfg(feature = "std")]
use crate::io::{get, InputOutput};
use crate::prelude::{self, create_dir_all, remove_file, Command, CommandOutput, Error, File, HashMap, PathBuf, Write};
use crate::schema::research_activity::ResearchActivity;
use crate::schema::{ImageObject, MediaObject, Organization, ProgrammingLanguage, VideoObject, Website};
use crate::schema::{MediaObject, Organization, ProgrammingLanguage, Website};
use crate::util::constants::{
    APPLICATION, CUSTOM_VALE_PACKAGE_NAME, DEFAULT_VALE_PACKAGE_URL, DEFAULT_VALE_ROOT, DISABLED_VALE_RULES, ENABLED_VALE_PACKAGES, ORGANIZATION,
    VALE_RELEASES_URL, VALE_VERSION,
@@ -126,18 +128,6 @@ pub enum CheckCategory {
    #[display("schema")]
    Schema,
}
/// Error kind
#[derive(Clone, Debug)]
pub enum ErrorKind {
    /// Readability issue where calculated index exceeds threshold of associated metric
    Readability((f64, ReadabilityType)),
    /// Prose issue found by Vale
    Vale(Vec<ValeOutputItem>),
    /// Schema validation issue found by [validator crate]
    ///
    /// [validator crate]: https://crates.io/crates/validator
    Validator(ValidationErrorsKind),
}
/// Data structure for holding the result of a schema validation check
#[derive(Builder, Clone, Debug, Display)]
#[builder(start_fn = init, on(String, into))]
@@ -286,11 +276,7 @@ impl Check {
            | CheckCategory::Schema => {
                if let Some(kind) = &self.errors {
                    match kind {
                        | ErrorKind::Validator(values) => match values {
                            | ValidationErrorsKind::Field(_) => 1,
                            | ValidationErrorsKind::Struct(values) => values.clone().into_errors().len(),
                            | ValidationErrorsKind::List(_) => 0,
                        },
                        | ErrorKind::Validator(values) => collect_leaf_errors(values, "").len(),
                        | _ => 0,
                    }
                } else {
@@ -407,14 +393,14 @@ impl Check {
                    info!("=> {} {} has {}", Label::pass(), path, "no schema validation issues".green().bold());
                } else {
                    let count = self.issue_count();
                    error!(
                        "=> {} Found {} schema validation issue{} in {}: \n{:#?}",
                        Label::fail(),
                        count.red(),
                        suffix(count),
                        path.italic().underline(),
                        self.errors.unwrap()
                    );
                    error!("=> {} Found {} schema validation issue{}", Label::fail(), count.red(), suffix(count));
                    // Use the errors module for detailed output if available
                    if let Some(errors) = &self.errors {
                        match &self.context {
                            | Some(content) => errors.print_with_source(&self.message, content, &path),
                            | None => eprintln!("{}", errors),
                        }
                    }
                }
            }
        }
@@ -871,12 +857,24 @@ impl Validation for ResearchActivity {
        paths
            .par_iter()
            .map(|path| match ResearchActivity::read(path) {
                | Ok(data) => data
                    .clone()
                | Ok(data) => {
                    let source = read_file(path.clone()).ok();
                    data.clone()
                        .validation_issues()
                        .into_iter()
                    .map(|issue| issue.with_uri(path.display().to_string()))
                    .collect(),
                        .map(|issue| {
                            Check::init()
                                .category(issue.category)
                                .success(issue.success)
                                .uri(path.display().to_string())
                                .message(issue.message)
                                .maybe_context(source.clone())
                                .maybe_errors(issue.errors)
                                .maybe_status_code(issue.status_code)
                                .build()
                        })
                        .collect()
                }
                | Err(why) => {
                    error!("=> {} Read research activity data at {} - {why}", Label::fail(), path.display());
                    vec![Check::init().category(CheckCategory::Schema).success(false).build()]
@@ -886,33 +884,37 @@ impl Validation for ResearchActivity {
            .collect()
    }
    fn validation_issues(self) -> Vec<Check> {
        fn errors_collect<T: Validate>(attribute: T) -> Option<Vec<Check>> {
            match attribute.validate() {
                | Ok(_) => None,
                | Err(err) => Some(
                    err.into_errors()
        let root_result = self.clone().validate();

        let media_children = match self.meta.media {
            | Some(values) => values
                .into_iter()
                        .map(|(key, value)| {
                            Check::init()
                .map(|media| {
                    let media_result = match media {
                        | MediaObject::Image(x) => x.validate(),
                        | MediaObject::Video(x) => x.validate(),
                    };
                    media_result.map_err(|err| {
                        let mut wrapped = validator::ValidationErrors::new();
                        wrapped.merge_self("media", Err(err));
                        wrapped
                    })
                })
                .collect::<Vec<_>>(),
            | None => vec![],
        };

        let merged = validator::ValidationErrors::merge_all(root_result, "media", media_children);

        match merged {
            | Ok(_) => vec![],
            | Err(err) => vec![Check::init()
                .category(CheckCategory::Schema)
                .success(false)
                                .errors(ErrorKind::Validator(value))
                                .message(key.to_string())
                                .build()
                        })
                        .collect::<Vec<Check>>(),
                ),
            }
        }
        let mut found = vec![errors_collect::<ResearchActivity>(self.clone())];
        match self.meta.media {
            | Some(values) => values.iter().for_each(|media| match media {
                | MediaObject::Image(x) => found.push(errors_collect::<ImageObject>(x.clone())),
                | MediaObject::Video(x) => found.push(errors_collect::<VideoObject>(x.clone())),
            }),
            | None => {}
                .errors(ErrorKind::Validator(ValidationErrorsKind::Struct(Box::new(err))))
                .message("".to_string())
                .build()],
        }
        found.into_iter().flatten().flatten().collect::<Vec<_>>()
    }
}
fn hightlighted_span(source: Source, span: &[u32], line_number: u32) -> RangeInclusive<usize> {
@@ -1063,5 +1065,6 @@ where
    }
}

mod errors;
#[cfg(test)]
mod tests;
+116 −12

File changed.

Preview size limit exceeded, changes collapsed.

+38 −11

File changed.

Preview size limit exceeded, changes collapsed.

+7 −7

File changed.

Contains only whitespace changes.

+1 −1

File changed.

Contains only whitespace changes.

+1 −1

File changed.

Contains only whitespace changes.

Loading