Commit d1fdd0c6 authored by Wohlgemuth, Jason's avatar Wohlgemuth, Jason
Browse files

feat: Initial creation of get_singular_form; Initial structure for get_syllable_count

parent cd28ce04
Loading
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, SystemSoftwareInformation, TableFormatPrint};
use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, TableFormatPrint};
use acorn_lib::util::cli::Diagnostic;
use color_eyre::eyre::{Report, Result};
use tracing::warn;
@@ -15,7 +15,7 @@ pub fn run(fix: &bool, interactive: &bool, check: &[Diagnostic]) -> Result<(), R
        unimplemented!("Interactive mode is not implemented yet");
    }
    if should_run(check, Diagnostic::Software) {
        SystemSoftwareInformation::init().print();
        // SystemSoftwareInformation::init().print();
    }
    if should_run(check, Diagnostic::System) {
        SystemInformation::init().print();
+268 −1
Original line number Diff line number Diff line
use fancy_regex::Regex;
use lazy_static::lazy_static;
use std::collections::HashMap;

/// Base URL for deploying ORNL data
pub const BASE_URL: &str = "https://research.ornl.gov";
@@ -44,13 +45,279 @@ pub const MAX_LENGTH_SUBTITLE: u64 = 100;
/// Maximum number of characters for a single technical approach description
pub const MAX_LENGTH_TECHNICAL: usize = 100;
/// Maximum number of characters for a title
///
/// TODO: This should be 35
pub const MAX_LENGTH_TITLE: u64 = 50;
/// ### Nouns with the same singular and plural forms
///
/// All entries are lower case. `get_singular_form` lower-cases its input and returns
/// any word found in this list unchanged, before any suffix rule is tried.
pub const SAME_SINGULAR_PLURAL: [&str; 110] = [
    "accommodation",
    "advice",
    "alms",
    "aircraft",
    "aluminum",
    "barracks",
    "bison",
    "binoculars",
    "bourgeois",
    "breadfruit",
    "buffalo",
    "cannon",
    "caribou",
    "chalk",
    "chassis",
    "chinos",
    "clippers",
    "clothing",
    "cod",
    "concrete",
    "corps",
    "correspondence",
    "crossroads",
    "data",
    "deer",
    "doldrums",
    "dungarees",
    "education",
    "eggfruit",
    "elk",
    "equipment",
    "eyeglasses",
    "fish",
    "flares",
    "flour",
    "food",
    "fruit",
    "furniture",
    "gallows",
    "goldfish",
    "grapefruit",
    "greenfly",
    "grouse",
    "haddock",
    "halibut",
    "head",
    "headquarters",
    "help",
    "homework",
    "hovercraft",
    "ides",
    "information",
    "insignia",
    "jackfruit",
    "jeans",
    "knickers",
    "knowledge",
    "kudos",
    "leggings",
    "lego",
    "luggage",
    "mathematics",
    "money",
    "moose",
    "monkfish",
    "mullet",
    "nailclippers",
    "news",
    "nitrogen",
    "offspring",
    "oxygen",
    "pants",
    "pyjamas",
    "passionfruit",
    "pike",
    "pliers",
    "police",
    "premises",
    "reindeer",
    "rendezvous",
    "rice",
    "salmon",
    "scissors",
    "series",
    "shambles",
    "sheep",
    "shellfish",
    "shorts",
    "shrimp",
    "smithereens",
    "spacecraft",
    "species",
    "squid",
    "staff",
    "starfruit",
    "statistics",
    "stone",
    "sugar",
    "swine",
    "tights",
    "tongs",
    "traffic",
    "trousers",
    "trout",
    "tuna",
    "tweezers",
    "wheat",
    "whitebait",
    "wood",
    "you",
];

lazy_static! {
    // Regular expressions
    /// Matches a single apostrophe, either straight (`'`) or typographic (`’`)
    pub static ref APOSTROPHE: Regex = Regex::new(r#"['’]"#).unwrap();
    /// Matches any single character that is not an ASCII letter (`a`-`z`, `A`-`Z`)
    ///
    /// Digits, punctuation, whitespace and non-ASCII letters (e.g. `é`) all match.
    pub static ref NON_ALPHABETIC: Regex = Regex::new(r#"[^a-zA-Z]"#).unwrap();
    /// Matches a whole DOI-like string: an optional `doi:` prefix, then `10.`, one or more digits, `/` and any suffix
    pub static ref RE_DOI: Regex = Regex::new(r#"^(doi\:)?10\.\d+/.*$"#).unwrap();
    /// Matches a whole 9-character identifier: a leading `0`, six characters from the bracketed set, then two digits
    ///
    /// Presumably a ROR (Research Organization Registry) ID, going by the name.
    /// NOTE(review): `|` inside the character class is a literal pipe, so `|` is accepted as one of the six middle characters — confirm this is intended.
    pub static ref RE_ROR: Regex = Regex::new(r#"^0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"#).unwrap();
    /// Matches text ending in `.png`, `.jpg`, `.jpeg` or `.svg`
    ///
    /// Each extension is accepted in all-lower or all-upper case only; mixed case (e.g. `.Png`) does not match.
    pub static ref RE_IMAGE_EXTENSION: Regex = Regex::new(r#".*[.](png|PNG|jpg|JPG|jpeg|JPEG|svg|SVG)$"#).unwrap();
    /// Matches IPv6 address forms: full eight-group, `::`-compressed, `fe80:` link-local with a `%zone` suffix, and forms with an embedded dotted IPv4 address
    ///
    /// The pattern has no `^`/`$` anchors, so it can also match an address embedded in longer text.
    pub static ref RE_IP6: Regex = Regex::new(r#"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"#).unwrap();
    /// Matches a whole 10-digit phone number in 3-3-4 grouping
    ///
    /// Accepts an optional `+` country code of one or two digits, optional parentheses around the
    /// first three digits, and an optional whitespace, `.` or `-` separator between groups.
    pub static ref RE_PHONE: Regex = Regex::new(r#"^(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}$"#).unwrap();
    /// ### Match single syllable pre- and suffixes
    ///
    /// The first alternative group is anchored at the start of the text (prefixes), the second at the end (suffixes).
    pub static ref SINGLE: Regex = Regex::new(r#"^(?:un|fore|ware|none?|out|post|sub|pre|pro|dis|side|some)|(?:ly|less|some|ful|ers?|ness|cians?|ments?|ettes?|villes?|ships?|sides?|ports?|shires?|[gnst]ion(?:ed|s)?)$"#).unwrap();
    /// ### Match double syllable pre- and suffixes
    ///
    /// The first alternative group is anchored at the start of the text (prefixes), the second at the end (suffixes).
    pub static ref DOUBLE: Regex = Regex::new(r#"^(?:above|anti|ante|counter|hyper|afore|agri|infra|intra|inter|over|semi|ultra|under|extra|dia|micro|mega|kilo|pico|nano|macro|somer)|(?:fully|berry|woman|women|edly|union|((?:[bcdfghjklmnpqrstvwxz])|[aeiou])ye?ing)$"#).unwrap();
    /// ### Match triple syllable suffixes
    ///
    /// Anchored at the end of the text only; there are no triple syllable prefixes in this pattern.
    pub static ref TRIPLE: Regex = Regex::new(r#"(creations?|ology|ologist|onomy|onomist)$"#).unwrap();
    /// ### Match letter sequences counted as two syllables, but should be one
    ///
    /// First of two such patterns (see `SINGLE_SYLLABIC_TWO`).
    /// NOTE(review): the counting code that applies this correction is not in this file — confirm how matches are used.
    pub static ref SINGLE_SYLLABIC_ONE : Regex = Regex::new(r#"awe($|d|so)|cia(?:l|$)|tia|cius|cious|[^aeiou]giu|[aeiouy][^aeiouy]ion|iou|sia$|eous$|[oa]gue$|.[^aeiuoycgltdb]{2,}ed$|.ely$|^jua|uai|eau|^busi$|(?:[aeiouy](?:[bcfgklmnprsvwxyz]|ch|dg|g[hn]|lch|l[lv]|mm|nch|n[cgn]|r[bcnsv]|squ|s[chkls]|th)ed$)|(?:[aeiouy](?:[bdfklmnprstvy]|ch|g[hn]|lch|l[lv]|mm|nch|nn|r[nsv]|squ|s[cklst]|th)es$)"#).unwrap();
    /// ### Match word endings counted as two syllables, but should be one
    ///
    /// Matches a vowel, then a consonant or consonant cluster, then a final `e` at the end of the text.
    /// NOTE(review): "TWO" appears to number this as the second pattern of its kind (after `SINGLE_SYLLABIC_ONE`) rather than refer to two-syllable words — confirm.
    pub static ref SINGLE_SYLLABIC_TWO : Regex = Regex::new(r#"[aeiouy](?:[bcdfgklmnprstvyz]|ch|dg|g[hn]|l[lv]|mm|n[cgns]|r[cnsv]|squ|s[cklst]|th)e$"#).unwrap();
    /// ### Match word endings counted as one syllable, but should be two
    ///
    /// Every alternative is anchored at the end of the text. The first alternative uses a
    /// backreference to match a doubled consonant followed by `l` (e.g. `ttl`).
    // In a raw string the backreference is written `\1`; `\\1` would be an escaped backslash followed by a literal `1`.
    pub static ref DOUBLE_SYLLABIC_ONE: Regex = Regex::new(r#"(?:([^aeiouy])\1l|[^aeiouy]ie(?:r|s?t)|[aeiouym]bl|eo|ism|asm|thm|dnt|snt|uity|dea|gean|oa|ua|react?|orbed|shred|eings?|[aeiouy]sh?e[rs])$"#).unwrap();
    /// ### Match letter sequences counted as one syllable, but should be two
    ///
    /// Uses a negative look-ahead (`creat(?!u)`), which `fancy_regex` supports.
    /// NOTE(review): "TWO" appears to number this as the second pattern of its kind rather than refer to two-syllable words — confirm.
    pub static ref DOUBLE_SYLLABIC_TWO: Regex = Regex::new(r#"creat(?!u)|[^gq]ua[^auieo]|[aeiou]{3}|^(?:ia|mc|coa[dglx].)|^re(app|es|im|us)|(th|d)eist"#).unwrap();
    /// ### Match letter sequences counted as one syllable, but should be two
    ///
    /// The pattern is unanchored, so it matches anywhere in the text.
    /// NOTE(review): "THREE" appears to number this as the third pattern of its kind rather than refer to three-syllable words — confirm.
    pub static ref DOUBLE_SYLLABIC_THREE: Regex = Regex::new(r#"[^aeiou]y[ae]|[^l]lien|riet|dien|iu|io|ii|uen|[aeilotu]real|real[aeilotu]|iell|eo[^aeiou]|[aeiou]y[aeiou]"#).unwrap();
    /// ### Match `ia` not preceded by `s`, counted as one syllable, but should be two
    ///
    /// A preceding character is required, so `ia` at the very start of the text does not match.
    /// NOTE(review): "FOUR" appears to number this as the fourth pattern of its kind rather than refer to four-syllable words — confirm.
    pub static ref DOUBLE_SYLLABIC_FOUR: Regex = Regex::new(r#"[^s]ia"#).unwrap();
    /// Nouns with irregular singular/plural forms
    ///
    /// Keyed by the singular form; the value is the matching plural form.
    pub static ref IRREGULAR_NOUNS: HashMap<&'static str, &'static str> = HashMap::from([
        ("child", "children"),
        ("cow", "cattle"),
        ("foot", "feet"),
        ("goose", "geese"),
        ("man", "men"),
        ("move", "moves"),
        ("person", "people"),
        ("radius", "radii"),
        ("sex", "sexes"),
        ("tooth", "teeth"),
        ("woman", "women"),
    ]);
    /// Nouns with irregular plural/singular forms
    ///
    /// Inverted version of [IRREGULAR_NOUNS]: keyed by the plural form, with the singular form as the value.
    // Built by iterating the source map by reference, so the source map is never cloned.
    pub static ref IRREGULAR_NOUNS_INVERTED: HashMap<&'static str, &'static str> = IRREGULAR_NOUNS.iter().map(|(&singular, &plural)| (plural, singular)).collect();
    /// ### Words with problematic syllable counts
    ///
    /// Maps a lower-case word to a syllable count. Not every entry is a noun (e.g. "every", "maybe", "naive").
    /// NOTE(review): presumably consulted as an override before any pattern-based counting — no lookup is visible in this file; confirm.
    pub static ref PROBLEMATIC_WORDS: HashMap<&'static str, usize> = vec![
        ("abalone", 4),
        ("abare", 3),
        ("abbruzzese", 4),
        ("abed", 2),
        ("aborigine", 5),
        ("abruzzese", 4),
        ("acreage", 3),
        ("adame", 3),
        ("adieu", 2),
        ("adobe", 3),
        ("anemone", 4),
        ("anyone", 3),
        ("apache", 3),
        ("aphrodite", 4),
        ("apostrophe", 4),
        ("ariadne", 4),
        ("cafe", 2),
        ("café", 2),
        ("calliope", 4),
        ("catastrophe", 4),
        ("chile", 2),
        ("chloe", 2),
        ("circe", 2),
        ("cliche", 2),
        ("cliché", 2),
        ("contrariety", 4),
        ("coyote", 3),
        ("daphne", 2),
        ("epitome", 4),
        ("eurydice", 4),
        ("euterpe", 3),
        ("every", 2),
        ("everywhere", 3),
        ("forever", 3),
        ("gethsemane", 4),
        ("guacamole", 4),
        ("hermione", 4),
        ("hyperbole", 4),
        ("jesse", 2),
        ("jukebox", 2),
        ("karate", 3),
        ("machete", 3),
        ("maybe", 2),
        ("naive", 2),
        ("newlywed", 3),
        ("ninety", 2),
        ("penelope", 4),
        ("people", 2),
        ("persephone", 4),
        ("phoebe", 2),
        ("pulse", 1),
        ("queue", 1),
        ("recipe", 3),
        ("reptilian", 4),
        ("resumé", 2),
        ("riverbed", 3),
        ("scotia", 3),
        ("sesame", 3),
        ("shoreline", 2),
        ("simile", 3),
        ("snuffleupagus", 5),
        ("sometimes", 2),
        ("syncope", 3),
        ("tamale", 3),
        ("waterbed", 3),
        ("wednesday", 2),
        ("viceroyship", 3),
        ("yosemite", 4),
        ("zoë", 2),
    ].into_iter().collect();
    /// ### Words that need to be fixed when counting syllables
    ///
    /// Maps a lower-case word to a syllable count.
    ///
    /// All counts are (correct - 1)
    ///
    /// NOTE(review): several entries (e.g. "diskette" = 2, "preemption" = 3) look like the actual
    /// syllable count rather than count - 1 — confirm which convention is intended before relying on it.
    pub static ref NEED_TO_BE_FIXED: HashMap<&'static str, usize> = vec![
        ("ayo", 2),
        ("australian", 3),
        ("dionysius", 5),
        ("disbursement", 3),
        ("discouragement", 4),
        ("disenfranchisement", 5),
        ("disengagement", 4),
        ("disgraceful", 3),
        ("diskette", 2),
        ("displacement", 3),
        ("distasteful", 3),
        ("distinctiveness", 4),
        ("distraction", 3),
        ("geoffrion", 4),
        ("mcquaid", 2),
        ("mcquaide", 2),
        ("mcquaig", 2),
        ("mcquain", 2),
        ("nonbusiness", 3),
        ("nonetheless", 3),
        ("nonmanagement", 4),
        ("outplacement", 3),
        ("outrageously", 4),
        ("postponement", 3),
        ("preemption", 3),
        ("preignition", 4),
        ("preinvasion", 4),
        ("preisler", 3),
        ("preoccupation", 5),
        ("prevette", 2),
        ("probusiness", 3),
        ("procurement", 3),
        ("pronouncement", 3),
        ("sidewater", 3),
        ("sidewinder", 3),
        ("ungerer", 3),
    ].into_iter().collect();
}
+69 −0
Original line number Diff line number Diff line
use crate::constants::{IRREGULAR_NOUNS, IRREGULAR_NOUNS_INVERTED, NON_ALPHABETIC, SAME_SINGULAR_PLURAL};
use console::Emoji;
use data_encoding::HEXUPPER;
use derive_more::Display;
use duct::cmd;
use fancy_regex::Regex;
use glob::glob;
use is_executable::IsExecutable;
use owo_colors::{OwoColorize, Style, Styled};
@@ -379,6 +381,73 @@ pub fn get_image_paths(root: PathBuf) -> Vec<PathBuf> {
/// Get the parent directory of a path, rendered as a string
///
/// # Panics
///
/// Panics when `path` has no parent (for example a filesystem root such as `/`, or an empty string).
pub fn get_parent(path: String) -> String {
    let location = PathBuf::from(path);
    let parent = location.parent().unwrap();
    parent.display().to_string()
}
/// Get the singular form of a word
///
/// Adapted from the PHP library, [Text-Statistics](https://github.com/DaveChild/Text-Statistics)
pub fn get_singular_form(word: &str) -> String {
    const SINGULAR: [(&str, &str); 28] = [
        (r#"(quiz)zes$"#, r#"${1}"#),
        (r#"(matr)ices$"#, r#"${1}ix"#),
        (r#"(vert|ind)ices$"#, r#"${1}ex"#),
        (r#"^(ox)en$"#, r#"${1}"#),
        (r#"(alias)es$"#, r#"${1}"#),
        (r#"(octop|vir)i$"#, r#"${1}us"#),
        (r#"(cris|ax|test)es$"#, r#"${1}is"#),
        (r#"(shoe)s$"#, r#"${1}"#),
        (r#"(o)es$"#, r#"${1}"#),
        (r#"(bus)es$"#, r#"${1}"#),
        (r#"([m|l])ice$"#, r#"${1}ouse"#),
        (r#"(x|ch|ss|sh)es$"#, r#"${1}"#),
        (r#"(m)ovies$"#, r#"${1}ovie"#),
        (r#"(s)eries$"#, r#"${1}eries"#),
        (r#"([^aeiouy]|qu)ies$"#, r#"${1}y"#),
        (r#"([lr])ves$"#, r#"${1}f"#),
        (r#"(tive)s$"#, r#"${1}"#),
        (r#"(hive)s$"#, r#"${1}"#),
        (r#"(li|wi|kni)ves$"#, r#"${1}fe"#),
        (r#"(shea|loa|lea|thie)ves$"#, r#"${1}f"#),
        (r#"(^analy)ses$"#, r#"${1}sis"#),
        (r#"((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$"#, r#"${1}${2}sis"#),
        (r#"([ti])a$"#, r#"${1}um"#),
        (r#"(n)ews$"#, r#"${1}ews"#),
        (r#"(h|bl)ouses$"#, r#"${1}ouse"#),
        (r#"(corpse)s$"#, r#"${1}"#),
        (r#"(us)es$"#, r#"${1}"#),
        (r#"s$"#, r#""#),
    ];
    match word.to_lowercase().as_str() {
        | value if SAME_SINGULAR_PLURAL.contains(&value) => value.to_string(),
        | value if IRREGULAR_NOUNS.contains_key(&value) => value.to_string(),
        | value if IRREGULAR_NOUNS_INVERTED.contains_key(&value) => match IRREGULAR_NOUNS_INVERTED.get(value) {
            | Some(value) => value.to_string(),
            | None => value.to_string(),
        },
        | value => {
            let pair = SINGULAR.iter().find(|(pattern, _)| match Regex::new(pattern).unwrap().is_match(value) {
                | Ok(true) => true,
                | Ok(false) | Err(_) => false,
            });
            match pair {
                | Some((pattern, replacement)) => {
                    debug!(pattern, replacement, value, "=> {} Singular form conversion", Label::using());
                    let re = Regex::new(pattern).unwrap();
                    re.replace_all(value, *replacement).to_string()
                }
                | None => value.to_string(),
            }
        }
    }
}
/// Count the syllables in a block of text
///
/// NOTE: Placeholder implementation. Each whitespace-separated token has every
/// [`NON_ALPHABETIC`] character removed and is lower-cased; the token's remaining length
/// is then used as its "syllable count". The result is therefore the number of
/// ASCII letters in `text`, not a real syllable count.
pub fn get_syllable_count(text: &str) -> usize {
    let mut total = 0;
    for token in text.split_whitespace() {
        let cleaned = NON_ALPHABETIC.replace_all(token, "").to_lowercase();
        // TODO: replace the length with an actual per-word syllable count
        total += cleaned.len();
    }
    total
}
#[cfg(any(unix, target_os = "wasi", target_os = "redox"))]
pub fn make_executable(path: &PathBuf) -> bool {
    use std::os::unix::fs::PermissionsExt;
+26 −0
Original line number Diff line number Diff line
@@ -40,6 +40,32 @@ fn test_get_files_from_git() {
    assert!(files.is_empty());
}
#[test]
fn test_get_singular_form() {
    // (expected singular, input) pairs covering empty input, irregular nouns,
    // unchanged nouns, already-singular words and regular suffix rules
    let cases = [
        ("", ""),
        ("man", "men"),
        ("aborigine", "aborigines"),
        ("banana", "banana"),
        ("banana", "bananas"),
        ("buffalo", "buffalo"),
        ("cafe", "cafes"),
        ("goose", "geese"),
        ("goose", "goose"),
        ("house", "houses"),
        ("index", "indices"),
        ("matrix", "matrices"),
        ("mouse", "mice"),
        ("money", "money"),
        ("quiz", "quiz"),
        ("quiz", "quizzes"),
        ("radius", "radii"),
        ("vertex", "vertices"),
    ];
    for (expected, input) in cases {
        assert_eq!(expected, get_singular_form(input));
    }
}
#[test]
fn test_get_syllable_count() {
    // (expected count, input text) pairs
    let cases = [(0, ""), (1, "a")];
    for (expected, text) in cases {
        assert_eq!(expected, get_syllable_count(text));
    }
}
// TODO: Empty placeholder — this test asserts nothing and always passes; add IPv6 cases
#[test]
fn test_is_ip6() {}
#[test]
fn test_semantic_version() {