Loading acorn-cli/src/commands/doctor/mod.rs +2 −2 Original line number Diff line number Diff line use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, SystemSoftwareInformation, TableFormatPrint}; use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, TableFormatPrint}; use acorn_lib::util::cli::Diagnostic; use color_eyre::eyre::{Report, Result}; use tracing::warn; Loading @@ -15,7 +15,7 @@ pub fn run(fix: &bool, interactive: &bool, check: &[Diagnostic]) -> Result<(), R unimplemented!("Interactive mode is not implemented yet"); } if should_run(check, Diagnostic::Software) { SystemSoftwareInformation::init().print(); // SystemSoftwareInformation::init().print(); } if should_run(check, Diagnostic::System) { SystemInformation::init().print(); Loading acorn-lib/src/constants.rs +268 −1 Original line number Diff line number Diff line use fancy_regex::Regex; use lazy_static::lazy_static; use std::collections::HashMap; // Base URL for deploying ORNL data pub const BASE_URL: &str = "https://research.ornl.gov"; Loading Loading @@ -44,13 +45,279 @@ pub const MAX_LENGTH_SUBTITLE: u64 = 100; /// Maximum number of characters for a single technical approach description pub const MAX_LENGTH_TECHNICAL: usize = 100; /// Maximum number of characters for a title /// TODO: This should be 35 pub const MAX_LENGTH_TITLE: u64 = 50; /// ### Nouns with the same singular and plural forms pub const SAME_SINGULAR_PLURAL: [&str; 110] = [ "accommodation", "advice", "alms", "aircraft", "aluminum", "barracks", "bison", "binoculars", "bourgeois", "breadfruit", "buffalo", "cannon", "caribou", "chalk", "chassis", "chinos", "clippers", "clothing", "cod", "concrete", "corps", "correspondence", "crossroads", "data", "deer", "doldrums", "dungarees", "education", "eggfruit", "elk", "equipment", "eyeglasses", "fish", "flares", "flour", "food", "fruit", "furniture", "gallows", "goldfish", "grapefruit", "greenfly", "grouse", "haddock", "halibut", "head", "headquarters", "help", "homework", "hovercraft", "ides", "information", "insignia", "jackfruit", "jeans", "knickers", "knowledge", "kudos", "leggings", "lego", "luggage", "mathematics", "money", "moose", "monkfish", "mullet", "nailclippers", "news", "nitrogen", "offspring", "oxygen", "pants", "pyjamas", "passionfruit", "pike", "pliers", "police", "premises", "reindeer", "rendezvous", "rice", "salmon", "scissors", "series", "shambles", "sheep", "shellfish", "shorts", "shrimp", "smithereens", "spacecraft", "species", "squid", "staff", "starfruit", "statistics", "stone", "sugar", "swine", "tights", "tongs", "traffic", "trousers", "trout", "tuna", "tweezers", "wheat", "whitebait", "wood", "you", ]; lazy_static! { // Regular expressions /// Apostrophe pub static ref APOSTROPHE: Regex = Regex::new(r#"['’]"#).unwrap(); /// Non-alphabetic pub static ref NON_ALPHABETIC: Regex = Regex::new(r#"[^a-zA-Z]"#).unwrap(); pub static ref RE_DOI: Regex = Regex::new(r#"^(doi\:)?10\.\d+/.*$"#).unwrap(); pub static ref RE_ROR: Regex = Regex::new(r#"^0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"#).unwrap(); pub static ref RE_IMAGE_EXTENSION: Regex = Regex::new(r#".*[.](png|PNG|jpg|JPG|jpeg|JPEG|svg|SVG)$"#).unwrap(); pub static ref RE_IP6: Regex = Regex::new(r#"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"#).unwrap(); pub static ref RE_PHONE: Regex = Regex::new(r#"^(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}$"#).unwrap(); /// ### Match single syllable pre- and suffixes pub static ref SINGLE: Regex = Regex::new(r#"^(?:un|fore|ware|none?|out|post|sub|pre|pro|dis|side|some)|(?:ly|less|some|ful|ers?|ness|cians?|ments?|ettes?|villes?|ships?|sides?|ports?|shires?|[gnst]ion(?:ed|s)?)$"#).unwrap(); /// ### Match double syllable pre- and suffixes pub static ref DOUBLE: Regex = Regex::new(r#"^(?:above|anti|ante|counter|hyper|afore|agri|infra|intra|inter|over|semi|ultra|under|extra|dia|micro|mega|kilo|pico|nano|macro|somer)|(?:fully|berry|woman|women|edly|union|((?:[bcdfghjklmnpqrstvwxz])|[aeiou])ye?ing)$"#).unwrap(); /// ### Match triple syllabble suffixes pub static ref TRIPLE: Regex = Regex::new(r#"(creations?|ology|ologist|onomy|onomist)$"#).unwrap(); /// ### Match syllables counted as two, but should be one pub static ref SINGLE_SYLLABIC_ONE : Regex = Regex::new(r#"awe($|d|so)|cia(?:l|$)|tia|cius|cious|[^aeiou]giu|[aeiouy][^aeiouy]ion|iou|sia$|eous$|[oa]gue$|.[^aeiuoycgltdb]{2,}ed$|.ely$|^jua|uai|eau|^busi$|(?:[aeiouy](?:[bcfgklmnprsvwxyz]|ch|dg|g[hn]|lch|l[lv]|mm|nch|n[cgn]|r[bcnsv]|squ|s[chkls]|th)ed$)|(?:[aeiouy](?:[bdfklmnprstvy]|ch|g[hn]|lch|l[lv]|mm|nch|nn|r[nsv]|squ|s[cklst]|th)es$)"#).unwrap(); /// ### Match two-syllable words counted as two, but should be one pub static ref SINGLE_SYLLABIC_TWO : Regex = Regex::new(r#"[aeiouy](?:[bcdfgklmnprstvyz]|ch|dg|g[hn]|l[lv]|mm|n[cgns]|r[cnsv]|squ|s[cklst]|th)e$"#).unwrap(); /// ### Match syllables counted as one, but should be two pub static ref DOUBLE_SYLLABIC_ONE: Regex = Regex::new(r#"(?:([^aeiouy])\\1l|[^aeiouy]ie(?:r|s?t)|[aeiouym]bl|eo|ism|asm|thm|dnt|snt|uity|dea|gean|oa|ua|react?|orbed|shred|eings?|[aeiouy]sh?e[rs])$"#).unwrap(); /// ### Match two-syllable words counted as one, but should be two pub static ref DOUBLE_SYLLABIC_TWO: Regex = Regex::new(r#"creat(?!u)|[^gq]ua[^auieo]|[aeiou]{3}|^(?:ia|mc|coa[dglx].)|^re(app|es|im|us)|(th|d)eist"#).unwrap(); /// ### Match three-syllable words counted as one, but should be two pub static ref DOUBLE_SYLLABIC_THREE: Regex = Regex::new(r#"[^aeiou]y[ae]|[^l]lien|riet|dien|iu|io|ii|uen|[aeilotu]real|real[aeilotu]|iell|eo[^aeiou]|[aeiou]y[aeiou]"#).unwrap(); /// ### Match four-syllable words counted as one, but should be two pub static ref DOUBLE_SYLLABIC_FOUR: Regex = Regex::new(r#"[^s]ia"#).unwrap(); /// Nouns with irregular singular/plural forms pub static ref IRREGULAR_NOUNS: HashMap<&'static str, &'static str> = vec![ ("child", "children"), ("cow", "cattle"), ("foot", "feet"), ("goose", "geese"), ("man", "men"), ("move", "moves"), ("person", "people"), ("radius", "radii"), ("sex", "sexes"), ("tooth", "teeth"), ("woman", "women"), ].into_iter().collect(); /// Nouns with irregular plural/singular forms /// /// Inverted version of [IRREGULAR_NOUNS] pub static ref IRREGULAR_NOUNS_INVERTED: HashMap<&'static str, &'static str> = IRREGULAR_NOUNS.clone().into_iter().map(|(k, v)| (v, k)).collect(); /// ### Nouns with problematic syllable counts pub static ref PROBLEMATIC_WORDS: HashMap<&'static str, usize> = vec![ ("abalone", 4), ("abare", 3), ("abbruzzese", 4), ("abed", 2), ("aborigine", 5), ("abruzzese", 4), ("acreage", 3), ("adame", 3), ("adieu", 2), ("adobe", 3), ("anemone", 4), ("anyone", 3), ("apache", 3), ("aphrodite", 4), ("apostrophe", 4), ("ariadne", 4), ("cafe", 2), ("café", 2), ("calliope", 4), ("catastrophe", 4), ("chile", 2), ("chloe", 2), ("circe", 2), ("cliche", 2), ("cliché", 2), ("contrariety", 4), ("coyote", 3), ("daphne", 2), ("epitome", 4), ("eurydice", 4), ("euterpe", 3), ("every", 2), ("everywhere", 3), ("forever", 3), ("gethsemane", 4), ("guacamole", 4), ("hermione", 4), ("hyperbole", 4), ("jesse", 2), ("jukebox", 2), ("karate", 3), ("machete", 3), ("maybe", 2), ("naive", 2), ("newlywed", 3), ("ninety", 2), ("penelope", 4), ("people", 2), ("persephone", 4), ("phoebe", 2), ("pulse", 1), ("queue", 1), ("recipe", 3), ("reptilian", 4), ("resumé", 2), ("riverbed", 3), ("scotia", 3), ("sesame", 3), ("shoreline", 2), ("simile", 3), ("snuffleupagus", 5), ("sometimes", 2), ("syncope", 3), ("tamale", 3), ("waterbed", 3), ("wednesday", 2), ("viceroyship", 3), ("yosemite", 4), ("zoë", 2), ].into_iter().collect(); /// ### Nouns that need to be fixed when counting syllables /// /// All counts are (correct - 1) pub static ref NEED_TO_BE_FIXED: HashMap<&'static str, usize> = vec![ ("ayo", 2), ("australian", 3), ("dionysius", 5), ("disbursement", 3), ("discouragement", 4), ("disenfranchisement", 5), ("disengagement", 4), ("disgraceful", 3), ("diskette", 2), ("displacement", 3), ("distasteful", 3), ("distinctiveness", 4), ("distraction", 3), ("geoffrion", 4), ("mcquaid", 2), ("mcquaide", 2), ("mcquaig", 2), ("mcquain", 2), ("nonbusiness", 3), ("nonetheless", 3), ("nonmanagement", 4), ("outplacement", 3), ("outrageously", 4), ("postponement", 3), ("preemption", 3), ("preignition", 4), ("preinvasion", 4), ("preisler", 3), ("preoccupation", 5), ("prevette", 2), ("probusiness", 3), ("procurement", 3), ("pronouncement", 3), ("sidewater", 3), ("sidewinder", 3), ("ungerer", 3), ].into_iter().collect(); } acorn-lib/src/util/mod.rs +69 −0 Original line number Diff line number Diff line use crate::constants::{IRREGULAR_NOUNS, IRREGULAR_NOUNS_INVERTED, NON_ALPHABETIC, SAME_SINGULAR_PLURAL}; use console::Emoji; use data_encoding::HEXUPPER; use derive_more::Display; use duct::cmd; use fancy_regex::Regex; use glob::glob; use is_executable::IsExecutable; use owo_colors::{OwoColorize, Style, Styled}; Loading Loading @@ -379,6 +381,73 @@ pub fn get_image_paths(root: PathBuf) -> Vec<PathBuf> { pub fn get_parent(path: String) -> String { PathBuf::from(PathBuf::from(path).parent().unwrap()).display().to_string() } /// Get the singular form of a word /// /// Adapted from the PHP library, [Text-Statistics](https://github.com/DaveChild/Text-Statistics) pub fn get_singular_form(word: &str) -> String { const SINGULAR: [(&str, &str); 28] = [ (r#"(quiz)zes$"#, r#"${1}"#), (r#"(matr)ices$"#, r#"${1}ix"#), (r#"(vert|ind)ices$"#, r#"${1}ex"#), (r#"^(ox)en$"#, r#"${1}"#), (r#"(alias)es$"#, r#"${1}"#), (r#"(octop|vir)i$"#, r#"${1}us"#), (r#"(cris|ax|test)es$"#, r#"${1}is"#), (r#"(shoe)s$"#, r#"${1}"#), (r#"(o)es$"#, r#"${1}"#), (r#"(bus)es$"#, r#"${1}"#), (r#"([m|l])ice$"#, r#"${1}ouse"#), (r#"(x|ch|ss|sh)es$"#, r#"${1}"#), (r#"(m)ovies$"#, r#"${1}ovie"#), (r#"(s)eries$"#, r#"${1}eries"#), (r#"([^aeiouy]|qu)ies$"#, r#"${1}y"#), (r#"([lr])ves$"#, r#"${1}f"#), (r#"(tive)s$"#, r#"${1}"#), (r#"(hive)s$"#, r#"${1}"#), (r#"(li|wi|kni)ves$"#, r#"${1}fe"#), (r#"(shea|loa|lea|thie)ves$"#, r#"${1}f"#), (r#"(^analy)ses$"#, r#"${1}sis"#), (r#"((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$"#, r#"${1}${2}sis"#), (r#"([ti])a$"#, r#"${1}um"#), (r#"(n)ews$"#, r#"${1}ews"#), (r#"(h|bl)ouses$"#, r#"${1}ouse"#), (r#"(corpse)s$"#, r#"${1}"#), (r#"(us)es$"#, r#"${1}"#), (r#"s$"#, r#""#), ]; match word.to_lowercase().as_str() { | value if SAME_SINGULAR_PLURAL.contains(&value) => value.to_string(), | value if IRREGULAR_NOUNS.contains_key(&value) => value.to_string(), | value if IRREGULAR_NOUNS_INVERTED.contains_key(&value) => match IRREGULAR_NOUNS_INVERTED.get(value) { | Some(value) => value.to_string(), | None => value.to_string(), }, | value => { let pair = SINGULAR.iter().find(|(pattern, _)| match Regex::new(pattern).unwrap().is_match(value) { | Ok(true) => true, | Ok(false) | Err(_) => false, }); match pair { | Some((pattern, replacement)) => { debug!(pattern, replacement, value, "=> {} Singular form conversion", Label::using()); let re = Regex::new(pattern).unwrap(); re.replace_all(value, *replacement).to_string() } | None => value.to_string(), } } } } pub fn get_syllable_count(text: &str) -> usize { fn sanitize(value: &str) -> String { NON_ALPHABETIC.replace_all(value, "").to_lowercase() } fn get_syllables(word: String) -> usize { word.len() } let tokens = text.split_whitespace().map(sanitize).collect::<Vec<String>>(); tokens.into_iter().map(get_syllables).sum() } #[cfg(any(unix, target_os = "wasi", target_os = "redox"))] pub fn make_executable(path: &PathBuf) -> bool { use std::os::unix::fs::PermissionsExt; Loading acorn-lib/src/util/tests/mod.rs +26 −0 Original line number Diff line number Diff line Loading @@ -40,6 +40,32 @@ fn test_get_files_from_git() { assert!(files.is_empty()); } #[test] fn test_get_singular_form() { assert_eq!("", get_singular_form("")); assert_eq!("man", get_singular_form("men")); assert_eq!("aborigine", get_singular_form("aborigines")); assert_eq!("banana", get_singular_form("banana")); assert_eq!("banana", get_singular_form("bananas")); assert_eq!("buffalo", get_singular_form("buffalo")); assert_eq!("cafe", get_singular_form("cafes")); assert_eq!("goose", get_singular_form("geese")); assert_eq!("goose", get_singular_form("goose")); assert_eq!("house", get_singular_form("houses")); assert_eq!("index", get_singular_form("indices")); assert_eq!("matrix", get_singular_form("matrices")); assert_eq!("mouse", get_singular_form("mice")); assert_eq!("money", get_singular_form("money")); assert_eq!("quiz", get_singular_form("quiz")); assert_eq!("quiz", get_singular_form("quizzes")); assert_eq!("radius", get_singular_form("radii")); assert_eq!("vertex", get_singular_form("vertices")); } #[test] fn test_get_syllable_count() { assert_eq!(0, get_syllable_count("")); assert_eq!(1, get_syllable_count("a")); } #[test] fn test_is_ip6() {} #[test] fn test_semantic_version() { Loading Loading
acorn-cli/src/commands/doctor/mod.rs +2 −2 Original line number Diff line number Diff line use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, SystemSoftwareInformation, TableFormatPrint}; use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, TableFormatPrint}; use acorn_lib::util::cli::Diagnostic; use color_eyre::eyre::{Report, Result}; use tracing::warn; Loading @@ -15,7 +15,7 @@ pub fn run(fix: &bool, interactive: &bool, check: &[Diagnostic]) -> Result<(), R unimplemented!("Interactive mode is not implemented yet"); } if should_run(check, Diagnostic::Software) { SystemSoftwareInformation::init().print(); // SystemSoftwareInformation::init().print(); } if should_run(check, Diagnostic::System) { SystemInformation::init().print(); Loading
acorn-lib/src/constants.rs +268 −1 Original line number Diff line number Diff line use fancy_regex::Regex; use lazy_static::lazy_static; use std::collections::HashMap; // Base URL for deploying ORNL data pub const BASE_URL: &str = "https://research.ornl.gov"; Loading Loading @@ -44,13 +45,279 @@ pub const MAX_LENGTH_SUBTITLE: u64 = 100; /// Maximum number of characters for a single technical approach description pub const MAX_LENGTH_TECHNICAL: usize = 100; /// Maximum number of characters for a title /// TODO: This should be 35 pub const MAX_LENGTH_TITLE: u64 = 50; /// ### Nouns with the same singular and plural forms pub const SAME_SINGULAR_PLURAL: [&str; 110] = [ "accommodation", "advice", "alms", "aircraft", "aluminum", "barracks", "bison", "binoculars", "bourgeois", "breadfruit", "buffalo", "cannon", "caribou", "chalk", "chassis", "chinos", "clippers", "clothing", "cod", "concrete", "corps", "correspondence", "crossroads", "data", "deer", "doldrums", "dungarees", "education", "eggfruit", "elk", "equipment", "eyeglasses", "fish", "flares", "flour", "food", "fruit", "furniture", "gallows", "goldfish", "grapefruit", "greenfly", "grouse", "haddock", "halibut", "head", "headquarters", "help", "homework", "hovercraft", "ides", "information", "insignia", "jackfruit", "jeans", "knickers", "knowledge", "kudos", "leggings", "lego", "luggage", "mathematics", "money", "moose", "monkfish", "mullet", "nailclippers", "news", "nitrogen", "offspring", "oxygen", "pants", "pyjamas", "passionfruit", "pike", "pliers", "police", "premises", "reindeer", "rendezvous", "rice", "salmon", "scissors", "series", "shambles", "sheep", "shellfish", "shorts", "shrimp", "smithereens", "spacecraft", "species", "squid", "staff", "starfruit", "statistics", "stone", "sugar", "swine", "tights", "tongs", "traffic", "trousers", "trout", "tuna", "tweezers", "wheat", "whitebait", "wood", "you", ]; lazy_static! { // Regular expressions /// Apostrophe pub static ref APOSTROPHE: Regex = Regex::new(r#"['’]"#).unwrap(); /// Non-alphabetic pub static ref NON_ALPHABETIC: Regex = Regex::new(r#"[^a-zA-Z]"#).unwrap(); pub static ref RE_DOI: Regex = Regex::new(r#"^(doi\:)?10\.\d+/.*$"#).unwrap(); pub static ref RE_ROR: Regex = Regex::new(r#"^0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"#).unwrap(); pub static ref RE_IMAGE_EXTENSION: Regex = Regex::new(r#".*[.](png|PNG|jpg|JPG|jpeg|JPEG|svg|SVG)$"#).unwrap(); pub static ref RE_IP6: Regex = Regex::new(r#"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"#).unwrap(); pub static ref RE_PHONE: Regex = Regex::new(r#"^(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}$"#).unwrap(); /// ### Match single syllable pre- and suffixes pub static ref SINGLE: Regex = Regex::new(r#"^(?:un|fore|ware|none?|out|post|sub|pre|pro|dis|side|some)|(?:ly|less|some|ful|ers?|ness|cians?|ments?|ettes?|villes?|ships?|sides?|ports?|shires?|[gnst]ion(?:ed|s)?)$"#).unwrap(); /// ### Match double syllable pre- and suffixes pub static ref DOUBLE: Regex = Regex::new(r#"^(?:above|anti|ante|counter|hyper|afore|agri|infra|intra|inter|over|semi|ultra|under|extra|dia|micro|mega|kilo|pico|nano|macro|somer)|(?:fully|berry|woman|women|edly|union|((?:[bcdfghjklmnpqrstvwxz])|[aeiou])ye?ing)$"#).unwrap(); /// ### Match triple syllabble suffixes pub static ref TRIPLE: Regex = Regex::new(r#"(creations?|ology|ologist|onomy|onomist)$"#).unwrap(); /// ### Match syllables counted as two, but should be one pub static ref SINGLE_SYLLABIC_ONE : Regex = Regex::new(r#"awe($|d|so)|cia(?:l|$)|tia|cius|cious|[^aeiou]giu|[aeiouy][^aeiouy]ion|iou|sia$|eous$|[oa]gue$|.[^aeiuoycgltdb]{2,}ed$|.ely$|^jua|uai|eau|^busi$|(?:[aeiouy](?:[bcfgklmnprsvwxyz]|ch|dg|g[hn]|lch|l[lv]|mm|nch|n[cgn]|r[bcnsv]|squ|s[chkls]|th)ed$)|(?:[aeiouy](?:[bdfklmnprstvy]|ch|g[hn]|lch|l[lv]|mm|nch|nn|r[nsv]|squ|s[cklst]|th)es$)"#).unwrap(); /// ### Match two-syllable words counted as two, but should be one pub static ref SINGLE_SYLLABIC_TWO : Regex = Regex::new(r#"[aeiouy](?:[bcdfgklmnprstvyz]|ch|dg|g[hn]|l[lv]|mm|n[cgns]|r[cnsv]|squ|s[cklst]|th)e$"#).unwrap(); /// ### Match syllables counted as one, but should be two pub static ref DOUBLE_SYLLABIC_ONE: Regex = Regex::new(r#"(?:([^aeiouy])\\1l|[^aeiouy]ie(?:r|s?t)|[aeiouym]bl|eo|ism|asm|thm|dnt|snt|uity|dea|gean|oa|ua|react?|orbed|shred|eings?|[aeiouy]sh?e[rs])$"#).unwrap(); /// ### Match two-syllable words counted as one, but should be two pub static ref DOUBLE_SYLLABIC_TWO: Regex = Regex::new(r#"creat(?!u)|[^gq]ua[^auieo]|[aeiou]{3}|^(?:ia|mc|coa[dglx].)|^re(app|es|im|us)|(th|d)eist"#).unwrap(); /// ### Match three-syllable words counted as one, but should be two pub static ref DOUBLE_SYLLABIC_THREE: Regex = Regex::new(r#"[^aeiou]y[ae]|[^l]lien|riet|dien|iu|io|ii|uen|[aeilotu]real|real[aeilotu]|iell|eo[^aeiou]|[aeiou]y[aeiou]"#).unwrap(); /// ### Match four-syllable words counted as one, but should be two pub static ref DOUBLE_SYLLABIC_FOUR: Regex = Regex::new(r#"[^s]ia"#).unwrap(); /// Nouns with irregular singular/plural forms pub static ref IRREGULAR_NOUNS: HashMap<&'static str, &'static str> = vec![ ("child", "children"), ("cow", "cattle"), ("foot", "feet"), ("goose", "geese"), ("man", "men"), ("move", "moves"), ("person", "people"), ("radius", "radii"), ("sex", "sexes"), ("tooth", "teeth"), ("woman", "women"), ].into_iter().collect(); /// Nouns with irregular plural/singular forms /// /// Inverted version of [IRREGULAR_NOUNS] pub static ref IRREGULAR_NOUNS_INVERTED: HashMap<&'static str, &'static str> = IRREGULAR_NOUNS.clone().into_iter().map(|(k, v)| (v, k)).collect(); /// ### Nouns with problematic syllable counts pub static ref PROBLEMATIC_WORDS: HashMap<&'static str, usize> = vec![ ("abalone", 4), ("abare", 3), ("abbruzzese", 4), ("abed", 2), ("aborigine", 5), ("abruzzese", 4), ("acreage", 3), ("adame", 3), ("adieu", 2), ("adobe", 3), ("anemone", 4), ("anyone", 3), ("apache", 3), ("aphrodite", 4), ("apostrophe", 4), ("ariadne", 4), ("cafe", 2), ("café", 2), ("calliope", 4), ("catastrophe", 4), ("chile", 2), ("chloe", 2), ("circe", 2), ("cliche", 2), ("cliché", 2), ("contrariety", 4), ("coyote", 3), ("daphne", 2), ("epitome", 4), ("eurydice", 4), ("euterpe", 3), ("every", 2), ("everywhere", 3), ("forever", 3), ("gethsemane", 4), ("guacamole", 4), ("hermione", 4), ("hyperbole", 4), ("jesse", 2), ("jukebox", 2), ("karate", 3), ("machete", 3), ("maybe", 2), ("naive", 2), ("newlywed", 3), ("ninety", 2), ("penelope", 4), ("people", 2), ("persephone", 4), ("phoebe", 2), ("pulse", 1), ("queue", 1), ("recipe", 3), ("reptilian", 4), ("resumé", 2), ("riverbed", 3), ("scotia", 3), ("sesame", 3), ("shoreline", 2), ("simile", 3), ("snuffleupagus", 5), ("sometimes", 2), ("syncope", 3), ("tamale", 3), ("waterbed", 3), ("wednesday", 2), ("viceroyship", 3), ("yosemite", 4), ("zoë", 2), ].into_iter().collect(); /// ### Nouns that need to be fixed when counting syllables /// /// All counts are (correct - 1) pub static ref NEED_TO_BE_FIXED: HashMap<&'static str, usize> = vec![ ("ayo", 2), ("australian", 3), ("dionysius", 5), ("disbursement", 3), ("discouragement", 4), ("disenfranchisement", 5), ("disengagement", 4), ("disgraceful", 3), ("diskette", 2), ("displacement", 3), ("distasteful", 3), ("distinctiveness", 4), ("distraction", 3), ("geoffrion", 4), ("mcquaid", 2), ("mcquaide", 2), ("mcquaig", 2), ("mcquain", 2), ("nonbusiness", 3), ("nonetheless", 3), ("nonmanagement", 4), ("outplacement", 3), ("outrageously", 4), ("postponement", 3), ("preemption", 3), ("preignition", 4), ("preinvasion", 4), ("preisler", 3), ("preoccupation", 5), ("prevette", 2), ("probusiness", 3), ("procurement", 3), ("pronouncement", 3), ("sidewater", 3), ("sidewinder", 3), ("ungerer", 3), ].into_iter().collect(); }
acorn-lib/src/util/mod.rs +69 −0 Original line number Diff line number Diff line use crate::constants::{IRREGULAR_NOUNS, IRREGULAR_NOUNS_INVERTED, NON_ALPHABETIC, SAME_SINGULAR_PLURAL}; use console::Emoji; use data_encoding::HEXUPPER; use derive_more::Display; use duct::cmd; use fancy_regex::Regex; use glob::glob; use is_executable::IsExecutable; use owo_colors::{OwoColorize, Style, Styled}; Loading Loading @@ -379,6 +381,73 @@ pub fn get_image_paths(root: PathBuf) -> Vec<PathBuf> { pub fn get_parent(path: String) -> String { PathBuf::from(PathBuf::from(path).parent().unwrap()).display().to_string() } /// Get the singular form of a word /// /// Adapted from the PHP library, [Text-Statistics](https://github.com/DaveChild/Text-Statistics) pub fn get_singular_form(word: &str) -> String { const SINGULAR: [(&str, &str); 28] = [ (r#"(quiz)zes$"#, r#"${1}"#), (r#"(matr)ices$"#, r#"${1}ix"#), (r#"(vert|ind)ices$"#, r#"${1}ex"#), (r#"^(ox)en$"#, r#"${1}"#), (r#"(alias)es$"#, r#"${1}"#), (r#"(octop|vir)i$"#, r#"${1}us"#), (r#"(cris|ax|test)es$"#, r#"${1}is"#), (r#"(shoe)s$"#, r#"${1}"#), (r#"(o)es$"#, r#"${1}"#), (r#"(bus)es$"#, r#"${1}"#), (r#"([m|l])ice$"#, r#"${1}ouse"#), (r#"(x|ch|ss|sh)es$"#, r#"${1}"#), (r#"(m)ovies$"#, r#"${1}ovie"#), (r#"(s)eries$"#, r#"${1}eries"#), (r#"([^aeiouy]|qu)ies$"#, r#"${1}y"#), (r#"([lr])ves$"#, r#"${1}f"#), (r#"(tive)s$"#, r#"${1}"#), (r#"(hive)s$"#, r#"${1}"#), (r#"(li|wi|kni)ves$"#, r#"${1}fe"#), (r#"(shea|loa|lea|thie)ves$"#, r#"${1}f"#), (r#"(^analy)ses$"#, r#"${1}sis"#), (r#"((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$"#, r#"${1}${2}sis"#), (r#"([ti])a$"#, r#"${1}um"#), (r#"(n)ews$"#, r#"${1}ews"#), (r#"(h|bl)ouses$"#, r#"${1}ouse"#), (r#"(corpse)s$"#, r#"${1}"#), (r#"(us)es$"#, r#"${1}"#), (r#"s$"#, r#""#), ]; match word.to_lowercase().as_str() { | value if SAME_SINGULAR_PLURAL.contains(&value) => value.to_string(), | value if IRREGULAR_NOUNS.contains_key(&value) => value.to_string(), | value if IRREGULAR_NOUNS_INVERTED.contains_key(&value) => match IRREGULAR_NOUNS_INVERTED.get(value) { | Some(value) => value.to_string(), | None => value.to_string(), }, | value => { let pair = SINGULAR.iter().find(|(pattern, _)| match Regex::new(pattern).unwrap().is_match(value) { | Ok(true) => true, | Ok(false) | Err(_) => false, }); match pair { | Some((pattern, replacement)) => { debug!(pattern, replacement, value, "=> {} Singular form conversion", Label::using()); let re = Regex::new(pattern).unwrap(); re.replace_all(value, *replacement).to_string() } | None => value.to_string(), } } } } pub fn get_syllable_count(text: &str) -> usize { fn sanitize(value: &str) -> String { NON_ALPHABETIC.replace_all(value, "").to_lowercase() } fn get_syllables(word: String) -> usize { word.len() } let tokens = text.split_whitespace().map(sanitize).collect::<Vec<String>>(); tokens.into_iter().map(get_syllables).sum() } #[cfg(any(unix, target_os = "wasi", target_os = "redox"))] pub fn make_executable(path: &PathBuf) -> bool { use std::os::unix::fs::PermissionsExt; Loading
acorn-lib/src/util/tests/mod.rs +26 −0 Original line number Diff line number Diff line Loading @@ -40,6 +40,32 @@ fn test_get_files_from_git() { assert!(files.is_empty()); } #[test] fn test_get_singular_form() { assert_eq!("", get_singular_form("")); assert_eq!("man", get_singular_form("men")); assert_eq!("aborigine", get_singular_form("aborigines")); assert_eq!("banana", get_singular_form("banana")); assert_eq!("banana", get_singular_form("bananas")); assert_eq!("buffalo", get_singular_form("buffalo")); assert_eq!("cafe", get_singular_form("cafes")); assert_eq!("goose", get_singular_form("geese")); assert_eq!("goose", get_singular_form("goose")); assert_eq!("house", get_singular_form("houses")); assert_eq!("index", get_singular_form("indices")); assert_eq!("matrix", get_singular_form("matrices")); assert_eq!("mouse", get_singular_form("mice")); assert_eq!("money", get_singular_form("money")); assert_eq!("quiz", get_singular_form("quiz")); assert_eq!("quiz", get_singular_form("quizzes")); assert_eq!("radius", get_singular_form("radii")); assert_eq!("vertex", get_singular_form("vertices")); } #[test] fn test_get_syllable_count() { assert_eq!(0, get_syllable_count("")); assert_eq!(1, get_syllable_count("a")); } #[test] fn test_is_ip6() {} #[test] fn test_semantic_version() { Loading