feat: Initial creation of get_singular_form; Initial structure for get_syllable_count (d1fdd0c6) · Commits · Research Enablement / ACORN

acorn-cli/src/commands/doctor/mod.rs

+2 −2

Original line number	Diff line number	Diff line
		use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, SystemSoftwareInformation, TableFormatPrint};
		use acorn_lib::doctor::{MemoryInformation, NetworkInformation, SystemInformation, TableFormatPrint};
		use acorn_lib::util::cli::Diagnostic;
		use color_eyre::eyre::{Report, Result};
		use tracing::warn;
		@@ -15,7 +15,7 @@ pub fn run(fix: &bool, interactive: &bool, check: &[Diagnostic]) -> Result<(), R
		unimplemented!("Interactive mode is not implemented yet");
		}
		if should_run(check, Diagnostic::Software) {
		SystemSoftwareInformation::init().print();
		// SystemSoftwareInformation::init().print();
		}
		if should_run(check, Diagnostic::System) {
		SystemInformation::init().print();

acorn-lib/src/constants.rs

+268 −1

Original line number	Diff line number	Diff line
		use fancy_regex::Regex;
		use lazy_static::lazy_static;
		use std::collections::HashMap;

		/// Base URL for deploying ORNL data
		pub const BASE_URL: &str = "https://research.ornl.gov";
		@@ -44,13 +45,279 @@ pub const MAX_LENGTH_SUBTITLE: u64 = 100;
		/// Maximum number of characters for a single technical approach description
		pub const MAX_LENGTH_TECHNICAL: usize = 100;
		/// Maximum number of characters for a title
		// TODO: This should be 35 (kept as a plain comment so it does not appear in the rendered rustdoc)
		pub const MAX_LENGTH_TITLE: u64 = 50;
		/// ### Nouns with the same singular and plural forms
		///
		/// All entries are lowercase. The array length in the type (`110`) must be updated
		/// whenever an entry is added or removed.
		pub const SAME_SINGULAR_PLURAL: [&str; 110] = [
		"accommodation",
		"advice",
		"alms",
		"aircraft",
		"aluminum",
		"barracks",
		"bison",
		"binoculars",
		"bourgeois",
		"breadfruit",
		"buffalo",
		"cannon",
		"caribou",
		"chalk",
		"chassis",
		"chinos",
		"clippers",
		"clothing",
		"cod",
		"concrete",
		"corps",
		"correspondence",
		"crossroads",
		"data",
		"deer",
		"doldrums",
		"dungarees",
		"education",
		"eggfruit",
		"elk",
		"equipment",
		"eyeglasses",
		"fish",
		"flares",
		"flour",
		"food",
		"fruit",
		"furniture",
		"gallows",
		"goldfish",
		"grapefruit",
		"greenfly",
		"grouse",
		"haddock",
		"halibut",
		"head",
		"headquarters",
		"help",
		"homework",
		"hovercraft",
		"ides",
		"information",
		"insignia",
		"jackfruit",
		"jeans",
		"knickers",
		"knowledge",
		"kudos",
		"leggings",
		"lego",
		"luggage",
		"mathematics",
		"money",
		"moose",
		"monkfish",
		"mullet",
		"nailclippers",
		"news",
		"nitrogen",
		"offspring",
		"oxygen",
		"pants",
		"pyjamas",
		"passionfruit",
		"pike",
		"pliers",
		"police",
		"premises",
		"reindeer",
		"rendezvous",
		"rice",
		"salmon",
		"scissors",
		"series",
		"shambles",
		"sheep",
		"shellfish",
		"shorts",
		"shrimp",
		"smithereens",
		"spacecraft",
		"species",
		"squid",
		"staff",
		"starfruit",
		"statistics",
		"stone",
		"sugar",
		"swine",
		"tights",
		"tongs",
		"traffic",
		"trousers",
		"trout",
		"tuna",
		"tweezers",
		"wheat",
		"whitebait",
		"wood",
		"you",
		];

		lazy_static! {
		// Regular expressions
		/// Apostrophe (straight `'` or typographic `’`)
		pub static ref APOSTROPHE: Regex = Regex::new(r#"['’]"#).unwrap();
		/// Non-alphabetic (anything other than ASCII `a-z` / `A-Z`)
		pub static ref NON_ALPHABETIC: Regex = Regex::new(r#"[^a-zA-Z]"#).unwrap();
		/// DOI: optional `doi:` prefix, then `10.`, digits, `/`, and an arbitrary suffix
		pub static ref RE_DOI: Regex = Regex::new(r#"^(doi\:)?10\.\d+/.*$"#).unwrap();
		/// ROR identifier: `0`, six lowercase base-32 characters (no `i`, `l`, `o`, `u`), two digits
		///
		/// The character class must not contain `|`: inside `[...]` a pipe is a literal
		/// character, not an alternation, and would be accepted as part of an identifier.
		pub static ref RE_ROR: Regex = Regex::new(r#"^0[a-hj-km-np-tv-z0-9]{6}[0-9]{2}$"#).unwrap();
		/// File name ending in a PNG, JPG/JPEG, or SVG extension (all-lower or all-upper case)
		pub static ref RE_IMAGE_EXTENSION: Regex = Regex::new(r#".*[.](png|PNG|jpg|JPG|jpeg|JPEG|svg|SVG)$"#).unwrap();
		/// IPv6 address (including link-local with zone index and IPv4-mapped/embedded forms)
		///
		/// The pattern is not anchored, so it matches any text that *contains* an address.
		pub static ref RE_IP6: Regex = Regex::new(r#"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"#).unwrap();
		/// Phone number: optional `+` country code, 3-3-4 digit groups, optional parentheses
		/// around the area code, and optional whitespace / `.` / `-` separators
		///
		/// The area-code delimiters are `\(?` and `\)?` (optional literal parentheses); an
		/// optional `$` anchor in those positions would never accept `(555) 123-4567`.
		pub static ref RE_PHONE: Regex = Regex::new(r#"^(\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}$"#).unwrap();
		/// ### Match single syllable pre- and suffixes
		pub static ref SINGLE: Regex = Regex::new(r#"^(?:un|fore|ware|none?|out|post|sub|pre|pro|dis|side|some)|(?:ly|less|some|ful|ers?|ness|cians?|ments?|ettes?|villes?|ships?|sides?|ports?|shires?|[gnst]ion(?:ed|s)?)$"#).unwrap();
		/// ### Match double syllable pre- and suffixes
		pub static ref DOUBLE: Regex = Regex::new(r#"^(?:above|anti|ante|counter|hyper|afore|agri|infra|intra|inter|over|semi|ultra|under|extra|dia|micro|mega|kilo|pico|nano|macro|somer)|(?:fully|berry|woman|women|edly|union|((?:[bcdfghjklmnpqrstvwxz])|[aeiou])ye?ing)$"#).unwrap();
		/// ### Match triple syllable suffixes
		pub static ref TRIPLE: Regex = Regex::new(r#"(creations?|ology|ologist|onomy|onomist)$"#).unwrap();
		/// ### Match syllables counted as two, but should be one (pattern 1 of 2)
		pub static ref SINGLE_SYLLABIC_ONE: Regex = Regex::new(r#"awe($|d|so)|cia(?:l|$)|tia|cius|cious|[^aeiou]giu|[aeiouy][^aeiouy]ion|iou|sia$|eous$|[oa]gue$|.[^aeiuoycgltdb]{2,}ed$|.ely$|^jua|uai|eau|^busi$|(?:[aeiouy](?:[bcfgklmnprsvwxyz]|ch|dg|g[hn]|lch|l[lv]|mm|nch|n[cgn]|r[bcnsv]|squ|s[chkls]|th)ed$)|(?:[aeiouy](?:[bdfklmnprstvy]|ch|g[hn]|lch|l[lv]|mm|nch|nn|r[nsv]|squ|s[cklst]|th)es$)"#).unwrap();
		/// ### Match syllables counted as two, but should be one (pattern 2 of 2)
		pub static ref SINGLE_SYLLABIC_TWO: Regex = Regex::new(r#"[aeiouy](?:[bcdfgklmnprstvyz]|ch|dg|g[hn]|l[lv]|mm|n[cgns]|r[cnsv]|squ|s[cklst]|th)e$"#).unwrap();
		/// ### Match syllables counted as one, but should be two (pattern 1 of 4)
		///
		/// `\1` is a backreference to the captured consonant (a doubled consonant followed
		/// by `l`). It must be written with a single backslash inside a raw string; `\\1`
		/// would match a literal backslash followed by the digit `1`.
		pub static ref DOUBLE_SYLLABIC_ONE: Regex = Regex::new(r#"(?:([^aeiouy])\1l|[^aeiouy]ie(?:r|s?t)|[aeiouym]bl|eo|ism|asm|thm|dnt|snt|uity|dea|gean|oa|ua|react?|orbed|shred|eings?|[aeiouy]sh?e[rs])$"#).unwrap();
		/// ### Match syllables counted as one, but should be two (pattern 2 of 4)
		pub static ref DOUBLE_SYLLABIC_TWO: Regex = Regex::new(r#"creat(?!u)|[^gq]ua[^auieo]|[aeiou]{3}|^(?:ia|mc|coa[dglx].)|^re(app|es|im|us)|(th|d)eist"#).unwrap();
		/// ### Match syllables counted as one, but should be two (pattern 3 of 4)
		pub static ref DOUBLE_SYLLABIC_THREE: Regex = Regex::new(r#"[^aeiou]y[ae]|[^l]lien|riet|dien|iu|io|ii|uen|[aeilotu]real|real[aeilotu]|iell|eo[^aeiou]|[aeiou]y[aeiou]"#).unwrap();
		/// ### Match syllables counted as one, but should be two (pattern 4 of 4)
		// NOTE(review): the "pattern N of M" wording is inferred from the constant names — confirm against the syllable-counting code that consumes these patterns
		pub static ref DOUBLE_SYLLABIC_FOUR: Regex = Regex::new(r#"[^s]ia"#).unwrap();
		/// Nouns with irregular singular/plural forms
		///
		/// Keys are the singular forms; values are the matching plural forms.
		pub static ref IRREGULAR_NOUNS: HashMap<&'static str, &'static str> = HashMap::from([
		    ("child", "children"),
		    ("cow", "cattle"),
		    ("foot", "feet"),
		    ("goose", "geese"),
		    ("man", "men"),
		    ("move", "moves"),
		    ("person", "people"),
		    ("radius", "radii"),
		    ("sex", "sexes"),
		    ("tooth", "teeth"),
		    ("woman", "women"),
		]);
		/// Nouns with irregular plural/singular forms
		///
		/// Inverted version of [IRREGULAR_NOUNS]: keys are plural forms, values are singular forms.
		/// Built by iterating the source map by reference, so the map itself is never cloned.
		pub static ref IRREGULAR_NOUNS_INVERTED: HashMap<&'static str, &'static str> = IRREGULAR_NOUNS
		    .iter()
		    .map(|(&singular, &plural)| (plural, singular))
		    .collect();
		/// ### Words with problematic syllable counts
		///
		/// Lookup table from a lowercase word to the count recorded for it.
		pub static ref PROBLEMATIC_WORDS: HashMap<&'static str, usize> = HashMap::from([
		    ("abalone", 4),
		    ("abare", 3),
		    ("abbruzzese", 4),
		    ("abed", 2),
		    ("aborigine", 5),
		    ("abruzzese", 4),
		    ("acreage", 3),
		    ("adame", 3),
		    ("adieu", 2),
		    ("adobe", 3),
		    ("anemone", 4),
		    ("anyone", 3),
		    ("apache", 3),
		    ("aphrodite", 4),
		    ("apostrophe", 4),
		    ("ariadne", 4),
		    ("cafe", 2),
		    ("café", 2),
		    ("calliope", 4),
		    ("catastrophe", 4),
		    ("chile", 2),
		    ("chloe", 2),
		    ("circe", 2),
		    ("cliche", 2),
		    ("cliché", 2),
		    ("contrariety", 4),
		    ("coyote", 3),
		    ("daphne", 2),
		    ("epitome", 4),
		    ("eurydice", 4),
		    ("euterpe", 3),
		    ("every", 2),
		    ("everywhere", 3),
		    ("forever", 3),
		    ("gethsemane", 4),
		    ("guacamole", 4),
		    ("hermione", 4),
		    ("hyperbole", 4),
		    ("jesse", 2),
		    ("jukebox", 2),
		    ("karate", 3),
		    ("machete", 3),
		    ("maybe", 2),
		    ("naive", 2),
		    ("newlywed", 3),
		    ("ninety", 2),
		    ("penelope", 4),
		    ("people", 2),
		    ("persephone", 4),
		    ("phoebe", 2),
		    ("pulse", 1),
		    ("queue", 1),
		    ("recipe", 3),
		    ("reptilian", 4),
		    ("resumé", 2),
		    ("riverbed", 3),
		    ("scotia", 3),
		    ("sesame", 3),
		    ("shoreline", 2),
		    ("simile", 3),
		    ("snuffleupagus", 5),
		    ("sometimes", 2),
		    ("syncope", 3),
		    ("tamale", 3),
		    ("waterbed", 3),
		    ("wednesday", 2),
		    ("viceroyship", 3),
		    ("yosemite", 4),
		    ("zoë", 2),
		]);
		/// ### Words that need to be fixed when counting syllables
		///
		/// Lookup table from a lowercase word to its adjusted count.
		/// Every stored count is (correct - 1).
		pub static ref NEED_TO_BE_FIXED: HashMap<&'static str, usize> = HashMap::from([
		    ("ayo", 2),
		    ("australian", 3),
		    ("dionysius", 5),
		    ("disbursement", 3),
		    ("discouragement", 4),
		    ("disenfranchisement", 5),
		    ("disengagement", 4),
		    ("disgraceful", 3),
		    ("diskette", 2),
		    ("displacement", 3),
		    ("distasteful", 3),
		    ("distinctiveness", 4),
		    ("distraction", 3),
		    ("geoffrion", 4),
		    ("mcquaid", 2),
		    ("mcquaide", 2),
		    ("mcquaig", 2),
		    ("mcquain", 2),
		    ("nonbusiness", 3),
		    ("nonetheless", 3),
		    ("nonmanagement", 4),
		    ("outplacement", 3),
		    ("outrageously", 4),
		    ("postponement", 3),
		    ("preemption", 3),
		    ("preignition", 4),
		    ("preinvasion", 4),
		    ("preisler", 3),
		    ("preoccupation", 5),
		    ("prevette", 2),
		    ("probusiness", 3),
		    ("procurement", 3),
		    ("pronouncement", 3),
		    ("sidewater", 3),
		    ("sidewinder", 3),
		    ("ungerer", 3),
		]);
		}

acorn-lib/src/util/mod.rs

+69 −0

Original line number	Diff line number	Diff line
		use crate::constants::{IRREGULAR_NOUNS, IRREGULAR_NOUNS_INVERTED, NON_ALPHABETIC, SAME_SINGULAR_PLURAL};
		use console::Emoji;
		use data_encoding::HEXUPPER;
		use derive_more::Display;
		use duct::cmd;
		use fancy_regex::Regex;
		use glob::glob;
		use is_executable::IsExecutable;
		use owo_colors::{OwoColorize, Style, Styled};
		@@ -379,6 +381,73 @@ pub fn get_image_paths(root: PathBuf) -> Vec<PathBuf> {
		/// Get the parent directory of a path, rendered as a string
		///
		/// Returns an empty string when `path` has no parent (for example a filesystem root
		/// such as `/`, or an empty path) instead of panicking. A bare file name such as
		/// `file.txt` also yields an empty string, because its parent is the empty path.
		pub fn get_parent(path: String) -> String {
		    PathBuf::from(path)
		        .parent()
		        .map(|parent| parent.display().to_string())
		        .unwrap_or_default()
		}
		/// Get the singular form of a word
		///
		/// Adapted from the PHP library, [Text-Statistics](https://github.com/DaveChild/Text-Statistics)
		///
		/// The word is lowercased first, then resolved in this order:
		/// 1. Words listed in `SAME_SINGULAR_PLURAL` or that are already an irregular singular
		///    (a key of `IRREGULAR_NOUNS`) are returned as-is (lowercased).
		/// 2. Irregular plurals (keys of `IRREGULAR_NOUNS_INVERTED`) are mapped to their singular.
		/// 3. Otherwise the first matching rule in `SINGULAR` is applied; rule order matters,
		///    with the generic trailing-`s` rule last.
		///
		/// Returns the lowercased word unchanged when nothing applies (including the empty string).
		///
		/// # Panics
		/// Panics if a pattern in `SINGULAR` is not a valid regular expression (a programming error).
		pub fn get_singular_form(word: &str) -> String {
		    const SINGULAR: [(&str, &str); 28] = [
		        (r#"(quiz)zes$"#, r#"${1}"#),
		        (r#"(matr)ices$"#, r#"${1}ix"#),
		        (r#"(vert|ind)ices$"#, r#"${1}ex"#),
		        (r#"^(ox)en$"#, r#"${1}"#),
		        (r#"(alias)es$"#, r#"${1}"#),
		        (r#"(octop|vir)i$"#, r#"${1}us"#),
		        (r#"(cris|ax|test)es$"#, r#"${1}is"#),
		        (r#"(shoe)s$"#, r#"${1}"#),
		        (r#"(o)es$"#, r#"${1}"#),
		        (r#"(bus)es$"#, r#"${1}"#),
		        // A pipe inside `[...]` is a literal character, so the class is just `[ml]`
		        (r#"([ml])ice$"#, r#"${1}ouse"#),
		        (r#"(x|ch|ss|sh)es$"#, r#"${1}"#),
		        (r#"(m)ovies$"#, r#"${1}ovie"#),
		        (r#"(s)eries$"#, r#"${1}eries"#),
		        (r#"([^aeiouy]|qu)ies$"#, r#"${1}y"#),
		        (r#"([lr])ves$"#, r#"${1}f"#),
		        (r#"(tive)s$"#, r#"${1}"#),
		        (r#"(hive)s$"#, r#"${1}"#),
		        (r#"(li|wi|kni)ves$"#, r#"${1}fe"#),
		        (r#"(shea|loa|lea|thie)ves$"#, r#"${1}f"#),
		        (r#"(^analy)ses$"#, r#"${1}sis"#),
		        // Only group 1 is re-inserted: also inserting group 2 duplicated the "a" for
		        // prefixed words (e.g., "psychoanalyses" became "psychoanalyasis")
		        (r#"((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$"#, r#"${1}sis"#),
		        (r#"([ti])a$"#, r#"${1}um"#),
		        (r#"(n)ews$"#, r#"${1}ews"#),
		        (r#"(h|bl)ouses$"#, r#"${1}ouse"#),
		        (r#"(corpse)s$"#, r#"${1}"#),
		        (r#"(us)es$"#, r#"${1}"#),
		        (r#"s$"#, r#""#),
		    ];
		    let lowered = word.to_lowercase();
		    let value = lowered.as_str();
		    if SAME_SINGULAR_PLURAL.contains(&value) || IRREGULAR_NOUNS.contains_key(value) {
		        return value.to_string();
		    }
		    if let Some(singular) = IRREGULAR_NOUNS_INVERTED.get(value) {
		        return singular.to_string();
		    }
		    SINGULAR
		        .iter()
		        .find_map(|(pattern, replacement)| {
		            // Compile each candidate once and reuse it for both the test and the replacement
		            let re = Regex::new(pattern).expect("SINGULAR patterns are valid regular expressions");
		            match re.is_match(value) {
		                | Ok(true) => {
		                    debug!(pattern, replacement, value, "=> {} Singular form conversion", Label::using());
		                    Some(re.replace_all(value, *replacement).to_string())
		                }
		                // A matching error (e.g., backtracking limit) is treated as "no match"
		                | Ok(false) | Err(_) => None,
		            }
		        })
		        .unwrap_or_else(|| value.to_string())
		}
		/// Count syllables in a piece of text
		///
		/// NOTE: initial structure only. Each whitespace-separated token has every match of
		/// `NON_ALPHABETIC` removed, is lowercased, and currently contributes its length in
		/// bytes to the total (so the result is a letter count, not yet a syllable count).
		pub fn get_syllable_count(text: &str) -> usize {
		    text.split_whitespace()
		        .map(|token| NON_ALPHABETIC.replace_all(token, "").to_lowercase())
		        .map(|word| word.len())
		        .sum()
		}
		#[cfg(any(unix, target_os = "wasi", target_os = "redox"))]
		pub fn make_executable(path: &PathBuf) -> bool {
		use std::os::unix::fs::PermissionsExt;

acorn-lib/src/util/tests/mod.rs

+26 −0

Original line number	Diff line number	Diff line
		@@ -40,6 +40,32 @@ fn test_get_files_from_git() {
		assert!(files.is_empty());
		}
		#[test]
		fn test_get_singular_form() {
		    // (expected singular, input word)
		    let cases: [(&str, &str); 18] = [
		        ("", ""),
		        ("man", "men"),
		        ("aborigine", "aborigines"),
		        ("banana", "banana"),
		        ("banana", "bananas"),
		        ("buffalo", "buffalo"),
		        ("cafe", "cafes"),
		        ("goose", "geese"),
		        ("goose", "goose"),
		        ("house", "houses"),
		        ("index", "indices"),
		        ("matrix", "matrices"),
		        ("mouse", "mice"),
		        ("money", "money"),
		        ("quiz", "quiz"),
		        ("quiz", "quizzes"),
		        ("radius", "radii"),
		        ("vertex", "vertices"),
		    ];
		    for &(expected, input) in cases.iter() {
		        assert_eq!(expected, get_singular_form(input));
		    }
		}
		#[test]
		fn test_get_syllable_count() {
		    // (expected count, input text)
		    let cases: [(usize, &str); 2] = [(0, ""), (1, "a")];
		    for &(expected, text) in cases.iter() {
		        assert_eq!(expected, get_syllable_count(text));
		    }
		}
		#[test]
		// TODO: placeholder — this test has an empty body and asserts nothing yet
		fn test_is_ip6() {}
		#[test]
		fn test_semantic_version() {