Loading acorn-cli/src/cli/arguments.rs +3 −0 Original line number Diff line number Diff line Loading @@ -113,6 +113,9 @@ pub enum Standard { /// Dublin Core Metadata Initiative (DCMI) #[display("dcmi")] Dcmi, /// DOCX-derived text #[display("docx")] Docx, /// InvenioRDM #[display("invenio")] Invenio, Loading acorn-cli/src/commands/check/mod.rs +5 −1 Original line number Diff line number Diff line Loading @@ -8,7 +8,7 @@ use acorn::io::unique_file_extensions; use acorn::prelude::{exit, PathBuf}; use acorn::schema::research_activity::ResearchActivity; use acorn::schema::standard::cff::Cff; use acorn::schema::standard::text::Text; use acorn::schema::standard::text::{Docx, Text}; use acorn::util::constants::ENV_READABILITY_METRIC; use acorn::util::{print_values_as_table, regex_join, Label}; use acorn::{fail, skip}; Loading Loading @@ -92,6 +92,7 @@ fn apply_early_exit_policy(results: Vec<Check>, category: &CheckCategory, option async fn collect(paths: &[PathBuf], check_options: &CheckOptions) -> Vec<Check> { match infer_standard(paths) { | Standard::Cff => collect_checks_for::<Cff>(paths, check_options).await, | Standard::Docx => collect_checks_for::<Docx>(paths, check_options).await, | Standard::Rads => collect_checks_for::<ResearchActivity>(paths, check_options).await, | Standard::Text => collect_checks_for::<Text>(paths, check_options).await, | _ => unimplemented!(), Loading Loading @@ -158,6 +159,9 @@ fn infer_standard(paths: &[PathBuf]) -> Standard { if extensions.len() == 1 && extensions.contains(&"cff".to_string()) { warn!("=> {} Inferred standard (CFF)", Label::using()); Standard::Cff } else if extensions.len() == 1 && extensions.contains(&"docx".to_string()) { warn!("=> {} Inferred standard (DOCX)", Label::using()); Standard::Docx } else if extensions.len() == 1 && extensions.contains(&"txt".to_string()) { warn!("=> {} Inferred standard (Text)", Label::using()); Standard::Text Loading acorn-lib/src/analyzer/check.rs +3 −0 Original line number Diff line number Diff line Loading @@ -94,6 +94,9 @@ pub enum Standard { /// InvenioRDM #[display("invenio")] Invenio, /// DOCX-derived text #[display("docx")] Docx, /// Plain text #[display("text")] Text, Loading acorn-lib/src/analyzer/mod.rs +36 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ use crate::prelude::{self, create_dir_all, remove_file, write, Command, CommandO use crate::schema::pid::{PersistentIdentifier, PersistentIdentifierParse, DOI}; use crate::schema::research_activity::ResearchActivity; use crate::schema::standard::cff::{Cff, Identifier, IdentifierType, Reference}; use crate::schema::standard::text::Text; use crate::schema::standard::text::{Docx, Text}; use crate::schema::{Organization, ProgrammingLanguage, Website}; use crate::util::constants::{APPLICATION, CUSTOM_VALE_PACKAGE_NAME, DEFAULT_VALE_PACKAGE_URL, DEFAULT_VALE_ROOT, VALE_RELEASES_URL, VALE_VERSION}; use crate::util::{is_uri_or_path, Constant, Label, SemanticVersion, StringConversion}; Loading Loading @@ -233,6 +233,41 @@ impl Analysis for Cff { } } #[async_trait] impl Analysis for Docx { fn standard() -> Standard { Standard::Docx } async fn check_prose(paths: &[PathBuf], options: Option<&CheckOptions>) -> Vec<Check> { check_prose_for::<Self>(paths, options).await } async fn check_quality(paths: &[PathBuf], _options: Option<&CheckOptions>) -> Vec<Check> { paths .par_iter() .map(|path| match Self::read(path) { | Ok(_) => check_ok!(CheckCategory::Quality), | Err(why) => check_err!( CheckCategory::Quality, message: "Cannot read DOCX data", context: why.to_string() ), }) .collect() } async fn check_readability(paths: &[PathBuf], options: Option<&CheckOptions>) -> Vec<Check> { check_readability_for::<Self>(paths, options) } async fn check_schema(_paths: &[PathBuf], _options: Option<&CheckOptions>) -> Vec<Check> { vec![] } #[cfg(feature = "std")] async fn check_websites(_paths: &[PathBuf], _options: Option<&CheckOptions>) -> Vec<Check> { vec![] } fn output_path(path: &Path, _data: &Self) -> PathBuf { standard_project_folder("check", None).join(path.to_path_buf().file_name_with_parent()) } } #[async_trait] impl Analysis for ResearchActivity { fn standard() -> Standard { Standard::ResearchActivityData Loading acorn-lib/src/schema/standard/tests/text.rs +42 −3 Original line number Diff line number Diff line #[cfg(feature = "std")] use crate::io::InputOutput; use crate::schema::standard::text::Text; use crate::schema::standard::text::{Docx, Text}; #[cfg(feature = "std")] use crate::test_utils::unique_path; use crate::util::{ToMarkdown, ToProse}; use crate::test_utils::{fixture_path, unique_path}; use crate::util::{Unstructured, ToMarkdown, ToProse}; use validator::Validate; #[test] Loading @@ -13,6 +13,7 @@ fn test_text_to_prose_and_markdown_passthrough() { }; assert_eq!(data.to_prose(), "Line one\n\nLine two"); assert_eq!(data.to_markdown(), "Line one\n\nLine two"); assert_eq!(data.content(), "Line one\n\nLine two"); } #[test] fn test_text_validate_is_noop() { Loading Loading @@ -43,3 +44,41 @@ fn test_text_input_output_rejects_unsupported_extension() { let result = Text::read(output); assert!(result.is_err()); } #[test] fn test_docx_to_prose_and_markdown_passthrough() { let data = Docx { content: "Line one\n\nLine two".to_string(), }; assert_eq!(data.to_prose(), "Line one\n\nLine two"); assert_eq!(data.to_markdown(), "Line one\n\nLine two"); assert_eq!(data.content(), "Line one\n\nLine two"); } #[test] fn test_docx_validate_is_noop() { let data = Docx { content: "any content should validate".to_string(), }; assert!(data.validate().is_ok()); } #[cfg(feature = "std")] #[test] fn test_docx_input_output_read_docx_fixture() { let source = fixture_path("acorn.docx"); let result = Docx::read(source).expect("failed to read docx file"); assert!(result.content.contains("ACORN")); } #[cfg(feature = "std")] #[test] fn test_docx_input_output_write_rejects_docx_extension() { let output = unique_path("docx-io", "docx"); if let Some(parent) = output.parent() { std::fs::create_dir_all(parent).expect("failed to create test_artifacts directory"); } let source = Docx { content: "plain text content".to_string(), }; let result = source.write(output.clone()); assert!(result.is_err()); let _cleanup = std::fs::remove_file(output); } Loading
acorn-cli/src/cli/arguments.rs +3 −0 Original line number Diff line number Diff line Loading @@ -113,6 +113,9 @@ pub enum Standard { /// Dublin Core Metadata Initiative (DCMI) #[display("dcmi")] Dcmi, /// DOCX-derived text #[display("docx")] Docx, /// InvenioRDM #[display("invenio")] Invenio, Loading
acorn-cli/src/commands/check/mod.rs +5 −1 Original line number Diff line number Diff line Loading @@ -8,7 +8,7 @@ use acorn::io::unique_file_extensions; use acorn::prelude::{exit, PathBuf}; use acorn::schema::research_activity::ResearchActivity; use acorn::schema::standard::cff::Cff; use acorn::schema::standard::text::Text; use acorn::schema::standard::text::{Docx, Text}; use acorn::util::constants::ENV_READABILITY_METRIC; use acorn::util::{print_values_as_table, regex_join, Label}; use acorn::{fail, skip}; Loading Loading @@ -92,6 +92,7 @@ fn apply_early_exit_policy(results: Vec<Check>, category: &CheckCategory, option async fn collect(paths: &[PathBuf], check_options: &CheckOptions) -> Vec<Check> { match infer_standard(paths) { | Standard::Cff => collect_checks_for::<Cff>(paths, check_options).await, | Standard::Docx => collect_checks_for::<Docx>(paths, check_options).await, | Standard::Rads => collect_checks_for::<ResearchActivity>(paths, check_options).await, | Standard::Text => collect_checks_for::<Text>(paths, check_options).await, | _ => unimplemented!(), Loading Loading @@ -158,6 +159,9 @@ fn infer_standard(paths: &[PathBuf]) -> Standard { if extensions.len() == 1 && extensions.contains(&"cff".to_string()) { warn!("=> {} Inferred standard (CFF)", Label::using()); Standard::Cff } else if extensions.len() == 1 && extensions.contains(&"docx".to_string()) { warn!("=> {} Inferred standard (DOCX)", Label::using()); Standard::Docx } else if extensions.len() == 1 && extensions.contains(&"txt".to_string()) { warn!("=> {} Inferred standard (Text)", Label::using()); Standard::Text Loading
acorn-lib/src/analyzer/check.rs +3 −0 Original line number Diff line number Diff line Loading @@ -94,6 +94,9 @@ pub enum Standard { /// InvenioRDM #[display("invenio")] Invenio, /// DOCX-derived text #[display("docx")] Docx, /// Plain text #[display("text")] Text, Loading
acorn-lib/src/analyzer/mod.rs +36 −1 Original line number Diff line number Diff line Loading @@ -13,7 +13,7 @@ use crate::prelude::{self, create_dir_all, remove_file, write, Command, CommandO use crate::schema::pid::{PersistentIdentifier, PersistentIdentifierParse, DOI}; use crate::schema::research_activity::ResearchActivity; use crate::schema::standard::cff::{Cff, Identifier, IdentifierType, Reference}; use crate::schema::standard::text::Text; use crate::schema::standard::text::{Docx, Text}; use crate::schema::{Organization, ProgrammingLanguage, Website}; use crate::util::constants::{APPLICATION, CUSTOM_VALE_PACKAGE_NAME, DEFAULT_VALE_PACKAGE_URL, DEFAULT_VALE_ROOT, VALE_RELEASES_URL, VALE_VERSION}; use crate::util::{is_uri_or_path, Constant, Label, SemanticVersion, StringConversion}; Loading Loading @@ -233,6 +233,41 @@ impl Analysis for Cff { } } #[async_trait] impl Analysis for Docx { fn standard() -> Standard { Standard::Docx } async fn check_prose(paths: &[PathBuf], options: Option<&CheckOptions>) -> Vec<Check> { check_prose_for::<Self>(paths, options).await } async fn check_quality(paths: &[PathBuf], _options: Option<&CheckOptions>) -> Vec<Check> { paths .par_iter() .map(|path| match Self::read(path) { | Ok(_) => check_ok!(CheckCategory::Quality), | Err(why) => check_err!( CheckCategory::Quality, message: "Cannot read DOCX data", context: why.to_string() ), }) .collect() } async fn check_readability(paths: &[PathBuf], options: Option<&CheckOptions>) -> Vec<Check> { check_readability_for::<Self>(paths, options) } async fn check_schema(_paths: &[PathBuf], _options: Option<&CheckOptions>) -> Vec<Check> { vec![] } #[cfg(feature = "std")] async fn check_websites(_paths: &[PathBuf], _options: Option<&CheckOptions>) -> Vec<Check> { vec![] } fn output_path(path: &Path, _data: &Self) -> PathBuf { standard_project_folder("check", None).join(path.to_path_buf().file_name_with_parent()) } } #[async_trait] impl Analysis for ResearchActivity { fn standard() -> Standard { Standard::ResearchActivityData Loading
acorn-lib/src/schema/standard/tests/text.rs +42 −3 Original line number Diff line number Diff line #[cfg(feature = "std")] use crate::io::InputOutput; use crate::schema::standard::text::Text; use crate::schema::standard::text::{Docx, Text}; #[cfg(feature = "std")] use crate::test_utils::unique_path; use crate::util::{ToMarkdown, ToProse}; use crate::test_utils::{fixture_path, unique_path}; use crate::util::{Unstructured, ToMarkdown, ToProse}; use validator::Validate; #[test] Loading @@ -13,6 +13,7 @@ fn test_text_to_prose_and_markdown_passthrough() { }; assert_eq!(data.to_prose(), "Line one\n\nLine two"); assert_eq!(data.to_markdown(), "Line one\n\nLine two"); assert_eq!(data.content(), "Line one\n\nLine two"); } #[test] fn test_text_validate_is_noop() { Loading Loading @@ -43,3 +44,41 @@ fn test_text_input_output_rejects_unsupported_extension() { let result = Text::read(output); assert!(result.is_err()); } #[test] fn test_docx_to_prose_and_markdown_passthrough() { let data = Docx { content: "Line one\n\nLine two".to_string(), }; assert_eq!(data.to_prose(), "Line one\n\nLine two"); assert_eq!(data.to_markdown(), "Line one\n\nLine two"); assert_eq!(data.content(), "Line one\n\nLine two"); } #[test] fn test_docx_validate_is_noop() { let data = Docx { content: "any content should validate".to_string(), }; assert!(data.validate().is_ok()); } #[cfg(feature = "std")] #[test] fn test_docx_input_output_read_docx_fixture() { let source = fixture_path("acorn.docx"); let result = Docx::read(source).expect("failed to read docx file"); assert!(result.content.contains("ACORN")); } #[cfg(feature = "std")] #[test] fn test_docx_input_output_write_rejects_docx_extension() { let output = unique_path("docx-io", "docx"); if let Some(parent) = output.parent() { std::fs::create_dir_all(parent).expect("failed to create test_artifacts directory"); } let source = Docx { content: "plain text content".to_string(), }; let result = source.write(output.clone()); assert!(result.is_err()); let _cleanup = std::fs::remove_file(output); }