Loading acorn-lib/src/io/mod.rs +46 −5 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ use crate::fail; use crate::prelude::{canonicalize, create_dir_all, io, var, BufReader, Cursor, Error, File, PathBuf, Read, Write}; #[cfg(any(unix, target_os = "wasi", target_os = "redox"))] use crate::prelude::{set_permissions, Permissions, PermissionsExt}; use crate::util::constants::{APPLICATION, ORGANIZATION, QUALIFIER}; use crate::util::constants::{APPLICATION, LARGE_FILE_THRESHOLD_BYTES, ORGANIZATION, QUALIFIER}; #[cfg(windows)] use crate::util::file_extension; use crate::util::{generate_guid, suffix, Label, MimeType, SemanticVersion, ToAbsoluteString, ToStrings}; Loading Loading @@ -876,6 +876,43 @@ where /// .collect(); /// ``` pub fn read_file<P>(path: P) -> Result<String, io::Error> where P: Into<PathBuf> + Clone + Send, { let path_buf = path.into(); let filename = path_buf.file_name().unwrap_or_default().to_string_lossy().to_string(); let is_large_file = match path_buf.metadata() { | Ok(metadata) => metadata.len() >= LARGE_FILE_THRESHOLD_BYTES, | Err(_) => false, }; if is_large_file { trace!(filename, "=> {} Read file with large-file strategy", Label::using()); read_large_file(path_buf) } else { match File::open(&path_buf) { | Ok(file) => { let mut reader = BufReader::new(file); let mut content = String::new(); match reader.read_to_string(&mut content) { | Ok(_) => Ok(content), | Err(why) => { error!(filename, "=> {} Read file content", Label::fail()); Err(why) } } } | Err(why) => { error!(filename, "=> {} Read file", Label::fail()); Err(why) } } } } /// Reads large files and returns the contents as a string. /// /// This function uses a larger buffered reader and pre-allocates the output string /// using file metadata when available. pub fn read_large_file<P>(path: P) -> Result<String, io::Error> where P: Into<PathBuf> + Clone + Send, { Loading @@ -884,16 +921,20 @@ where let file = match File::open(&path_buf) { | Ok(file) => file, | Err(why) => { error!(filename, "=> {} Read file", Label::fail()); error!(filename, "=> {} Read large file", Label::fail()); return Err(why); } }; let mut reader = BufReader::new(file); let mut content = String::new(); let capacity = match file.metadata() { | Ok(metadata) => usize::try_from(metadata.len()).unwrap_or(0), | Err(_) => 0, }; let mut reader = BufReader::with_capacity(1024 * 1024, file); let mut content = if capacity > 0 { String::with_capacity(capacity) } else { String::new() }; match reader.read_to_string(&mut content) { | Ok(_) => Ok(content), | Err(why) => { error!(filename, "=> {} Read file content", Label::fail()); error!(filename, "=> {} Read large file content", Label::fail()); Err(why) } } Loading acorn-lib/src/io/tests/mod.rs +22 −2 Original line number Diff line number Diff line Loading @@ -4,10 +4,10 @@ use crate::io::bagit::{Bag, BagInfo, Save}; use crate::io::config::{ApplicationConfiguration, Bucket}; use crate::io::{ archive, file_checksum, files_all, files_from_git_branch, files_from_git_commit, files_from_gitlab_merge_request, filter_git_command_result, filter_ignored, image_paths, InputOutput, filter_ignored, image_paths, read_file, read_large_file, InputOutput, }; use crate::{Location, Repository, Scheme}; use std::fs::{create_dir_all, read_to_string, remove_dir_all}; use std::fs::{create_dir_all, read_to_string, remove_dir_all, remove_file, write}; use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; Loading Loading @@ -423,3 +423,23 @@ fn test_image_paths() { let files = image_paths(path); assert_eq!(files.len(), 0); } #[test] fn test_read_large_file() { let stamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); let path = std::env::temp_dir().join(format!("acorn-read-large-file-{stamp}.txt")); let expected = "ACORN ".repeat(500_000); write(path.clone(), expected.clone()).unwrap(); let content = read_large_file(path.clone()).unwrap(); assert_eq!(content, expected); remove_file(path).unwrap(); } #[test] fn test_read_file_large_content() { let stamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); let path = std::env::temp_dir().join(format!("acorn-read-file-large-{stamp}.txt")); let expected = "ACORN-LARGE ".repeat(800_000); write(path.clone(), expected.clone()).unwrap(); let content = read_file(path.clone()).unwrap(); assert_eq!(content, expected); remove_file(path).unwrap(); } acorn-lib/src/util/constants.rs +2 −0 Original line number Diff line number Diff line Loading @@ -62,6 +62,8 @@ pub const APPLICATION: &str = "acorn"; pub const ORGANIZATION: &str = "ornl"; /// Organization qualifier pub const QUALIFIER: &str = "org"; /// File size threshold in bytes for switching to large-file read strategy (100 MB). pub const LARGE_FILE_THRESHOLD_BYTES: u64 = 100 * 1024 * 1024; // Schema defaults /// Default affiliation pub const DEFAULT_AFFILIATION: &str = "Oak Ridge National Laboratory"; Loading lcov.info +2527 −2557 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
acorn-lib/src/io/mod.rs +46 −5 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ use crate::fail; use crate::prelude::{canonicalize, create_dir_all, io, var, BufReader, Cursor, Error, File, PathBuf, Read, Write}; #[cfg(any(unix, target_os = "wasi", target_os = "redox"))] use crate::prelude::{set_permissions, Permissions, PermissionsExt}; use crate::util::constants::{APPLICATION, ORGANIZATION, QUALIFIER}; use crate::util::constants::{APPLICATION, LARGE_FILE_THRESHOLD_BYTES, ORGANIZATION, QUALIFIER}; #[cfg(windows)] use crate::util::file_extension; use crate::util::{generate_guid, suffix, Label, MimeType, SemanticVersion, ToAbsoluteString, ToStrings}; Loading Loading @@ -876,6 +876,43 @@ where /// .collect(); /// ``` pub fn read_file<P>(path: P) -> Result<String, io::Error> where P: Into<PathBuf> + Clone + Send, { let path_buf = path.into(); let filename = path_buf.file_name().unwrap_or_default().to_string_lossy().to_string(); let is_large_file = match path_buf.metadata() { | Ok(metadata) => metadata.len() >= LARGE_FILE_THRESHOLD_BYTES, | Err(_) => false, }; if is_large_file { trace!(filename, "=> {} Read file with large-file strategy", Label::using()); read_large_file(path_buf) } else { match File::open(&path_buf) { | Ok(file) => { let mut reader = BufReader::new(file); let mut content = String::new(); match reader.read_to_string(&mut content) { | Ok(_) => Ok(content), | Err(why) => { error!(filename, "=> {} Read file content", Label::fail()); Err(why) } } } | Err(why) => { error!(filename, "=> {} Read file", Label::fail()); Err(why) } } } } /// Reads large files and returns the contents as a string. /// /// This function uses a larger buffered reader and pre-allocates the output string /// using file metadata when available. pub fn read_large_file<P>(path: P) -> Result<String, io::Error> where P: Into<PathBuf> + Clone + Send, { Loading @@ -884,16 +921,20 @@ where let file = match File::open(&path_buf) { | Ok(file) => file, | Err(why) => { error!(filename, "=> {} Read file", Label::fail()); error!(filename, "=> {} Read large file", Label::fail()); return Err(why); } }; let mut reader = BufReader::new(file); let mut content = String::new(); let capacity = match file.metadata() { | Ok(metadata) => usize::try_from(metadata.len()).unwrap_or(0), | Err(_) => 0, }; let mut reader = BufReader::with_capacity(1024 * 1024, file); let mut content = if capacity > 0 { String::with_capacity(capacity) } else { String::new() }; match reader.read_to_string(&mut content) { | Ok(_) => Ok(content), | Err(why) => { error!(filename, "=> {} Read file content", Label::fail()); error!(filename, "=> {} Read large file content", Label::fail()); Err(why) } } Loading
acorn-lib/src/io/tests/mod.rs +22 −2 Original line number Diff line number Diff line Loading @@ -4,10 +4,10 @@ use crate::io::bagit::{Bag, BagInfo, Save}; use crate::io::config::{ApplicationConfiguration, Bucket}; use crate::io::{ archive, file_checksum, files_all, files_from_git_branch, files_from_git_commit, files_from_gitlab_merge_request, filter_git_command_result, filter_ignored, image_paths, InputOutput, filter_ignored, image_paths, read_file, read_large_file, InputOutput, }; use crate::{Location, Repository, Scheme}; use std::fs::{create_dir_all, read_to_string, remove_dir_all}; use std::fs::{create_dir_all, read_to_string, remove_dir_all, remove_file, write}; use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; Loading Loading @@ -423,3 +423,23 @@ fn test_image_paths() { let files = image_paths(path); assert_eq!(files.len(), 0); } #[test] fn test_read_large_file() { let stamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); let path = std::env::temp_dir().join(format!("acorn-read-large-file-{stamp}.txt")); let expected = "ACORN ".repeat(500_000); write(path.clone(), expected.clone()).unwrap(); let content = read_large_file(path.clone()).unwrap(); assert_eq!(content, expected); remove_file(path).unwrap(); } #[test] fn test_read_file_large_content() { let stamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); let path = std::env::temp_dir().join(format!("acorn-read-file-large-{stamp}.txt")); let expected = "ACORN-LARGE ".repeat(800_000); write(path.clone(), expected.clone()).unwrap(); let content = read_file(path.clone()).unwrap(); assert_eq!(content, expected); remove_file(path).unwrap(); }
acorn-lib/src/util/constants.rs +2 −0 Original line number Diff line number Diff line Loading @@ -62,6 +62,8 @@ pub const APPLICATION: &str = "acorn"; pub const ORGANIZATION: &str = "ornl"; /// Organization qualifier pub const QUALIFIER: &str = "org"; /// File size threshold in bytes for switching to large-file read strategy (100 MB). pub const LARGE_FILE_THRESHOLD_BYTES: u64 = 100 * 1024 * 1024; // Schema defaults /// Default affiliation pub const DEFAULT_AFFILIATION: &str = "Oak Ridge National Laboratory"; Loading