Loading ADL/api/app.py +5 −0 Original line number Diff line number Diff line Loading @@ -20,3 +20,8 @@ async def startup() -> None: async def shutdown() -> None: if database.is_connected: await database.disconnect() @app.get('/healthz') async def health_and_liveness_probe(): return {'status': 'healthy'} ADL/api/models.py +4 −0 Original line number Diff line number Diff line from datetime import datetime import databases import sqlalchemy from ormar import Integer, Model, String Loading Loading @@ -48,6 +50,8 @@ class Files(ARMPath): datastream: str = String(max_length=30) md5_checksum: str = String(max_length=32, min_length=32, regex=r'[a-fA-F\d]+') type: str = "file" start_time: datetime end_time: datetime @property_field def name(self) -> str: Loading ADL/api/v1.py +19 −11 Original line number Diff line number Diff line from datetime import datetime from typing import List from fastapi.routing import APIRouter Loading @@ -9,29 +10,24 @@ from .models import Datastreams, Files, Sites router = APIRouter(route_class=versioned_api_route(1)) @router.get('/healthz') async def health_and_liveness_probe(): return {'status': 'healthy'} @router.get("/fs/", response_model=List[Sites]) async def list_sites(): # if config.debug: # print(Sites.objects.build_select_expression()) if config.debug: print(Sites.objects.build_select_expression()) return await Sites.objects.all() @router.get("/fs/{site}", response_model=List[Datastreams]) async def list_datastreams_for_site(site: Sites.site_code): # if config.debug: # print(Datastreams.objects.build_select_expression()) if config.debug: print(Datastreams.objects.build_select_expression()) return await Datastreams.objects.filter(site_code=site).all() @router.get('/fs/{site}/{datastream}', response_model=List[Files]) async def list_files_for_datastream(site: Sites.site_code, datastream: Datastreams.datastream): # if config.debug: # print(Files.objects.build_select_expression()) if config.debug: 
print(Files.objects.build_select_expression()) return await Files.objects.filter(datastream=datastream).all() Loading @@ -41,3 +37,15 @@ async def stat_file(site: Sites.site_code, datastream: Datastreams.datastream, f print(Files.objects.filter(versioned_filename=filename).build_select_expression()) return await Files.objects.filter(versioned_filename=filename).get() @router.get('/search', response_model=List[Files]) async def search(datastream: Files.datastream, start: datetime, end: datetime): results = Files.objects.filter(start_time__gte=start, end_time__lte=end) if datastream: results = results.filter(datastream=datastream) if config.debug: print(results.build_select_expression()) return await results.get() ADL/client/archive.py 0 → 100644 +53 −0 Original line number Diff line number Diff line import json import re from datetime import datetime import pandas as pd import xarray as xr ATTR_REGEX = r'^[a-z]{3}\w*[A-Z][0-9]*\.[a-z\d]\d:att:(\S+)\s+val\s+(.+)$' SITE_REGEX = r'^[a-z]{3}$' DATASTREAM_REGEX = r'^([a-z]{3})\w*([A-Z][0-9]*)\.([a-z\d]\d)$' class Archive(object): def __init__(self, username, password): self._selected_sites = None self._selected_datastreams = None self._selected_time_start = datetime.mix self._selected_time_stop = datetime.max self.attributes = {} self.token = None def __getitem__(self, key): if isinstance(key, str): if re.match(SITE_REGEX, key): self._selected_sites = set({key}) if re.match(DATASTREAM_REGEX, key): self._selected_datastreams = set({key}) elif isinstance(key, slice): if key.start: self._selected_time_start = pd.to_datetime(key.start) if key.stop: self._selected_time_stop = pd.to_datetime(key.stop) return self @property def file_count(self): if not self._selected_datastreams: return 29796186 if self._selected_time_start: return (self._selected_time_stop - self._selected_time_start).days @property def file_list(self): return ( 
f'https://meta-archive.arm.gov/sgp/sgpaerioe1turnC1.c1/sgpaerioe1turnC1.c1.{file_date.strftime("%Y%m%d")}.0000.nc.v0' for file_date in pd.date_range(self._selected_time_start, self._selected_time_stop) ) def to_xarray(self): return xr.open_mfdataset(self.file_list) def __repr__(self): attr_list = ', '.join(f'{attr}: {getattr(self, attr)}' for attr in ('_selected_time_start', '_selected_time_stop', '_selected_sites', '_selected_datastreams')) return f'Archive({attr_list})' ADL/client/fs.py +1 −0 Original line number Diff line number Diff line Loading @@ -6,6 +6,7 @@ from . import config class ARMFs(HTTPFileSystem): protocol = 'arm' def __init__(self, *args, **storage_options): Loading Loading
# ADL/api/app.py -- application lifecycle hooks and health endpoint.
# (The startup handler is collapsed in the diff view and elided here.)


async def shutdown() -> None:
    # Release the database connection on application shutdown; the guard
    # avoids disconnecting a pool that was never connected.
    if database.is_connected:
        await database.disconnect()


@app.get('/healthz')
async def health_and_liveness_probe():
    # Health/liveness probe endpoint: reports healthy whenever the process
    # is up and serving requests.
    # NOTE(review): a same-named probe also appears on the v1 router in
    # ADL/api/v1.py in this diff -- it looks moved here; confirm only one
    # copy survives the merge.
    return {'status': 'healthy'}
ADL/api/models.py +4 −0 Original line number Diff line number Diff line from datetime import datetime import databases import sqlalchemy from ormar import Integer, Model, String Loading Loading @@ -48,6 +50,8 @@ class Files(ARMPath): datastream: str = String(max_length=30) md5_checksum: str = String(max_length=32, min_length=32, regex=r'[a-fA-F\d]+') type: str = "file" start_time: datetime end_time: datetime @property_field def name(self) -> str: Loading
# ADL/api/v1.py -- version-1 API routes.
# (Reconstructed from a collapsed diff view; the imports providing `config`
# and `versioned_api_route` were not visible and are assumed unchanged.)
from datetime import datetime
from typing import List

from fastapi.routing import APIRouter

from .models import Datastreams, Files, Sites

router = APIRouter(route_class=versioned_api_route(1))


@router.get("/fs/", response_model=List[Sites])
async def list_sites():
    """Return every ARM site known to the archive."""
    if config.debug:
        print(Sites.objects.build_select_expression())
    return await Sites.objects.all()


@router.get("/fs/{site}", response_model=List[Datastreams])
async def list_datastreams_for_site(site: Sites.site_code):
    """Return the datastreams recorded at *site*."""
    if config.debug:
        print(Datastreams.objects.build_select_expression())
    return await Datastreams.objects.filter(site_code=site).all()


@router.get('/fs/{site}/{datastream}', response_model=List[Files])
async def list_files_for_datastream(site: Sites.site_code, datastream: Datastreams.datastream):
    """Return the files belonging to *datastream*."""
    if config.debug:
        print(Files.objects.build_select_expression())
    return await Files.objects.filter(datastream=datastream).all()


# NOTE(review): the decorator, the tail of this signature, and the debug
# guard were truncated in the diff view; the route path, response_model and
# the `filename` annotation are reconstructed from the body's usage of
# `versioned_filename` -- confirm against the actual file before merging.
@router.get('/fs/{site}/{datastream}/{filename}', response_model=Files)
async def stat_file(site: Sites.site_code, datastream: Datastreams.datastream, filename: Files.versioned_filename):
    """Return the single file record matching *filename*."""
    if config.debug:
        print(Files.objects.filter(versioned_filename=filename).build_select_expression())
    return await Files.objects.filter(versioned_filename=filename).get()


@router.get('/search', response_model=List[Files])
async def search(datastream: Files.datastream, start: datetime, end: datetime):
    """Search for files whose [start_time, end_time] lies inside [start, end].

    NOTE(review): `datastream` is a required query parameter, yet the body
    guards on its truthiness -- it was probably meant to be optional
    (``= None``); confirm intent before loosening the signature.
    """
    results = Files.objects.filter(start_time__gte=start, end_time__lte=end)
    if datastream:
        results = results.filter(datastream=datastream)
    if config.debug:
        print(results.build_select_expression())
    # BUG FIX: response_model is List[Files], but .get() returns exactly one
    # row and raises on zero or many matches.  Return the full result set.
    return await results.all()
# ADL/client/archive.py -- prototype client for selecting and fetching
# ARM archive data.
import json  # NOTE(review): currently unused; presumably for upcoming auth/token work
import re
from datetime import datetime

import pandas as pd

# Matches attribute lines of the form "<datastream>:att:<name>  val <value>".
ATTR_REGEX = r'^[a-z]{3}\w*[A-Z][0-9]*\.[a-z\d]\d:att:(\S+)\s+val\s+(.+)$'
# Bare three-letter site code, e.g. "sgp".
SITE_REGEX = r'^[a-z]{3}$'
# "<site><instrument><facility>.<level>", e.g. "sgpaerioe1turnC1.c1".
DATASTREAM_REGEX = r'^([a-z]{3})\w*([A-Z][0-9]*)\.([a-z\d]\d)$'


class Archive(object):
    """Chained-selection view of the ARM data archive.

    Item access narrows the selection and returns ``self`` so calls chain:
    ``Archive(u, p)['sgp']['sgpaerioe1turnC1.c1']['2020-01-01':'2020-02-01']``.
    """

    def __init__(self, username, password):
        """Create an unauthenticated archive view with the widest selection.

        NOTE(review): *username*/*password* are accepted but never used and
        ``token`` stays ``None`` -- presumably a later auth step fills it in;
        confirm the intended design.
        """
        self._selected_sites = None
        self._selected_datastreams = None
        # BUG FIX: was ``datetime.mix``, an AttributeError raised on every
        # instantiation.  Default to the widest possible time window.
        self._selected_time_start = datetime.min
        self._selected_time_stop = datetime.max
        self.attributes = {}
        self.token = None

    def __getitem__(self, key):
        """Narrow the selection by site code, datastream name, or time slice."""
        if isinstance(key, str):
            # A bare 3-letter code selects a site; a full datastream name
            # selects a datastream.  The anchored patterns cannot both match.
            if re.match(SITE_REGEX, key):
                self._selected_sites = {key}
            if re.match(DATASTREAM_REGEX, key):
                self._selected_datastreams = {key}
        elif isinstance(key, slice):
            # Time window: either bound may be omitted to keep the default.
            if key.start:
                self._selected_time_start = pd.to_datetime(key.start)
            if key.stop:
                self._selected_time_stop = pd.to_datetime(key.stop)
        return self

    @property
    def file_count(self):
        """Rough number of files matched by the current selection."""
        if not self._selected_datastreams:
            # NOTE(review): hard-coded total archive size -- prototype
            # placeholder; replace with a real metadata query.
            return 29796186
        if self._selected_time_start:
            # Assumes one file per day inside the selected window.
            return (self._selected_time_stop - self._selected_time_start).days

    @property
    def file_list(self):
        """Generator of file URLs, one per day in the selected window.

        NOTE(review): the URL template is hard-coded to a single SGP
        datastream and ignores the site/datastream selection -- prototype
        stub; confirm before relying on it.
        """
        return (
            f'https://meta-archive.arm.gov/sgp/sgpaerioe1turnC1.c1/sgpaerioe1turnC1.c1.{file_date.strftime("%Y%m%d")}.0000.nc.v0'
            for file_date in pd.date_range(self._selected_time_start, self._selected_time_stop)
        )

    def to_xarray(self):
        """Open every selected file as one combined xarray dataset."""
        # Imported lazily: xarray is a heavy dependency needed only here,
        # so the rest of the client works without it installed.
        import xarray as xr

        return xr.open_mfdataset(self.file_list)

    def __repr__(self):
        attr_list = ', '.join(
            f'{attr}: {getattr(self, attr)}'
            for attr in ('_selected_time_start', '_selected_time_stop',
                         '_selected_sites', '_selected_datastreams')
        )
        return f'Archive({attr_list})'
ADL/client/fs.py +1 −0 Original line number Diff line number Diff line Loading @@ -6,6 +6,7 @@ from . import config class ARMFs(HTTPFileSystem): protocol = 'arm' def __init__(self, *args, **storage_options): Loading