Commit 03c0ee7e authored by Hines, Jesse
Browse files

Add script to submit Druid ingests

Also fix Fugaku ingest causing RAM issues
parent 74963e40
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -30,11 +30,11 @@ docker compose up --wait
```
The API server will be hosted on http://localhost:8081. The dashboard will be hosted on http://localhost:8080.

You'll need at least 16 GiB of RAM, preferably 32 GiB for druid and RAPS to run smoothly.
You'll need at least 32 GiB of RAM for Druid and RAPS to run smoothly.

If you want to run replay data locally, you'll need to download the datasets and then ingest them in
Druid. You can fetch the datasets with `./scripts/fetch.sh` and submit the druid ingests for them
under `./druid_ingests` using the Druid UI at http://localhost:8888.
Druid. You can fetch the datasets with `./scripts/fetch_data.sh`, and use the `./scripts/submit_data_ingests.py`
script to ingest them into Druid.

View the server logs with:
```bash
+2 −38
Original line number Diff line number Diff line
@@ -3,13 +3,6 @@
    "spec": {
        "ioConfig": {
            "type": "index_parallel",
            // "inputSource": {
            //     "type": "s3",
            //     "objectGlob": "**.parquet",
            //     "prefixes": [
            //         "s3://scratch/raps-datasets/fugaku/"
            //     ]
            // },
            "inputSource": {
                "type": "local",
                "baseDir": "/data/fugaku/",
@@ -25,7 +18,8 @@
                "type": "dynamic"
            },
            "maxNumConcurrentSubTasks": 2,
            "maxRowsInMemory": 100000
            "maxRowsInMemory": 100000,
            "awaitSegmentAvailabilityTimeoutMillis": 1800000
        },
        "dataSchema": {
            "dataSource": "svc-ts-exadigit-data-fugaku",
@@ -39,36 +33,6 @@
                        "name": "__time",
                        "type": "expression",
                        "expression": "timestamp_parse(sdt)"
                    },
                    {
                        "name": "adt",
                        "type": "expression",
                        "expression": "timestamp_format(timestamp_parse(adt))"
                    },
                    {
                        "name": "qdt",
                        "type": "expression",
                        "expression": "timestamp_format(timestamp_parse(qdt))"
                    },
                    {
                        "name": "schedsdt",
                        "type": "expression",
                        "expression": "timestamp_format(timestamp_parse(schedsdt))"
                    },
                    {
                        "name": "deldt",
                        "type": "expression",
                        "expression": "timestamp_format(timestamp_parse(deldt))"
                    },
                    {
                        "name": "sdt",
                        "type": "expression",
                        "expression": "timestamp_format(timestamp_parse(sdt))"
                    },
                    {
                        "name": "edt",
                        "type": "expression",
                        "expression": "timestamp_format(timestamp_parse(edt))"
                    }
                ]
            },
+2 −7
Original line number Diff line number Diff line
@@ -3,12 +3,6 @@
    "spec": {
        "ioConfig": {
            "type": "index_parallel",
            // "inputSource": {
            //     "type": "s3",
            //     "prefixes": [
            //         "s3://scratch/raps-datasets/lassen/final_csm_allocation_history_hashed.csv"
            //     ]
            // },
            "inputSource": {
                "type": "local",
                "baseDir": "/data/lassen/final_csm_allocation_history_hashed.csv",
@@ -25,7 +19,8 @@
                "type": "dynamic"
            },
            "maxNumConcurrentSubTasks": 2,
            "maxRowsInMemory": 100000
            "maxRowsInMemory": 100000,
            "awaitSegmentAvailabilityTimeoutMillis": 1800000
        },
        "dataSchema": {
            "dataSource": "svc-ts-exadigit-data-lassen-allocation-history",
+2 −7
Original line number Diff line number Diff line
@@ -3,12 +3,6 @@
    "spec": {
        "ioConfig": {
            "type": "index_parallel",
            // "inputSource": {
            //     "type": "s3",
            //     "prefixes": [
            //         "s3://scratch/raps-datasets/lassen/final_csm_allocation_node_history_with_time.csv"
            //     ]
            // },
            "inputSource": {
                "type": "local",
                "baseDir": "/data/lassen/final_csm_allocation_node_history_with_time.csv",
@@ -25,7 +19,8 @@
                "type": "dynamic"
            },
            "maxNumConcurrentSubTasks": 2,
            "maxRowsInMemory": 100000
            "maxRowsInMemory": 100000,
            "awaitSegmentAvailabilityTimeoutMillis": 1800000
        },
        "dataSchema": {
            "dataSource": "svc-ts-exadigit-data-lassen-node-history",
+3 −8
Original line number Diff line number Diff line
@@ -3,12 +3,6 @@
    "spec": {
        "ioConfig": {
            "type": "index_parallel",
            // "inputSource": {
            //     "type": "s3",
            //     "prefixes": [
            //         "s3://scratch/raps-datasets/lassen/final_csm_step_history.csv"
            //     ]
            // },
            "inputSource": {
                "type": "local",
                "baseDir": "/data/lassen/final_csm_step_history.csv",
@@ -25,10 +19,11 @@
                "type": "dynamic"
            },
            "maxNumConcurrentSubTasks": 2,
            "maxRowsInMemory": 100000
            "maxRowsInMemory": 100000,
            "awaitSegmentAvailabilityTimeoutMillis": 1800000
        },
        "dataSchema": {
            "dataSource": "svc-ts-exadigit-data-fugaku-lassen-step-history",
            "dataSource": "svc-ts-exadigit-data-lassen-step-history",
            "timestampSpec": {
                "column": "!!!_no_such_column_!!!",
                "missingValue": "2010-01-01T00:00:00Z"
Loading