Yes, sure. This is my latest test, reducing the shard count and disabling partitioning:
CREATE TABLE IF NOT EXISTS "doc"."metrics" (
"timestamp" TIMESTAMP WITHOUT TIME ZONE NOT NULL,
"labels_hash" TEXT NOT NULL,
"labels" OBJECT(DYNAMIC) AS (
"instance" TEXT,
"prometheus" TEXT,
"job" TEXT,
"__name__" TEXT,
"quantile" TEXT,
"le" TEXT,
"dialer_name" TEXT,
"version" TEXT,
"reason" TEXT,
"listener_name" TEXT,
"slice" TEXT,
"handler" TEXT,
"code" TEXT,
"goversion" TEXT,
"goarch" TEXT,
"goos" TEXT,
"branch" TEXT,
"revision" TEXT,
"tags" TEXT,
"alertmanager" TEXT,
"remote_name" TEXT,
"url" TEXT,
"role" TEXT,
"event" TEXT,
"call" TEXT,
"endpoint" TEXT,
"name" TEXT,
"config" TEXT,
"interval" TEXT,
"scrape_job" TEXT,
"type" TEXT,
"consumer" TEXT,
"appid" TEXT,
"interface" TEXT,
"taskid" TEXT,
"cpu" TEXT,
"hash" TEXT,
"core" TEXT,
"package" TEXT,
"device" TEXT,
"mode" TEXT,
"controller" TEXT,
"csrow" TEXT,
"fstype" TEXT,
"mountpoint" TEXT,
"chip" TEXT,
"sensor" TEXT,
"chip_name" TEXT,
"label" TEXT,
"broadcast" TEXT,
"address" TEXT,
"duplex" TEXT,
"operstate" TEXT,
"collector" TEXT,
"nodename" TEXT,
"machine" TEXT,
"release" TEXT,
"sysname" TEXT,
"domainname" TEXT,
"file" TEXT,
"master" TEXT,
"generation" TEXT,
"major" TEXT,
"minor" TEXT,
"patchlevel" TEXT,
"implementation" TEXT,
"cluster" TEXT,
"clientid" TEXT,
"topic" TEXT,
"delayedoperation" TEXT,
"request" TEXT,
"error" TEXT,
"networkprocessor" TEXT,
"partition" TEXT,
"value" TEXT,
"environment" TEXT,
"exported_instance" TEXT,
"table" TEXT,
"mechanism" TEXT,
"database" TEXT,
"_target" TEXT,
"state" TEXT,
"stage" TEXT,
"node" TEXT,
"meta_package" TEXT,
"meta_hostname" TEXT,
"meta_ip_address" TEXT,
"method" TEXT
),
"value" DOUBLE PRECISION,
"valueRaw" BIGINT,
PRIMARY KEY ("timestamp", "labels_hash")
)
CLUSTERED INTO 3 SHARDS
WITH (
"allocation.max_retries" = 5,
"blocks.metadata" = false,
"blocks.read" = false,
"blocks.read_only" = false,
"blocks.read_only_allow_delete" = false,
"blocks.write" = false,
codec = 'default',
column_policy = 'strict',
"mapping.total_fields.limit" = 1000,
max_ngram_diff = 1,
max_shingle_diff = 3,
number_of_replicas = '0-1',
"routing.allocation.enable" = 'all',
"routing.allocation.total_shards_per_node" = -1,
"store.type" = 'fs',
"translog.durability" = 'REQUEST',
"translog.flush_threshold_size" = 536870912,
"translog.sync_interval" = 5000,
"unassigned.node_left.delayed_timeout" = 60000,
"write.wait_for_active_shards" = '1'
)
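For reference, per-shard disk usage of a table like this can be checked with a query along these lines against CrateDB's sys.shards table (this assumes the table lives in the doc schema, as in the statement above):
SELECT table_name, id, "primary", size, num_docs
FROM sys.shards
WHERE schema_name = 'doc' AND table_name = 'metrics'
ORDER BY id;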
I initially created the table the way the documentation suggests (same table name as above, so the statement starts the same way):
CREATE TABLE IF NOT EXISTS "doc"."metrics" (
"timestamp" TIMESTAMP,
"labels_hash" STRING,
"labels" OBJECT(DYNAMIC),
"value" DOUBLE,
"valueRaw" LONG,
"day__generated" TIMESTAMP GENERATED ALWAYS AS date_trunc('day', "timestamp"),
PRIMARY KEY ("timestamp", "labels_hash", "day__generated")
) PARTITIONED BY ("day__generated");
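With that partitioned variant, the daily partitions generated from "day__generated" can be listed with something like this (information_schema.table_partitions is CrateDB's catalog of partitions; "values" holds the partition key value):
SELECT table_name, partition_ident, "values"
FROM information_schema.table_partitions
WHERE table_name = 'metrics'
ORDER BY partition_ident;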
Another test was to define every field in my dynamic object with INDEX OFF, but the disk usage was pretty much the same; a sketch of what that looks like follows.
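A minimal sketch of that variant, abridged to a few of the label columns (the table name metrics_noindex is just a placeholder; the remaining labels follow the same pattern):
CREATE TABLE IF NOT EXISTS "doc"."metrics_noindex" (
    "timestamp" TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    "labels_hash" TEXT NOT NULL,
    "labels" OBJECT(DYNAMIC) AS (
        "instance" TEXT INDEX OFF,
        "job" TEXT INDEX OFF,
        "__name__" TEXT INDEX OFF
        -- ... remaining label columns, each declared with INDEX OFF
    ),
    "value" DOUBLE PRECISION,
    "valueRaw" BIGINT,
    PRIMARY KEY ("timestamp", "labels_hash")
)
CLUSTERED INTO 3 SHARDS;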
As for the memory part, I don't have anything to share, sorry. I dropped the metrics table in CrateDB many times and restarted both containers multiple times as well. The strange part is that even after I stopped Prometheus, the adapter kept consuming the same amount of memory and CPU!