Hello all,
We’ve done quite a bit or rationalization on our data by changing the compression and removing indexes and changing fields sizes down, is there any potential space saving that we might have missed ?.
I know its quite subjective topic, but I’m wondering if storing data in arrays might help ?
The schema we have is below, with indexes turned off on columns we can afford to.
CREATE TABLE IF NOT EXISTS "doc"."safe_1526595589" (
"ts" TIMESTAMP WITH TIME ZONE,
"meta_8" INTEGER,
"a12" REAL INDEX OFF,
"a13" REAL INDEX OFF,
"f" REAL INDEX OFF,
"In" DOUBLE PRECISION INDEX OFF,
"t" REAL,
"system_6" REAL,
"system_7" REAL,
"system_8" REAL,
"e" DOUBLE PRECISION INDEX OFF,
"ex" DOUBLE PRECISION INDEX OFF,
"re" DOUBLE PRECISION INDEX OFF,
"rex" DOUBLE PRECISION INDEX OFF,
"ae" DOUBLE PRECISION INDEX OFF,
"e1" DOUBLE PRECISION INDEX OFF,
"ex1" DOUBLE PRECISION INDEX OFF,
"re1" DOUBLE PRECISION INDEX OFF,
"rex1" DOUBLE PRECISION INDEX OFF,
"ae1" DOUBLE PRECISION INDEX OFF,
"e2" DOUBLE PRECISION INDEX OFF,
"ex2" DOUBLE PRECISION INDEX OFF,
"re2" DOUBLE PRECISION INDEX OFF,
"rex2" DOUBLE PRECISION INDEX OFF,
"ae2" DOUBLE PRECISION INDEX OFF,
"e3" DOUBLE PRECISION INDEX OFF,
"ex3" DOUBLE PRECISION INDEX OFF,
"re3" DOUBLE PRECISION INDEX OFF,
"rex3" DOUBLE PRECISION INDEX OFF,
"ae3" DOUBLE PRECISION INDEX OFF,
"pulse" INTEGER,
"counter_22" REAL,
"counter_23" REAL,
"counter_24" REAL,
"v" REAL INDEX OFF,
"u" REAL INDEX OFF,
"i" REAL INDEX OFF,
"p" REAL INDEX OFF,
"q" REAL INDEX OFF,
"s" REAL INDEX OFF,
"pf" REAL INDEX OFF,
"d" REAL INDEX OFF,
"v1" REAL INDEX OFF,
"u1" REAL INDEX OFF,
"i1" REAL INDEX OFF,
"p1" REAL INDEX OFF,
"q1" REAL INDEX OFF,
"s1" REAL INDEX OFF,
"pf1" REAL INDEX OFF,
"d1" REAL INDEX OFF,
"v2" REAL INDEX OFF,
"u2" REAL INDEX OFF,
"i2" REAL INDEX OFF,
"p2" REAL INDEX OFF,
"q2" REAL INDEX OFF,
"s2" REAL INDEX OFF,
"pf2" REAL INDEX OFF,
"d2" REAL INDEX OFF,
"v3" REAL INDEX OFF,
"u3" REAL INDEX OFF,
"i3" REAL INDEX OFF,
"p3" REAL INDEX OFF,
"q3" REAL INDEX OFF,
"s3" REAL INDEX OFF,
"pf3" REAL INDEX OFF,
"d3" REAL INDEX OFF,
"vpeak" REAL INDEX OFF,
"upeak" REAL INDEX OFF,
"ipeak" REAL INDEX OFF,
"ppeak" REAL INDEX OFF,
"qpeak" REAL INDEX OFF,
"speak" REAL INDEX OFF,
"pfpeak" REAL INDEX OFF,
"dmax" REAL INDEX OFF,
"vpeak1" REAL INDEX OFF,
"upeak1" REAL INDEX OFF,
"ipeak1" REAL INDEX OFF,
"ppeak1" REAL INDEX OFF,
"qpeak1" REAL INDEX OFF,
"speak1" REAL INDEX OFF,
"pfpeak1" REAL INDEX OFF,
"dmax1" REAL INDEX OFF,
"vpeak2" REAL INDEX OFF,
"upeak2" REAL INDEX OFF,
"ipeak2" REAL INDEX OFF,
"ppeak2" REAL INDEX OFF,
"qpeak2" REAL INDEX OFF,
"speak2" REAL INDEX OFF,
"pfpeak2" REAL INDEX OFF,
"dmax2" REAL INDEX OFF,
"vpeak3" REAL INDEX OFF,
"upeak3" REAL INDEX OFF,
"ipeak3" REAL INDEX OFF,
"ppeak3" REAL INDEX OFF,
"qpeak3" REAL INDEX OFF,
"speak3" REAL INDEX OFF,
"pfpeak3" REAL INDEX OFF,
"dmax3" REAL INDEX OFF,
"id" VARCHAR(40) NOT NULL,
"roundts" TIMESTAMP WITH TIME ZONE NOT NULL,
"roundts_month" TIMESTAMP WITH TIME ZONE GENERATED ALWAYS AS date_bin(CAST('P2W' AS interval), "roundts", CAST(1672531200000 AS bigint)) NOT NULL,
PRIMARY KEY ("roundts", "roundts_month", "id")
)
CLUSTERED BY ("id") INTO 3 SHARDS
PARTITIONED BY ("roundts_month")
WITH (
"allocation.max_retries" = 5,
"blocks.metadata" = false,
"blocks.read" = false,
"blocks.read_only" = false,
"blocks.read_only_allow_delete" = false,
"blocks.write" = false,
codec = 'best_compression',
column_policy = 'dynamic',
"mapping.total_fields.limit" = 1000,
max_ngram_diff = 1,
max_shingle_diff = 3,
number_of_replicas = '0-1',
"routing.allocation.enable" = 'all',
"routing.allocation.total_shards_per_node" = -1,
"store.type" = 'fs',
"translog.durability" = 'REQUEST',
"translog.flush_threshold_size" = 536870912,
"translog.sync_interval" = 5000,
"unassigned.node_left.delayed_timeout" = 60000,
"write.wait_for_active_shards" = '1'
)
Many thanks
David