/* ********** Partitioning Data (using Utilization data set) ****************/

-- Load data from a comma-delimited text file.
-- The delimiter must be a single one-byte character, and the path must not
-- contain stray spaces.
-- NOTE(review): path reconstructed from a whitespace-mangled original -
-- confirm it matches the actual file location. COPY ... FROM 'file' reads the
-- file on the database server; use psql's \copy for a client-side file.
COPY time_series.utilization (
    event_time,
    server_id,
    cpu_utilization,
    free_memory,
    session_cnt
)
FROM 'C:\temp-sql\utilization.txt'
WITH (DELIMITER ',');


-- Check data: quick look at a few loaded rows.
SELECT *
FROM time_series.utilization
LIMIT 5;


/*
    Baseline (no index): average CPU utilization by server id.
    Recorded planner cost: 3687.71

    Note: BETWEEN is inclusive on both ends, so rows stamped exactly
    2019-03-06 00:00:00 are included. The predicate is kept as-is because the
    costs recorded in this script were measured with it.
*/
EXPLAIN
SELECT
    server_id,
    AVG(cpu_utilization)
FROM time_series.utilization
WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06'
GROUP BY server_id;


-- ---------- We will create an index to speed things up ---------------

-- 1) Index on (event_time, server_id): the filtered column comes first.
CREATE INDEX idx_event_time_utilization
    ON time_series.utilization (event_time, server_id);

/* After indexing, recorded planner cost: 3651.71 */
EXPLAIN
SELECT
    server_id,
    AVG(cpu_utilization)
FROM time_series.utilization
WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06'
GROUP BY server_id;

-- Remove the first index so it does not influence the next experiment.
DROP INDEX time_series.idx_event_time_utilization;


-- 2) Index on (server_id, event_time): same columns, order switched.
CREATE INDEX idx_server_event_utilization
    ON time_series.utilization (server_id, event_time);


/*
    Now the index is not used at all: the recorded planner cost is back to
    3687.71, the same as with no index.

    event_time is the second column of this index, while the query filters
    only on event_time. With server_id as the leading column and event_time
    second, the index does not help this query.
*/
EXPLAIN
SELECT
    server_id,
    AVG(cpu_utilization)
FROM time_series.utilization
WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06'
GROUP BY server_id;