Skip to content

Commit e09a9b3

Browse files
committed
indexing using Utilization dataset
1 parent ecc9e3d commit e09a9b3

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*********** Partitioning Data (using Utilization data set) ****************/
2+
3+
-- Bulk-load the comma-delimited utilization export into time_series.utilization.
-- Only the five listed columns are populated, in the order given here.
COPY time_series.utilization (
    event_time,
    server_id,
    cpu_utilization,
    free_memory,
    session_cnt
)
FROM 'C:\temp-sql\utilization.txt'
DELIMITER ',';
7+
8+
9+
-- Sanity check: peek at a handful of the loaded rows.
-- There is no ORDER BY, so which five rows come back is not guaranteed.
SELECT *
FROM time_series.utilization
LIMIT 5;
12+
13+
14+
15+
-- Baseline plan (no index yet): average CPU utilization per server over the
-- window below. Planner cost recorded by the author: 3687.71.
EXPLAIN
SELECT
    server_id,
    AVG(cpu_utilization)
FROM time_series.utilization
WHERE event_time >= '2019-03-05'
  AND event_time <= '2019-03-06'  -- inclusive upper bound, same as BETWEEN
GROUP BY server_id;
20+
21+
22+
------------ We will create an index to speed things up ---------------
23+
24+
-- 1) Composite index with event_time as the leading column, server_id second.
--    btree is the default access method; it is only spelled out here.
CREATE INDEX idx_event_time_utilization
    ON time_series.utilization
    USING btree (event_time, server_id);
27+
28+
-- Same query, now with idx_event_time_utilization (event_time, server_id)
-- in place. Planner cost recorded by the author: 3651.71.
EXPLAIN
SELECT
    server_id,
    AVG(cpu_utilization)
FROM time_series.utilization
WHERE event_time >= '2019-03-05'
  AND event_time <= '2019-03-06'  -- inclusive upper bound, same as BETWEEN
GROUP BY server_id;
33+
34+
-- Remove the (event_time, server_id) index so the next experiment is measured
-- without it. RESTRICT is the default behaviour; it is only spelled out here.
DROP INDEX time_series.idx_event_time_utilization RESTRICT;
35+
36+
37+
-- 2) Same two columns in the opposite order: server_id leads, event_time second.
--    btree is the default access method; it is only spelled out here.
CREATE INDEX idx_server_event_utilization
    ON time_series.utilization
    USING btree (server_id, event_time);
40+
41+
42+
/*
Now the index is not even used: the cost is back to 3687.71.

Notice that event_time is now the second column of the index.
When the only index has server_id as its leading column and event_time second,
a query that filters on event_time alone is not able to use the index at all.
*/
48+
-- Same query again, with idx_server_event_utilization (server_id, event_time)
-- as the only index created by this script.
EXPLAIN
SELECT
    server_id,
    AVG(cpu_utilization)
FROM time_series.utilization
WHERE event_time >= '2019-03-05'
  AND event_time <= '2019-03-06'  -- inclusive upper bound, same as BETWEEN
GROUP BY server_id;

0 commit comments

Comments
 (0)