/*********** Modeling Time Series Data *****************/
| 2 | + |
| 3 | + |
------------- Load Time Series Data --------------------
-- Bulk-load the comma-delimited readings into time_series.location_temp.
-- COPY ... FROM 'file' is executed by the database server, so the path must be
-- readable by the server process (psql's \copy reads a client-side file instead).
COPY time_series.location_temp (event_time, location_id, temp_celcius)
FROM 'C:/temp-sql/location_temp.txt' DELIMITER ',';

-- Refresh planner statistics right after the bulk load: the EXPLAIN plans
-- inspected later in this script depend on row estimates that reflect the
-- freshly loaded data, and autovacuum may not have analyzed the table yet.
ANALYZE time_series.location_temp;
| 7 | + |
| 8 | + |
-- Quick sanity check of the load: show the five earliest readings.
-- Columns are listed explicitly (the three populated by the COPY above) instead
-- of SELECT *, and location_id is added as a tie-breaker so readings that share
-- a timestamp come back in a stable order
-- (assumes one reading per location per timestamp -- TODO confirm).
SELECT
    event_time,
    location_id,
    temp_celcius
FROM time_series.location_temp
ORDER BY
    event_time,
    location_id
LIMIT 5;
| 13 | + |
---------------- Indexing Data ----------------------------

------ Indexing on Location -----------------
-- Baseline before any index exists: average temperature per location.
-- EXPLAIN ANALYZE (rather than plain EXPLAIN) actually runs the query and
-- reports execution time, so this run can be compared with the EXPLAIN ANALYZE
-- run that follows the index creation.
EXPLAIN ANALYZE
SELECT
    location_id,
    AVG(temp_celcius)
FROM time_series.location_temp
GROUP BY location_id;
| 21 | + |
| 22 | + |
-- Add a single-column index on location_id, then repeat the per-location
-- aggregate with timing to see whether the planner picks the index up.
CREATE INDEX idx_loc_location_temp
    ON time_series.location_temp USING btree (location_id);

-- This query has no WHERE clause and therefore reads every row; the author
-- observed that the plan still does not use the index. The next step adds a
-- filter to narrow the rows.
EXPLAIN (ANALYZE)
SELECT
    location_id,
    AVG(temp_celcius)
FROM time_series.location_temp
GROUP BY location_id;
| 31 | + |
| 32 | + |
-- Restrict the aggregate to a single location. With this filter the author
-- observed the index being used (via a bitmap index scan).
EXPLAIN (ANALYZE)
SELECT
    location_id,
    AVG(temp_celcius)
FROM time_series.location_temp
WHERE location_id = '2'
GROUP BY location_id;
| 38 | + |
-- Remove the location index again and re-run the same filtered aggregate, so
-- its timing without the index can be compared with the indexed run.
DROP INDEX time_series.idx_loc_location_temp;

EXPLAIN (ANALYZE)
SELECT
    location_id,
    AVG(temp_celcius)
FROM time_series.location_temp
WHERE location_id = '2'
GROUP BY location_id;
| 47 | + |
| 48 | + |
------ Indexing on Event_Time and Location ID -----------------

-- Baseline without an index: per-location average over one day of readings.
-- The range is half-open (>= start, < next day). BETWEEN is inclusive on both
-- ends and would also match readings stamped exactly 2019-03-06 00:00:00
-- (assumes event_time is a timestamp column -- TODO confirm).
-- NOTE(review): the bare number below was in the original; presumably a cost or
-- row count the author observed -- confirm what it refers to.
-- 34090
EXPLAIN
SELECT
    location_id,
    AVG(temp_celcius)
FROM time_series.location_temp
WHERE event_time >= '2019-03-05'
  AND event_time < '2019-03-06'
GROUP BY location_id;

-- Composite index with event_time as the leading column.
-- NOTE(review): "even" in the index name looks like a typo for "event"; the
-- name is left unchanged in case anything else references it.
CREATE INDEX idx_even_time_location_temp
ON time_series.location_temp (event_time, location_id);

-- Same one-day query again; the author observed that the plan still does not
-- use the index for a range this wide.
EXPLAIN
SELECT
    location_id,
    AVG(temp_celcius)
FROM time_series.location_temp
WHERE event_time >= '2019-03-05'
  AND event_time < '2019-03-06'
GROUP BY location_id;
| 66 | + |
| 67 | + |
-- Make the filter far more selective by narrowing it to a 20-minute window
-- (hour/minute/second precision) and inspect the plan again.
-- Half-open range: includes 00:00:00, excludes 00:20:00, so adjacent 20-minute
-- windows would never count the same reading twice.
EXPLAIN
SELECT
    location_id,
    AVG(temp_celcius)
FROM time_series.location_temp
WHERE event_time >= '2019-03-05 00:00:00'
  AND event_time < '2019-03-05 00:20:00'
GROUP BY location_id;
| 73 | + |
| 74 | + |
/*
In summary, indexes are very helpful in some cases, especially when a query touches only a limited subset of the data.
They do not always help when a query spans a larger time range.
*/
| 79 | + |
| 80 | + |
0 commit comments