1
+ #pragma once
2
+
3
+ /*
4
+ MIT License
5
+
6
+ Copyright (c) 2018 Duncan Ogilvie
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
25
+ */
26
+
27
/**
 * Abstract random-access reader over a byte source addressed by 64-bit offsets.
 *
 * Implementations provide the primitive read() plus buffering knobs; the
 * non-virtual helpers below are convenience wrappers built on read().
 */
struct IBufferedFileReader
{
    /**
     * Buffering direction hint handed to setbufferdirection().
     * NOTE(review): presumably selects which side of the requested offset the
     * buffer should extend to - this interface itself attaches no meaning to it.
     */
    enum Direction
    {
        Right,
        Left
    };

    virtual ~IBufferedFileReader() {}

    /// @return true when the underlying source is usable.
    virtual bool isopen() const = 0;

    /**
     * Copies `size` bytes starting at offset `index` into `dest`.
     * @param dest  Destination with room for at least `size` bytes.
     * @param index Offset of the first byte to read.
     * @param size  Number of bytes to read.
     * @return true on success, false otherwise.
     */
    virtual bool read(void* dest, uint64_t index, size_t size) = 0;

    /// @return the size reported by the implementation.
    virtual uint64_t size() = 0;

    /// Sets the buffer size the implementation should use.
    virtual void setbuffersize(size_t size) = 0;

    /// Sets the buffering direction hint.
    virtual void setbufferdirection(Direction direction) = 0;

    /**
     * Reads the single byte at `index` into `ch`.
     * @return the result of the underlying read().
     */
    bool readchar(uint64_t index, char & ch)
    {
        return read(&ch, index, 1);
    }

    /**
     * Resizes `str` to `size` and fills it with the bytes starting at `index`.
     * @return the result of the underlying read(). On failure `str` keeps the
     *         length `size`, but its contents must not be relied upon.
     */
    bool readstring(uint64_t index, size_t size, std::string & str)
    {
        str.resize(size);
        // Write through operator[] instead of casting away the const of c_str():
        // modifying the array returned by c_str() is undefined behaviour, while
        // &str[0] is a valid writable pointer (even for an empty string).
        return read(&str[0], index, size);
    }

    /**
     * Resizes `vec` to `size` and fills it with the bytes starting at `index`.
     * @return the result of the underlying read(). On failure `vec` keeps the
     *         length `size`, but its contents must not be relied upon.
     */
    bool readvector(uint64_t index, size_t size, std::vector<char> & vec)
    {
        vec.resize(size);
        return read(vec.data(), index, size);
    }
};
59
+
60
+ class HandleFileReader : public IBufferedFileReader
61
+ {
62
+ HANDLE mHandle = INVALID_HANDLE_VALUE;
63
+ uint64_t mFileSize = -1 ;
64
+
65
+ std::vector<char > mBuffer ;
66
+ size_t mBufferIndex = 0 ;
67
+ size_t mBufferSize = 0 ;
68
+ Direction mBufferDirection = Right;
69
+
70
+ bool readnobuffer (void * dest, uint64_t index, size_t size)
71
+ {
72
+ if (!isopen ())
73
+ return false ;
74
+
75
+ LARGE_INTEGER distance;
76
+ distance.QuadPart = index;
77
+ if (!SetFilePointerEx (mHandle , distance, nullptr , FILE_BEGIN))
78
+ return false ;
79
+
80
+ DWORD read = 0 ;
81
+ return !!ReadFile (mHandle , dest, (DWORD)size, &read, nullptr );
82
+ }
83
+
84
+ public:
85
+ HandleFileReader (const wchar_t * szFileName)
86
+ {
87
+ mHandle = CreateFileW (szFileName, GENERIC_READ, FILE_SHARE_READ, nullptr , OPEN_EXISTING, 0 , nullptr );
88
+ if (mHandle != INVALID_HANDLE_VALUE)
89
+ {
90
+ LARGE_INTEGER size;
91
+ if (GetFileSizeEx (mHandle , &size))
92
+ {
93
+ mFileSize = size.QuadPart ;
94
+ }
95
+ else
96
+ {
97
+ CloseHandle (mHandle );
98
+ mHandle = INVALID_HANDLE_VALUE;
99
+ }
100
+ }
101
+ }
102
+
103
+ ~HandleFileReader () override
104
+ {
105
+ if (isopen ())
106
+ {
107
+ CloseHandle (mHandle );
108
+ mHandle = INVALID_HANDLE_VALUE;
109
+ }
110
+ }
111
+
112
+ HandleFileReader (const HandleFileReader &) = delete ;
113
+
114
+ bool isopen () const override
115
+ {
116
+ return mHandle != INVALID_HANDLE_VALUE;
117
+ }
118
+
119
+ bool read (void * dest, uint64_t index, size_t size) override
120
+ {
121
+ if (index + size > mFileSize )
122
+ return false ;
123
+
124
+ if (size > mBufferSize )
125
+ return readnobuffer (dest, index, size);
126
+
127
+ if (index < mBufferIndex || index + size > mBufferIndex + mBuffer .size ())
128
+ {
129
+ auto bufferSize = std::min (uint64_t (mBufferSize ), mFileSize - index);
130
+ mBuffer .resize (size_t (bufferSize));
131
+ mBufferIndex = size_t (index);
132
+ /* if (mBufferDirection == Left)
133
+ {
134
+ if (mBufferIndex >= mBufferSize + size)
135
+ mBufferIndex -= mBufferSize + size;
136
+ }*/
137
+ if (!readnobuffer (mBuffer .data (), mBufferIndex , mBuffer .size ()))
138
+ return false ;
139
+ }
140
+
141
+ if (size == 1 )
142
+ {
143
+ *(unsigned char *)dest = mBuffer [index - mBufferIndex ];
144
+ }
145
+ else
146
+ {
147
+ #ifdef _DEBUG
148
+ auto dst = (unsigned char *)dest;
149
+ for (size_t i = 0 ; i < size; i++)
150
+ dst[i] = mBuffer .at (index - mBufferIndex + i);
151
+ #else
152
+ memcpy (dest, mBuffer .data () + (index - mBufferIndex ), size);
153
+ #endif // _DEBUG
154
+ }
155
+
156
+ return true ;
157
+ }
158
+
159
+ uint64_t size () override
160
+ {
161
+ return mFileSize ;
162
+ }
163
+
164
+ void setbuffersize (size_t size) override
165
+ {
166
+ mBufferSize = size_t (std::min (uint64_t (size), mFileSize ));
167
+ }
168
+
169
+ void setbufferdirection (Direction direction) override
170
+ {
171
+ mBufferDirection = direction;
172
+ }
173
+ };
174
+
175
+ class FileLines
176
+ {
177
+ std::vector<uint64_t > mLines ;
178
+ std::unique_ptr<IBufferedFileReader> mReader ;
179
+
180
+ public:
181
+ bool isopen ()
182
+ {
183
+ return mReader && mReader ->isopen ();
184
+ }
185
+
186
+ bool open (const wchar_t * szFileName)
187
+ {
188
+ if (isopen ())
189
+ return false ;
190
+ mReader = std::make_unique<HandleFileReader>(szFileName);
191
+ return mReader ->isopen ();
192
+ }
193
+
194
+ bool parse ()
195
+ {
196
+ if (!isopen ())
197
+ return false ;
198
+ auto filesize = mReader ->size ();
199
+ mReader ->setbufferdirection (IBufferedFileReader::Right);
200
+ mReader ->setbuffersize (10 * 1024 * 1024 );
201
+ size_t curIndex = 0 , curSize = 0 ;
202
+ for (uint64_t i = 0 ; i < filesize; i++)
203
+ {
204
+ /* if (mLines.size() % 100000 == 0)
205
+ printf("%zu\n", i);*/
206
+ char ch;
207
+ if (!mReader ->readchar (i, ch))
208
+ return false ;
209
+ if (ch == ' \r ' )
210
+ continue ;
211
+ if (ch == ' \n ' )
212
+ {
213
+ mLines .push_back (curIndex);
214
+ curIndex = i + 1 ;
215
+ curSize = 0 ;
216
+ continue ;
217
+ }
218
+ curSize++;
219
+ }
220
+ if (curSize > 0 )
221
+ mLines .push_back (curIndex);
222
+ mLines .push_back (filesize + 1 );
223
+ return true ;
224
+ }
225
+
226
+ size_t size () const
227
+ {
228
+ return mLines .size () - 1 ;
229
+ }
230
+
231
+ std::string operator [](size_t index)
232
+ {
233
+ auto lineStart = mLines [index];
234
+ auto nextLineStart = mLines [index + 1 ];
235
+ std::string result;
236
+ mReader ->readstring (lineStart, nextLineStart - lineStart - 1 , result);
237
+ while (!result.empty () && result.back () == ' \r ' )
238
+ result.pop_back ();
239
+ return result;
240
+ }
241
+ };
0 commit comments