www.mooseframework.org
DelimitedFileReader.C
Go to the documentation of this file.
1 /****************************************************************/
2 /* DO NOT MODIFY THIS HEADER */
3 /* MOOSE - Multiphysics Object Oriented Simulation Environment */
4 /* */
5 /* (c) 2010 Battelle Energy Alliance, LLC */
6 /* ALL RIGHTS RESERVED */
7 /* */
8 /* Prepared by Battelle Energy Alliance, LLC */
9 /* Under Contract No. DE-AC07-05ID14517 */
10 /* With the U. S. Department of Energy */
11 /* */
12 /* See COPYRIGHT for full restrictions */
13 /****************************************************************/
14 
15 // STL includes
#include <algorithm>
#include <sstream>
#include <iomanip>
#include <iterator>
19 
20 // MOOSE includes
21 #include "DelimitedFileReader.h"
22 #include "MooseUtils.h"
23 #include "MooseError.h"
24 #include "pcrecpp.h"
25 
26 namespace MooseUtils
27 {
28 
29 DelimitedFileReader::DelimitedFileReader(const std::string & filename,
30  const libMesh::Parallel::Communicator * comm)
31  : _filename(filename),
32  _header_flag(HeaderFlag::AUTO),
33  _ignore_empty_lines(true),
34  _communicator(comm),
35  _format_flag(FormatFlag::COLUMNS)
36 {
37 }
38 
// Reads the file and fills _names and _data.
//
// The file is parsed when no communicator is set or on rank 0 of the communicator; when a
// communicator is set, the names, the flat raw values and the row offsets are then broadcast.
// Finally the flat 'raw' vector is split into one vector per name in _data.
//
// NOTE(review): this listing omits several original lines (40, 54, 62, 91 and 103 in the
// embedded numbering), including the function signature and the conditions that select the
// "row" branches. Presumably the signature is DelimitedFileReader::read() and the conditions
// test _format_flag -- confirm against the real source before editing this function.
39 void
41 {
42  // Number of columns
43  std::size_t n_cols;
44 
45  // Storage for the raw data
46  std::vector<double> raw;
47  std::size_t size_raw = 0;
48  std::size_t size_offsets = 0;
49 
50  // Read data
51  if (_communicator == nullptr || _communicator->rank() == 0)
52  {
53  // Check the file
55 
56  // Create the file stream and do nothing if the file is empty
57  std::ifstream stream_data(_filename);
58  if (stream_data.peek() == std::ifstream::traits_type::eof())
 // NOTE(review): with a communicator set, only the reading rank can take this early return;
 // confirm that the remaining ranks do not wait in the broadcasts below.
59  return;
60 
61  // Read/generate the header
63  readRowData(stream_data, raw);
64  else
65  readColumnData(stream_data, raw);
66 
67  // Set the number of columns
 // The number of names read or generated defines how many vectors _data will hold.
68  n_cols = _names.size();
69 
70  // Close the stream
71  stream_data.close();
72 
73  // Set raw data vector size
74  size_raw = raw.size();
75  size_offsets = _row_offsets.size();
76  }
77 
78  if (_communicator != nullptr)
79  {
 // Each size is broadcast first so the receiving side can resize its container before the
 // container itself is broadcast.
80  // Broadcast column names
81  _communicator->broadcast(n_cols);
82  _names.resize(n_cols);
83  _communicator->broadcast(_names);
84 
85  // Broadcast raw data
86  _communicator->broadcast(size_raw);
87  raw.resize(size_raw);
88  _communicator->broadcast(raw);
89 
90  // Broadcast row offsets
92  {
93  _communicator->broadcast(size_offsets);
94  _row_offsets.resize(size_offsets);
95  _communicator->broadcast(_row_offsets);
96  }
97  }
98 
99  // Resize the internal storage
100  _data.resize(n_cols);
101 
102  // Process "row" formatted data
104  {
 // Rows may differ in length: _row_offsets[j] is the number of values belonging to entry j,
 // and 'start' walks through 'raw' one entry at a time.
105  std::vector<double>::iterator start = raw.begin();
106  for (std::size_t j = 0; j < n_cols; ++j)
107  {
108  _data[j] = std::vector<double>(start, start + _row_offsets[j]);
109  std::advance(start, _row_offsets[j]);
110  }
111  }
112 
113  // Process "column" formatted data
114  else
115  {
 // NOTE(review): both the assertion and the division below use n_cols as a divisor; confirm
 // that n_cols cannot be zero when this branch is reached.
116  mooseAssert(raw.size() % n_cols == 0,
117  "The raw data is not evenly divisible by the number of columns.");
118  const std::size_t n_rows = raw.size() / n_cols;
119  for (std::size_t j = 0; j < n_cols; ++j)
120  {
121  _data[j].resize(n_rows);
 // 'raw' stores the values one file row after another, so value i of column j sits at
 // i * n_cols + j.
122  for (std::size_t i = 0; i < n_rows; ++i)
123  _data[j][i] = raw[i * n_cols + j];
124  }
125  }
126 }
127 
// Returns a const reference to the stored names (_names).
// NOTE(review): the signature line (129 in the embedded numbering) is missing from this listing;
// the cross-reference index suggests "getNames() const" -- confirm against the real source.
128 const std::vector<std::string> &
130 {
131  return _names;
132 }
133 
// Returns a const reference to the complete data storage (_data), one inner vector per name.
// NOTE(review): the signature line (135 in the embedded numbering) is missing from this listing;
// the cross-reference index suggests "getData() const" -- confirm against the real source.
134 const std::vector<std::vector<double>> &
136 {
137  return _data;
138 }
139 
140 const std::vector<double> &
141 DelimitedFileReader::getData(const std::string & name) const
142 {
143  const auto it = find(_names.begin(), _names.end(), name);
144  if (it == _names.end())
145  mooseError("Could not find '", name, "' in header of file ", _filename, ".");
146  return _data[std::distance(_names.begin(), it)];
147 }
148 
149 const std::vector<double> &
150 DelimitedFileReader::getData(std::size_t index) const
151 {
152  if (index >= _data.size())
153  mooseError("The supplied index ",
154  index,
155  " is out-of-range for the available data in file '",
156  _filename,
157  "' which contains ",
158  _data.size(),
159  " items.");
160  return _data[index];
161 }
162 
163 void
164 DelimitedFileReader::readColumnData(std::ifstream & stream_data, std::vector<double> & output)
165 {
166  // Local storage for the data being read
167  std::string line;
168  std::vector<double> row;
169 
170  // Keep track of the line number for error reporting
171  unsigned int count = 0;
172 
173  // Number of columns expected based on the first row of the data
174  std::size_t n_cols = INVALID_SIZE;
175 
176  // Read the lines
177  while (std::getline(stream_data, line))
178  {
179  // Increment line counter and clear any tokenized data
180  count++;
181  row.clear();
182 
183  // Ignore empty and/or comment lines, if applicable
184  if (preprocessLine(line, count))
185  continue;
186 
187  // Read header, if the header exists and the column names do not exist.
188  if (_names.empty() && header(line))
189  {
190  MooseUtils::tokenize(line, _names, 1, delimiter(line));
191  for (std::string & str : _names)
192  str = MooseUtils::trim(str);
193  continue;
194  }
195 
196  // Separate the row and error if it fails
197  processLine(line, row, count);
198 
199  // Set the number of columns
200  if (n_cols == INVALID_SIZE)
201  n_cols = row.size();
202 
203  // Check number of columns
204  if (row.size() != n_cols)
205  mooseError("The number of columns read (",
206  row.size(),
207  ") does not match the number of columns expected (",
208  n_cols,
209  ") based on the first row of the file when reading row ",
210  count,
211  " in file ",
212  _filename,
213  ".");
214 
215  // Append data
216  output.insert(output.end(), row.begin(), row.end());
217  }
218 
219  // If the names have not been assigned, create the default names
220  if (_names.empty())
221  {
222  _names.resize(n_cols);
223  int padding = MooseUtils::numDigits(n_cols);
224  for (std::size_t i = 0; i < n_cols; ++i)
225  {
226  std::stringstream ss;
227  ss << "column_" << std::setw(padding) << std::setfill('0') << i;
228  _names[i] = ss.str();
229  }
230  }
231 }
232 
233 void
234 DelimitedFileReader::readRowData(std::ifstream & stream_data, std::vector<double> & output)
235 {
236  // Local storage for the data being read
237  std::string line;
238  std::vector<double> row;
239  unsigned int linenum = 0; // line number in file
240 
241  // Clear existing data
242  _names.clear();
243  _row_offsets.clear();
244 
245  // Read the lines
246  while (std::getline(stream_data, line))
247  {
248  // Increment line counter and clear any tokenized data
249  linenum++;
250  row.clear();
251 
252  // Ignore empty lines
253  if (preprocessLine(line, linenum))
254  continue;
255 
256  if (header(line))
257  {
258  std::size_t index = line.find_first_of(delimiter(line));
259  _names.push_back(line.substr(0, index));
260  line = line.substr(index);
261  }
262 
263  // Separate the row and error if it fails
264  processLine(line, row, linenum);
265 
266  // Store row offsets to allow for un-even rows
267  _row_offsets.push_back(row.size());
268 
269  // Append data
270  output.insert(output.end(), row.begin(), row.end());
271  }
272 
273  // Assign row names if not provided via header
274  if (_names.empty())
275  {
276  int padding = MooseUtils::numDigits(_row_offsets.size());
277  for (std::size_t i = 0; i < _row_offsets.size(); ++i)
278  {
279  std::stringstream ss;
280  ss << "row_" << std::setw(padding) << std::setfill('0') << i;
281  _names.push_back(ss.str());
282  }
283  }
284 }
285 
// Cleans up 'line' in place and reports whether it should be skipped.
//
// line: the raw line; on return it holds the text before any row comment, trimmed.
// num:  line number, used only in the error message.
// Returns true when the cleaned line is empty and is to be skipped, false when it has content.
//
// NOTE(review): the condition guarding the "return true" / "else" pair (line 296 in the embedded
// numbering) is missing from this listing; presumably it tests _ignore_empty_lines -- confirm
// against the real source.
286 bool
287 DelimitedFileReader::preprocessLine(std::string & line, const unsigned int & num)
288 {
289  // Handle row comments
 // find_first_of matches any single character contained in _row_comment; when nothing matches
 // it yields npos and substr keeps the whole line.
290  std::size_t index = _row_comment.empty() ? line.size() : line.find_first_of(_row_comment);
291  line = MooseUtils::trim(line.substr(0, index));
292 
293  // Ignore empty lines
294  if (line.empty())
295  {
297  return true;
298  else
299  mooseError("Failed to read line ", num, " in file ", _filename, ". The line is empty.");
300  }
301  return false;
302 }
303 
304 void
305 DelimitedFileReader::processLine(const std::string & line,
306  std::vector<double> & row,
307  const unsigned int & num)
308 {
309  // Separate the row and error if it fails
310  bool status = MooseUtils::tokenizeAndConvert<double>(line, row, delimiter(line));
311  if (!status)
312  mooseError("Failed to convert a delimited data into double when reading line ",
313  num,
314  " in file ",
315  _filename,
316  ".\n LINE ",
317  num,
318  ": ",
319  line);
320 }
321 
322 const std::string &
323 DelimitedFileReader::delimiter(const std::string & line)
324 {
325  if (_delimiter.empty())
326  {
327  if (line.find(",") != std::string::npos)
328  _delimiter = ",";
329  else if (line.find("\t") != std::string::npos)
330  _delimiter = "\t";
331  else
332  _delimiter = " ";
333  }
334  return _delimiter;
335 }
336 
337 bool
338 DelimitedFileReader::header(const std::string & line)
339 {
340  switch (_header_flag)
341  {
342  case HeaderFlag::OFF:
343  return false;
344  case HeaderFlag::ON:
345  return true;
346  default:
347 
348  // Attempt to convert the line, if it fails assume it is a header
349  std::vector<double> row;
350  bool contains_alpha = !MooseUtils::tokenizeAndConvert<double>(line, row, delimiter(line));
351 
352  // Based on auto detect set the flag to TRUE|FALSE to short-circuit this check for each line
353  // in the case of row data.
354  _header_flag = contains_alpha ? HeaderFlag::ON : HeaderFlag::OFF;
355  return contains_alpha;
356  }
357 }
358 
360 // DEPRECATED METHODS (TODO: To be removed after applications are updated)
362 DelimitedFileReader::DelimitedFileReader(const std::string & filename,
363  const bool header,
364  const std::string delimiter,
365  const libMesh::Parallel::Communicator * comm)
366  : _filename(filename),
367  _header_flag(header ? HeaderFlag::ON : HeaderFlag::AUTO),
368  _delimiter(delimiter),
369  _ignore_empty_lines(true),
370  _communicator(comm),
371  _format_flag(FormatFlag::COLUMNS)
372 {
373  mooseDeprecated("Use setHeader and setDelimiter method rather than specifying in constructor.");
374 }
375 
// Deprecated accessor: emits a deprecation message and forwards to getNames().
// NOTE(review): the signature line (377 in the embedded numbering) is missing from this listing;
// the cross-reference index suggests "getColumnNames() const" -- confirm against the real source.
376 const std::vector<std::string> &
378 {
379  mooseDeprecated("Use getNames instead.");
380  return getNames();
381 }
382 
// Deprecated accessor: emits a deprecation message and forwards to getData().
// NOTE(review): the signature line (384 in the embedded numbering) is missing from this listing;
// the cross-reference index suggests "getColumnData() const" -- confirm against the real source.
383 const std::vector<std::vector<double>> &
385 {
386  mooseDeprecated("Use getData instead.");
387  return getData();
388 }
389 
390 const std::vector<double> &
391 DelimitedFileReader::getColumnData(const std::string & name) const
392 {
393  mooseDeprecated("Use getData instead.");
394  return getData(name);
395 }
396 
// Deprecated method: emits a deprecation message.
// NOTE(review): both the signature line (398) and one body line (401 in the embedded numbering)
// are missing from this listing, so the name, the parameters and the remaining behavior cannot
// be determined here -- confirm against the real source.
397 void
399 {
400  mooseDeprecated("Use header method with HeaderFlag input.");
402 }
403 
404 } // MooseUtils
std::vector< std::vector< double > > _data
Storage for the read data columns.
void setHeaderFlag(HeaderFlag value)
void read()
Perform the actual data reading.
void tokenize(const std::string &str, std::vector< T > &elements, unsigned int min_len=1, const std::string &delims="/")
This function will split the passed in string on a set of delimiters appending the substrings to the ...
Definition: MooseUtils.h:350
std::string _delimiter
The delimiter separating the supplied data entries.
HeaderFlag _header_flag
Flag indicating if the file contains a header.
void mooseError(Args &&...args)
Emit an error message with the given stringified, concatenated args and terminate the application...
Definition: MooseError.h:182
bool _ignore_empty_lines
Flag for ignoring empty lines.
bool preprocessLine(std::string &line, const unsigned int &num)
Check the content of the line and if it should be skipped.
const std::vector< std::string > & getNames() const
Return the column/row names.
void mooseDeprecated(Args &&...args)
Emit a deprecated code/feature message with the given stringified, concatenated args.
Definition: MooseError.h:202
std::vector< std::string > _names
Storage for the read or generated column names.
const std::vector< std::vector< double > > & getData() const
Return the rows/columns of data.
FormatFlag _format_flag
Format "rows" vs "columns".
const std::vector< std::string > & getColumnNames() const
bool checkFileReadable(const std::string &filename, bool check_line_endings=false, bool throw_on_unreadable=true)
Checks to see if a file is readable (exists and permissions)
Definition: MooseUtils.C:121
bool header(const std::string &line)
Return the header flag, if it is set to AUTO attempt to determine if a header exists in line...
const libMesh::Parallel::Communicator * _communicator
Communicator.
void readColumnData(std::ifstream &stream_data, std::vector< double > &output)
Read the numeric data as rows or columns into a single vector.
DelimitedFileReader(const std::string &filename, const libMesh::Parallel::Communicator *comm=nullptr)
const std::string _filename
The supplied filename.
void readRowData(std::ifstream &stream_data, std::vector< double > &output)
int numDigits(const T &num)
Return the number of digits for a number.
Definition: MooseUtils.h:445
std::string trim(const std::string &str, const std::string &white_space=" \t\n\v\f\r")
Standard scripting language trim function.
Definition: MooseUtils.C:95
MPI_Comm comm
std::string _row_comment
Hide row comments.
const std::string & delimiter(const std::string &line)
Determine the delimiter.
const std::vector< std::vector< double > > & getColumnData() const
std::vector< std::size_t > _row_offsets
Row offsets (only used with _format == "rows")
void processLine(const std::string &line, std::vector< double > &row, const unsigned int &num)
Populate supplied vector with content from line.