Commit db97efeb authored by Paul Asmuth's avatar Paul Asmuth
Browse files

read utf-8 byte order mark

parent b940241e
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -58,6 +58,12 @@ add_executable(test/test-ui
    ${FNORDMETRIC_UI_SOURCES}
    src/ui/ui_test.cc)

# Registers the InputStreamTest test and builds its executable from the core
# sources, the CSV backend sources and the input stream unit test itself.
add_test(InputStreamTest test/test-input-stream)
add_executable(test/test-input-stream
    ${FNORDMETRIC_SOURCES}
    ${FNORDMETRIC_CSV_BACKEND_SOURCES}
    src/util/inputstream_test.cc)

add_test(CSVBackendTest test/test-csv-backend)
add_executable(test/test-csv-backend
    ${FNORDMETRIC_UI_SOURCES}
+0 −1
Original line number Diff line number Diff line
@@ -32,7 +32,6 @@ TEST_CASE(CSVInputStreamTest, TestInvalidFileName, [] () {

  EXPECT_EXCEPTION(errmsg, [] () {
    auto csv_file = CSVInputStream::openFile("test/fixtures/invalid.csv");
    EXPECT(csv_file.get() != nullptr);
  });
});

+24 −9
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@ std::unique_ptr<FileInputStream> FileInputStream::openFile(
  }

  auto csv_file = new FileInputStream(fd);
  csv_file->readNextChunk();
  return std::unique_ptr<FileInputStream>(csv_file);
}

@@ -30,17 +31,9 @@ FileInputStream::FileInputStream(int fd) : fd_(fd) {}

bool FileInputStream::readNextByte(char* target) {
  if (buf_pos_ >= buf_len_) {
    int bytes_read = read(fd_, buf_, sizeof(buf_));

    if (bytes_read < 0) {
      RAISE_ERRNO(RuntimeException, "read() failed");
    }

    buf_pos_ = 0;
    buf_len_ = bytes_read;
    readNextChunk();
  }


  if (buf_pos_ < buf_len_) {
    *target = buf_[buf_pos_++];
    return true;
@@ -49,5 +42,27 @@ bool FileInputStream::readNextByte(char* target) {
  }
}

/**
 * Check whether the next three unread bytes in the buffer are the UTF-8 byte
 * order mark (EF BB BF). If they are, the read position is advanced past them
 * and BOM_UTF8 is returned; otherwise the read position is left untouched and
 * BOM_UNKNOWN is returned.
 */
FileInputStream::kByteOrderMark FileInputStream::readByteOrderMark() {
  static const char utf8_mark[] = "\xEF\xBB\xBF";
  const size_t mark_len = sizeof(utf8_mark) - 1; // drop the trailing NUL

  // fewer than three unread bytes buffered: there cannot be a complete mark
  if (buf_len_ < buf_pos_ + mark_len) {
    return BOM_UNKNOWN;
  }

  if (memcmp(buf_ + buf_pos_, utf8_mark, mark_len) != 0) {
    return BOM_UNKNOWN;
  }

  buf_pos_ += mark_len;
  return BOM_UTF8;
}

/**
 * Refill the internal buffer with a single read() from the file descriptor
 * and reset the read position to the start of the buffer.
 *
 * On return buf_len_ holds the number of bytes read() delivered (zero once
 * read() reports end of file). Raises a RuntimeException via RAISE_ERRNO if
 * read() returns an error.
 */
void FileInputStream::readNextChunk() {
  // read() returns ssize_t; keep the full width instead of narrowing to int
  const ssize_t bytes_read = read(fd_, buf_, sizeof(buf_));

  if (bytes_read < 0) {
    RAISE_ERRNO(RuntimeException, "read() failed");
  }

  buf_pos_ = 0;
  // non-negative at this point, so the conversion to size_t is lossless
  buf_len_ = static_cast<size_t>(bytes_read);
}

}
}
+11 −0
Original line number Diff line number Diff line
@@ -30,6 +30,10 @@ public:

class FileInputStream : public InputStream {
public:
  /**
   * Byte order marks that readByteOrderMark() can report
   */
  enum kByteOrderMark {
    BOM_UNKNOWN = 0, // no recognised byte order mark was found
    BOM_UTF8 = 1     // the UTF-8 byte order mark
  };

  /**
   * Open a new file input stream from the provided file path. Throws an
@@ -56,8 +60,15 @@ public:
   */
  bool readNextByte(char* target) override;

  /**
   * Read the byte order mark of the file.
   *
   * If the next three buffered bytes are the UTF-8 byte order mark they are
   * consumed and BOM_UTF8 is returned. Otherwise nothing is consumed and
   * BOM_UNKNOWN is returned.
   */
  kByteOrderMark readByteOrderMark();

protected:

  /**
   * Refill buf_ from the file descriptor with a single read() and reset the
   * read position to the start of the buffer. Raises a RuntimeException if
   * read() fails.
   */
  void readNextChunk();

  char buf_[8192]; // FIXPAUL make configurable
  size_t buf_len_;
  size_t buf_pos_;
+39 −0
Original line number Diff line number Diff line
/**
 * This file is part of the "FnordStream" project
 *   Copyright (c) 2014 Paul Asmuth, Google Inc.
 *
 * Licensed under the MIT license (see LICENSE).
 */
#include <stdlib.h>
#include <stdio.h>
#include "inputstream.h"
#include "runtimeexception.h"
#include "unittest.h"

using namespace fnordmetric::util;

// Declares the FileInputStreamTest suite named by the TEST_CASE blocks in this
// file (presumably registers it with the runner from unittest.h -- confirm).
UNIT_TEST(FileInputStreamTest);

// Opening the gbp_per_country.csv fixture must hand back a non-null stream.
TEST_CASE(FileInputStreamTest, TestOpenFile, [] () {
  auto file = FileInputStream::openFile(
      "test/fixtures/gbp_per_country.csv");

  EXPECT(file.get() != nullptr);
});

// Opening a path that does not exist is expected to raise an exception.
// NOTE(review): EXPECT_EXCEPTION presumably compares the raised message with
// errmsg -- confirm against unittest.h.
TEST_CASE(FileInputStreamTest, TestInvalidFileName, [] () {
  auto errmsg = "error opening file 'test/fixtures/invalid.csv': "
      "No such file or directory";

  EXPECT_EXCEPTION(errmsg, [] () {
    auto file = FileInputStream::openFile("test/fixtures/invalid.csv");
  });
});

// A freshly opened stream over the fixture must report a UTF-8 byte order mark.
// NOTE(review): this relies on gbp_per_country.csv starting with the bytes
// EF BB BF -- confirm the fixture is stored with a BOM.
TEST_CASE(FileInputStreamTest, TestReadUTF8ByteOrderMark, [] () {
  auto file = FileInputStream::openFile(
      "test/fixtures/gbp_per_country.csv");

  EXPECT(file.get() != nullptr);
  EXPECT(file->readByteOrderMark() == FileInputStream::BOM_UTF8);
});
Loading