// influxdb/wal/tests/file_rollover.rs

use std::fs;

use wal::{WalBuilder, WritePayload};

#[macro_use]
mod helpers;
use crate::helpers::*;

#[test]
#[allow(clippy::cognitive_complexity)]
fn file_rollover() -> Result {
let dir = test_helpers::tmp_dir()?;
// Set the file rollover size limit low to test rollover
let builder = WalBuilder::new(dir.as_ref()).file_rollover_size(100);
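// Note: as the steps below show, the limit is evidently checked when a write
// starts rather than enforced as a hard cap; a file that is still under the
// limit accepts the next batch even if that batch pushes it over.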
let mut wal = builder.clone().wal()?;
// Should start without existing WAL files
let wal_files = wal_file_names(&dir.as_ref());
assert!(wal_files.is_empty());
// Reading the WAL should return Ok(empty vec)
let wal_entries = all_entries(&builder)?;
assert!(wal_entries.is_empty());
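// (`all_entries`, from the helpers module, presumably reads every WAL file in
// the directory in order and concatenates their entries.)
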
// Write one WAL entry when there are no existing WAL files
create_and_sync_batch!(wal, ["some data within the file limit"]);
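// (`create_and_sync_batch!`, also from helpers, presumably wraps each string
// in a `WritePayload`, appends it to the WAL, and syncs; entries receive
// consecutive sequence numbers starting from 0.)
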
// There should now be one existing WAL file
assert_filenames_for_sequence_numbers!(dir, [0]);
// Should be able to read the entry back out
let wal_entries = all_entries(&builder)?;
assert_eq!(1, wal_entries.len());
assert_entry!(wal_entries[0], 0, b"some data within the file limit");
// Write one WAL entry when there is an existing WAL file that is currently
// under the size limit; the entry should end up in the same WAL file
create_and_sync_batch!(wal, ["some more data that puts the file over the limit"]);
// There should still be one existing WAL file
assert_filenames_for_sequence_numbers!(dir, [0]);
// Should be able to read the entries back out
let wal_entries = all_entries(&builder)?;
assert_eq!(2, wal_entries.len());
assert_entry!(wal_entries[0], 0, b"some data within the file limit");
assert_entry!(
wal_entries[1],
1,
b"some more data that puts the file over the limit",
);
// Write one WAL entry, and because the existing file is over the size limit,
// this entry should end up in a new WAL file
create_and_sync_batch!(
wal,
["some more data, this should now be rolled over into the next WAL file"]
);
// There should now be two existing WAL files
assert_filenames_for_sequence_numbers!(dir, [0, 2]);
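// (Note the names: each WAL file appears to be named for the sequence number
// of its first entry, hence [0, 2] rather than [0, 1]; compare
// `file_name_for_sequence_number` used near the end of this test.)
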
// Should be able to read the entries back out
let wal_entries = all_entries(&builder)?;
assert_eq!(3, wal_entries.len());
assert_entry!(wal_entries[0], 0, b"some data within the file limit");
assert_entry!(
wal_entries[1],
1,
b"some more data that puts the file over the limit"
);
assert_entry!(
wal_entries[2],
2,
b"some more data, this should now be rolled over into the next WAL file"
);
// Write two WAL entries, one that could fit in the existing file but puts the
// file over the limit. Because the two entries are in one sync batch, they
// both will end up in the existing file even though it's over the limit
// after the first entry.
create_and_sync_batch!(
wal,
[
"one entry that puts the existing file over the limit",
"another entry"
]
);
// There should still be two existing WAL files
assert_filenames_for_sequence_numbers!(dir, [0, 2]);
// Should be able to read the entries back out
let wal_entries = all_entries(&builder)?;
assert_eq!(5, wal_entries.len());
assert_entry!(wal_entries[0], 0, b"some data within the file limit");
assert_entry!(
wal_entries[1],
1,
b"some more data that puts the file over the limit"
);
assert_entry!(
wal_entries[2],
2,
b"some more data, this should now be rolled over into the next WAL file"
);
assert_entry!(
wal_entries[3],
3,
b"one entry that puts the existing file over the limit"
);
assert_entry!(wal_entries[4], 4, b"another entry");
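// (Sequence numbers are global across the WAL, not per file: entries 3 and 4
// land in the second file, which is named for sequence number 2, and keep
// their global numbers.)
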
// Some process deletes the first WAL file
let path = dir.path().join(file_name_for_sequence_number(0));
fs::remove_file(path)?;
// Should be able to read the remaining entries back out
let wal_entries = all_entries(&builder)?;
assert_eq!(3, wal_entries.len());
assert_entry!(
wal_entries[0],
2,
b"some more data, this should now be rolled over into the next WAL file"
);
assert_entry!(
wal_entries[1],
3,
b"one entry that puts the existing file over the limit"
);
assert_entry!(wal_entries[2], 4, b"another entry");
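// Entries keep their original sequence numbers: reading presumably just walks
// whichever files still exist, so the gap at 0 and 1 is the only trace of the
// deleted file.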
Ok(())
}