Read-Modify-Write Workflows

This guide covers common patterns for reading, modifying, and writing XPT files.

Basic Roundtrip

use xportrs::Xpt;
fn basic_roundtrip(input: &str, output: &str) -> xportrs::Result<()> {
    // Read
    let dataset = Xpt::read(input)?;

    // (Modify here if needed)

    // Write
    Xpt::writer(dataset)
        .finalize()?
        .write_path(output)?;

    Ok(())
}

Preserving Metadata

xportrs automatically preserves metadata during roundtrip:

use xportrs::Xpt;
fn verify_metadata_preservation(path: &str) -> xportrs::Result<()> {
    // Read original
    let original = Xpt::read(path)?;

    // Write to temp
    let temp_path = "/tmp/roundtrip.xpt";
    Xpt::writer(original.clone())
        .finalize()?
        .write_path(temp_path)?;

    // Read back
    let reloaded = Xpt::read(temp_path)?;

    // Verify metadata preserved
    assert_eq!(original.domain_code(), reloaded.domain_code());
    assert_eq!(original.dataset_label(), reloaded.dataset_label());

    for (orig_col, new_col) in original.columns().iter()
        .zip(reloaded.columns().iter())
    {
        assert_eq!(orig_col.name(), new_col.name());
        assert_eq!(
            orig_col.label().map(|l| l.to_string()),
            new_col.label().map(|l| l.to_string())
        );
        // Format, length, etc. also preserved
    }

    Ok(())
}

Adding Columns

use xportrs::{Column, ColumnData, Format, Xpt};
fn add_derived_column(input: &str, output: &str) -> xportrs::Result<()> {
    let mut dataset = Xpt::read(input)?;

    // Get row count
    let nrows = dataset.nrows();

    // Create new column
    let new_column = Column::new(
        "DERIVED",
        ColumnData::F64(vec![Some(1.0); nrows]),
    )
        .with_label("Derived Variable")
        .with_format(Format::numeric(8, 0));

    // Add to dataset
    dataset.extend([new_column]);

    // Write
    Xpt::writer(dataset)
        .finalize()?
        .write_path(output)?;

    Ok(())
}

Modifying Column Data

use xportrs::{Column, ColumnData, Xpt};
fn modify_column_data(input: &str, output: &str) -> xportrs::Result<()> {
    let dataset = Xpt::read(input)?;

    // Create modified columns
    let modified_columns: Vec<Column> = dataset.columns().iter()
        .map(|col| {
            if col.name() == "AESEQ" {
                // Modify AESEQ: multiply by 10
                if let ColumnData::F64(values) = col.data() {
                    let new_values: Vec<Option<f64>> = values.iter()
                        .map(|v| v.map(|x| x * 10.0))
                        .collect();

                    let mut new_col = Column::new(col.name(), ColumnData::F64(new_values));

                    // Preserve metadata
                    if let Some(label) = col.label() {
                        new_col = new_col.with_label(label.to_string());
                    }
                    if let Some(format) = col.format() {
                        new_col = new_col.with_format(format.clone());
                    }

                    return new_col;
                }
            }
            col.clone()
        })
        .collect();

    // Create new dataset with modified columns
    let mut new_dataset = xportrs::Dataset::new(
        dataset.domain_code(),
        modified_columns,
    )?;

    if let Some(label) = dataset.dataset_label() {
        new_dataset.set_label(label);
    }

    Xpt::writer(new_dataset)
        .finalize()?
        .write_path(output)?;

    Ok(())
}

Filtering Rows

use xportrs::{Column, ColumnData, Dataset, Xpt};
fn filter_rows(input: &str, output: &str, keep_indices: &[usize]) -> xportrs::Result<()> {
    let dataset = Xpt::read(input)?;

    // Filter each column
    let filtered_columns: Vec<Column> = dataset.columns().iter()
        .map(|col| {
            let filtered_data = match col.data() {
                ColumnData::F64(values) => {
                    let filtered: Vec<_> = keep_indices.iter()
                        .map(|&i| values[i].clone())
                        .collect();
                    ColumnData::F64(filtered)
                }
                ColumnData::String(values) => {
                    let filtered: Vec<_> = keep_indices.iter()
                        .map(|&i| values[i].clone())
                        .collect();
                    ColumnData::String(filtered)
                }
                // Handle other types...
                _ => col.data().clone(),
            };

            let mut new_col = Column::new(col.name(), filtered_data);
            if let Some(label) = col.label() {
                new_col = new_col.with_label(label.to_string());
            }
            if let Some(format) = col.format() {
                new_col = new_col.with_format(format.clone());
            }
            new_col
        })
        .collect();

    let mut filtered_dataset = Dataset::new(
        dataset.domain_code(),
        filtered_columns,
    )?;

    if let Some(label) = dataset.dataset_label() {
        filtered_dataset.set_label(label);
    }

    Xpt::writer(filtered_dataset)
        .finalize()?
        .write_path(output)?;

    Ok(())
}

Merging Datasets

use xportrs::{Column, ColumnData, Dataset, Xpt};
fn merge_datasets(input1: &str, input2: &str, output: &str) -> xportrs::Result<()> {
    let ds1 = Xpt::read(input1)?;
    let ds2 = Xpt::read(input2)?;

    // Verify same structure
    assert_eq!(ds1.ncols(), ds2.ncols(), "Column count mismatch");

    // Concatenate data
    let merged_columns: Vec<Column> = ds1.columns().iter()
        .zip(ds2.columns().iter())
        .map(|(col1, col2)| {
            let merged_data = match (col1.data(), col2.data()) {
                (ColumnData::F64(v1), ColumnData::F64(v2)) => {
                    let mut merged = v1.clone();
                    merged.extend(v2.clone());
                    ColumnData::F64(merged)
                }
                (ColumnData::String(v1), ColumnData::String(v2)) => {
                    let mut merged = v1.clone();
                    merged.extend(v2.clone());
                    ColumnData::String(merged)
                }
                _ => panic!("Type mismatch"),
            };

            let mut col = Column::new(col1.name(), merged_data);
            if let Some(label) = col1.label() {
                col = col.with_label(label.to_string());
            }
            if let Some(format) = col1.format() {
                col = col.with_format(format.clone());
            }
            col
        })
        .collect();

    let mut merged = Dataset::new(ds1.domain_code(), merged_columns)?;
    if let Some(label) = ds1.dataset_label() {
        merged.set_label(label);
    }

    Xpt::writer(merged)
        .finalize()?
        .write_path(output)?;

    Ok(())
}

Updating Labels

use xportrs::{Column, Dataset, Xpt};
use std::collections::HashMap;
fn update_labels(
    input: &str,
    output: &str,
    label_updates: &HashMap<&str, &str>,
) -> xportrs::Result<()> {
    let dataset = Xpt::read(input)?;

    let updated_columns: Vec<Column> = dataset.columns().iter()
        .map(|col| {
            let mut new_col = Column::new(col.name(), col.data().clone());

            // Apply label update if specified
            if let Some(&new_label) = label_updates.get(col.name()) {
                new_col = new_col.with_label(new_label);
            } else if let Some(label) = col.label() {
                new_col = new_col.with_label(label.to_string());
            }

            if let Some(format) = col.format() {
                new_col = new_col.with_format(format.clone());
            }

            new_col
        })
        .collect();

    let mut updated = Dataset::new(dataset.domain_code(), updated_columns)?;
    if let Some(label) = dataset.dataset_label() {
        updated.set_label(label);
    }

    Xpt::writer(updated)
        .finalize()?
        .write_path(output)?;

    Ok(())
}

// Usage
fn main() -> xportrs::Result<()> {
    let mut updates = HashMap::new();
    updates.insert("USUBJID", "Unique Subject Identifier");
    updates.insert("AETERM", "Reported Adverse Event Term");
    update_labels("ae.xpt", "ae_updated.xpt", &updates)
}

Batch Processing

use xportrs::Xpt;
use std::path::Path;
fn process_directory(input_dir: &Path, output_dir: &Path) -> xportrs::Result<()> {
    std::fs::create_dir_all(output_dir)?;

    for entry in std::fs::read_dir(input_dir)? {
        let entry = entry?;
        let path = entry.path();

        if path.extension().map_or(false, |e| e == "xpt") {
            let filename = path.file_name().unwrap();
            let output_path = output_dir.join(filename);

            println!("Processing: {}", path.display());

            let dataset = Xpt::read(&path)?;

            // Process...

            Xpt::writer(dataset)
                .finalize()?
                .write_path(&output_path)?;

            println!("  Wrote: {}", output_path.display());
        }
    }

    Ok(())
}

Error Handling in Roundtrips

use xportrs::{Error, Xpt};
fn safe_roundtrip(input: &str, output: &str) -> Result<(), Box<dyn std::error::Error>> {
    // Read with error handling
    let dataset = match Xpt::read(input) {
        Ok(ds) => ds,
        Err(Error::Io(e)) => {
            eprintln!("Failed to read {}: {}", input, e);
            return Err(e.into());
        }
        Err(e) => return Err(e.into()),
    };

    // Validate
    let validated = Xpt::writer(dataset).finalize()?;

    if validated.has_errors() {
        for issue in validated.issues() {
            eprintln!("Validation error: {}", issue);
        }
        return Err("Validation failed".into());
    }

    // Write
    validated.write_path(output)?;

    // Verify
    let _ = Xpt::read(output)?;

    Ok(())
}

Keyboard shortcuts

xportrs Documentation