2024-01-11

Reading a fixed width file with Rust

I am trying to read a fixed width file with Rust, with the goal to load the data into a Polars dataframe, however, I can't parse the file correctly.
I am trying to use the fixed_width crate as polars lacks the utility to parse such files.
Unfortunately, the fixed_width documentation does not have any example where a file is read, all the examples read data from a string...

Here is my failed attempt (the commented code works as it is a copy-paste from the docs) and obviously, the file contains same data as the string:

use serde::{Serialize, Deserialize};
use fixed_width::{FixedWidth, FieldSet, Reader};


fn main() {
    let r_file = "path/to/file.r01";
    // let data = String::from("R   5001.00   1001.00                          513777.5 2093285.7   0.0\nR   5001.00   1002.00                          513786.6 2093281.6   0.0\nR   5001.00   1003.00                          513795.7 2093277.4   0.0\nR   5001.00   1004.00                          513708.8 2093273.3   0.0\n");

    #[derive(Serialize, Deserialize, Debug)]
    struct SpsRecord {
        pub line: f32,
        pub point: f32,
        pub easting: f32,
        pub northing: f32
    }

    impl FixedWidth for SpsRecord {
        fn fields() -> FieldSet {
            FieldSet::Seq(vec![
                FieldSet::new_field(1..11).name("line"),
                FieldSet::new_field(11..21).name("point"),
                FieldSet::new_field(46..55).name("easting"),
                FieldSet::new_field(55..65).name("northing")
            ])
        }
    }

    impl SpsRecord {
        fn from_file(path: &str) -> Result<Vec<Self>, fixed_width::Error> {
            let mut reader = Reader::from_file(path)?;
            let records: Result<Vec<Self>, fixed_width::Error> = reader
                .byte_reader()
                .filter_map(Result::ok)
                .map(|bytes| fixed_width::from_bytes(&bytes))
                .collect();
            match records {
                Ok(records) => Ok(records),
                Err(err) => Err(fixed_width::Error::from(err))
            }
        }

        // fn from_string(data: &str) -> Result<Vec<Self>, fixed_width::Error> {
        //     let mut reader = Reader::from_string(data).width(72);
        //     let records: Result<Vec<Self>, fixed_width::Error> = reader
        //         .byte_reader()
        //         .filter_map(Result::ok)
        //         .map(|bytes| fixed_width::from_bytes(&bytes))
        //         .collect();
        //     match records {
        //         Ok(records) => Ok(records),
        //         Err(err) => Err(fixed_width::Error::from(err))
        //     }
        // }
    }

    println!("Reading {}...", r_file);
    match SpsRecord::from_file(r_file) {
        Ok(records) => {
            for record in records {
                println!("{:?}", record);
            }
        }
        Err(err) => {
            eprintln!("{:#?}", err);
        }
    }

    // println!("Reading\n{}...", data);
    // match SpsRecord::from_string(&data) {
    //     Ok(records) => {
    //         for record in records {
    //             println!("{:?}", record);
    //         }
    //     }
    //     Err(err) => {
    //         eprintln!("{:#?}", err);
    //     }
    // } 
}

The code runs, prints the "Reading..." line and does absolutely nothing, so I don't know where to look.



No comments:

Post a Comment