This commit is contained in:
Robin Appelman 2024-11-28 20:47:38 +01:00
commit c2c37a194e

View file

@ -3,9 +3,9 @@
//! # Example //! # Example
//! //!
//! ```rust //! ```rust
//! # fn main() {
//! # use comma_separated::CommaSeparatedIterator; //! # use comma_separated::CommaSeparatedIterator;
//! let input = r#"foo, "bar", 'quoted, part'"#; //! # fn main() {
//! let input = r#"foo,"bar",'quoted, part'"#;
//! let iterator = CommaSeparatedIterator::new(input); //! let iterator = CommaSeparatedIterator::new(input);
//! assert_eq!(vec!["foo", "\"bar\"", "'quoted, part'"], iterator.collect::<Vec<_>>()); //! assert_eq!(vec!["foo", "\"bar\"", "'quoted, part'"], iterator.collect::<Vec<_>>());
//! # } //! # }
@ -13,16 +13,12 @@
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
enum CommaSeparatedIteratorState { enum CommaSeparatedIteratorState {
/// Start of string or after a ',' (including whitespace) /// Non quoted part
Default, Default,
/// Inside a quote /// Inside a quote
Quoted(Quote), Quoted(Quote),
/// After escape character inside quote /// After escape character inside quote
QuotedPair(Quote), QuotedEscape(Quote),
/// Non quoted part
Token,
/// After closing quote
PostAmbleForQuoted,
} }
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
@ -32,25 +28,13 @@ enum Quote {
} }
pub struct CommaSeparatedIterator<'a> { pub struct CommaSeparatedIterator<'a> {
/// target remaining: &'a str,
target: &'a str,
/// iterator
char_indices: std::str::CharIndices<'a>,
/// current scanner state
state: CommaSeparatedIteratorState,
/// start position of the last token found
s: usize,
} }
impl<'a> CommaSeparatedIterator<'a> { impl<'a> CommaSeparatedIterator<'a> {
/// Create a new iterator, splitting the input into comma-seperated parts with handling of quoted segments /// Create a new iterator, splitting the input into comma-seperated parts with handling of quoted segments
pub fn new(target: &'a str) -> Self { pub fn new(text: &'a str) -> Self {
Self { Self { remaining: text }
target,
char_indices: target.char_indices(),
state: CommaSeparatedIteratorState::Default,
s: 0,
}
} }
} }
@ -58,62 +42,45 @@ impl<'a> Iterator for CommaSeparatedIterator<'a> {
type Item = &'a str; type Item = &'a str;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
for (i, c) in &mut self.char_indices { if self.remaining.is_empty() {
let (next, next_state) = match (self.state, c) { return None;
}
let mut state = CommaSeparatedIteratorState::Default;
let mut char_indices = self.remaining.char_indices();
for (i, c) in &mut char_indices {
state = match (state, c) {
(CommaSeparatedIteratorState::Default, '"') => { (CommaSeparatedIteratorState::Default, '"') => {
self.s = i; CommaSeparatedIteratorState::Quoted(Quote::Double)
(None, CommaSeparatedIteratorState::Quoted(Quote::Double))
} }
(CommaSeparatedIteratorState::Default, '\'') => { (CommaSeparatedIteratorState::Default, '\'') => {
self.s = i; CommaSeparatedIteratorState::Quoted(Quote::Single)
(None, CommaSeparatedIteratorState::Quoted(Quote::Single))
}
(CommaSeparatedIteratorState::Default, ' ' | '\t') => {
(None, CommaSeparatedIteratorState::Default)
}
(CommaSeparatedIteratorState::Default, ',') => (
Some(Some(&self.target[i..i])),
CommaSeparatedIteratorState::Default,
),
(CommaSeparatedIteratorState::Default, _) => {
self.s = i;
(None, CommaSeparatedIteratorState::Token)
} }
(CommaSeparatedIteratorState::Quoted(Quote::Double), '"') (CommaSeparatedIteratorState::Quoted(Quote::Double), '"')
| (CommaSeparatedIteratorState::Quoted(Quote::Single), '\'') => ( | (CommaSeparatedIteratorState::Quoted(Quote::Single), '\'') => {
Some(Some(&self.target[self.s..i + 1])), CommaSeparatedIteratorState::Default
CommaSeparatedIteratorState::PostAmbleForQuoted, }
),
(CommaSeparatedIteratorState::Quoted(quote), '\\') => { (CommaSeparatedIteratorState::Quoted(quote), '\\') => {
(None, CommaSeparatedIteratorState::QuotedPair(quote)) CommaSeparatedIteratorState::QuotedEscape(quote)
} }
(CommaSeparatedIteratorState::QuotedPair(quote), _) => { (CommaSeparatedIteratorState::Quoted(quote), _) => {
(None, CommaSeparatedIteratorState::Quoted(quote)) CommaSeparatedIteratorState::Quoted(quote)
} }
(CommaSeparatedIteratorState::Token, ',') => ( (CommaSeparatedIteratorState::QuotedEscape(quote), _) => {
Some(Some(&self.target[self.s..i])), CommaSeparatedIteratorState::Quoted(quote)
CommaSeparatedIteratorState::Default,
),
(CommaSeparatedIteratorState::PostAmbleForQuoted, ',') => {
(None, CommaSeparatedIteratorState::Default)
} }
(current_state, _) => (None, current_state), (CommaSeparatedIteratorState::Default, ',') => {
let result = &self.remaining[0..i];
self.remaining = &self.remaining[i + 1..];
return Some(result);
}
(CommaSeparatedIteratorState::Default, _) => CommaSeparatedIteratorState::Default,
}; };
self.state = next_state;
if let Some(next) = next {
return next;
}
}
match self.state {
CommaSeparatedIteratorState::Default
| CommaSeparatedIteratorState::PostAmbleForQuoted => None,
CommaSeparatedIteratorState::Quoted(_)
| CommaSeparatedIteratorState::QuotedPair(_)
| CommaSeparatedIteratorState::Token => {
self.state = CommaSeparatedIteratorState::Default;
Some(&self.target[self.s..])
}
} }
let result = self.remaining;
self.remaining = "";
Some(result)
} }
} }
@ -124,25 +91,24 @@ mod tests {
#[test] #[test]
fn test_comma_separated_iterator() { fn test_comma_separated_iterator() {
assert_eq!( assert_eq!(
vec!["abc", "def", "ghi", "jkl ", "mno", "pqr"], vec!["abc", "def", " ghi", "\tjkl ", " mno", "\tpqr"],
CommaSeparatedIterator::new("abc,def, ghi,\tjkl , mno,\tpqr").collect::<Vec<&str>>() CommaSeparatedIterator::new("abc,def, ghi,\tjkl , mno,\tpqr").collect::<Vec<&str>>()
); );
assert_eq!( assert_eq!(
vec![ vec![
"abc", r#""abc,def""#,
"\"def\"", " \"ghi\"",
"\"ghi\"", "\"jkl\" ",
"\"jkl\"", " \"mno\"",
"\"mno\"",
"pqr", "pqr",
"\"abc, def\"", " \"abc, def\"",
"foo", " foo",
"\" foo\"", " \" foo\"",
"',foo'", " ',foo'",
"\"fo'o\"", " \"fo'o\"",
], ],
CommaSeparatedIterator::new( CommaSeparatedIterator::new(
"abc,\"def\", \"ghi\",\t\"jkl\" , \"mno\",\tpqr, \"abc, def\", foo, \" foo\", ',foo', \"fo'o\"" r#""abc,def", "ghi","jkl" , "mno",pqr, "abc, def", foo, " foo", ',foo', "fo'o""#
) )
.collect::<Vec<&str>>() .collect::<Vec<&str>>()
); );