handle nameless teams...

This commit is contained in:
Robin Appelman 2023-11-18 21:56:59 +01:00
commit cba291d6b1

View file

@ -110,17 +110,20 @@ impl Parser for TeamParser {
fn parse(&self, document: &str) -> Result<Self::Output> { fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(document); let document = Html::parse_document(document);
let root = document.root_element(); let root = document.root_element();
let name = select_text(root, &self.selector_name) let mut name = select_text(root, &self.selector_name)
.ok_or(ScrapeError::NotFound)? .unwrap_or_default()
.to_string(); .to_string();
let tag = select_text(root, &self.selector_tag) let tag = select_text(root, &self.selector_tag)
.ok_or(ParseError::ElementNotFound { .unwrap_or_default()
selector: SELECTOR_TEAM_TAG,
role: "team tag",
})?
.to_string(); .to_string();
match (tag.as_str(), name.as_str()) {
("", "") => return Err(ScrapeError::NotFound),
(_, "") => name = tag.clone(),
_ => {}
};
let image = let image =
document document
.select(&self.selector_image) .select(&self.selector_image)
@ -157,24 +160,20 @@ impl Parser for TeamParser {
.to_string(); .to_string();
let description = select_text(root, &self.selector_team_description) let description = select_text(root, &self.selector_team_description)
.ok_or(ParseError::ElementNotFound { .unwrap_or_default()
selector: SELECTOR_TEAM_DESCRIPTION,
role: "team description",
})?
.replace('\n', " "); .replace('\n', " ");
let titles = document let titles = document
.select(&self.selector_team_titles) .select(&self.selector_team_titles)
.next() .next()
.ok_or(ParseError::ElementNotFound { .map(|el| {
selector: SELECTOR_TEAM_TITLES, el.text()
role: "team titles",
})?
.text()
.map(str::trim) .map(str::trim)
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
.map(String::from) .map(String::from)
.collect(); .collect()
})
.unwrap_or_default();
let results = document let results = document
.select(&self.selector_team_records) .select(&self.selector_team_records)