more match data

This commit is contained in:
Robin Appelman 2025-04-18 19:28:04 +02:00
commit 0f86fda710
2 changed files with 78 additions and 3 deletions

View file

@ -1,9 +1,10 @@
use super::Parser;
use crate::data::{MatchInfo, TeamRef};
use crate::data::{GameMode, MatchInfo, TeamRef};
use crate::parser::{select_last_text, select_text, team_id_from_link, ElementExt};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
const SELECTOR_MATCH_FORMAT: &str = "h3.page-header > strong.styleColor";
const SELECTOR_MATCH_COMMENT_AUTHOR: &str = ".row-fluid .col-md-12 span.text-success";
const SELECTOR_MATCH_COMMENT: &str = ".row-fluid .col-md-12 > .white-row-light-small > p";
const SELECTOR_MATCH_TEAM_LINK: &str = "a[href^=\"team_page\"]:not(.btn-large)";
@ -11,13 +12,20 @@ const SELECTOR_MATCH_RESULT_TEAM: &str =
".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(1)";
const SELECTOR_MATCH_RESULT_SCORE: &str =
".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(2)";
const SELECTOR_MATCH_MAP: &str = "h4.text-success.text-center > b";
const SELECTOR_MATCH_WEEK: &str = "p.muted.text-center.nomargin > small > b:nth-child(1)";
const SELECTOR_MATCH_DATE: &str = "p.muted.text-center.nomargin > small > b:nth-child(2)";
pub struct MatchPageParser {
selector_format: Selector,
selector_author: Selector,
selector_comment: Selector,
selector_team_link: Selector,
selector_result_team: Selector,
selector_result_score: Selector,
selector_map: Selector,
selector_week: Selector,
selector_date: Selector,
}
impl Default for MatchPageParser {
@ -29,11 +37,15 @@ impl Default for MatchPageParser {
impl MatchPageParser {
pub fn new() -> Self {
MatchPageParser {
selector_format: Selector::parse(SELECTOR_MATCH_FORMAT).unwrap(),
selector_author: Selector::parse(SELECTOR_MATCH_COMMENT_AUTHOR).unwrap(),
selector_comment: Selector::parse(SELECTOR_MATCH_COMMENT).unwrap(),
selector_team_link: Selector::parse(SELECTOR_MATCH_TEAM_LINK).unwrap(),
selector_result_team: Selector::parse(SELECTOR_MATCH_RESULT_TEAM).unwrap(),
selector_result_score: Selector::parse(SELECTOR_MATCH_RESULT_SCORE).unwrap(),
selector_map: Selector::parse(SELECTOR_MATCH_MAP).unwrap(),
selector_week: Selector::parse(SELECTOR_MATCH_WEEK).unwrap(),
selector_date: Selector::parse(SELECTOR_MATCH_DATE).unwrap(),
}
}
}
@ -59,6 +71,46 @@ impl Parser for MatchPageParser {
let home_team_id = team_id_from_link(team_link_home.attr("href").unwrap_or_default())?;
let away_team_id = team_id_from_link(team_link_away.attr("href").unwrap_or_default())?;
let format_text = select_text(document.root_element(), &self.selector_format).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_MATCH_FORMAT,
role: "away team map",
},
)?;
let format = format_text
.parse::<GameMode>()
.ok()
.or_else(|| format_text.split(' ').find_map(|text| text.parse().ok()))
.ok_or_else(|| ParseError::InvalidText {
text: format_text.into(),
role: "match format",
})?;
let map = select_text(document.root_element(), &self.selector_map).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_MATCH_MAP,
role: "away team map",
},
)?;
let week_text = select_text(document.root_element(), &self.selector_week).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_MATCH_WEEK,
role: "away team week",
},
)?;
let week: u8 = week_text.parse().map_err(|_| ParseError::InvalidText {
text: week_text.into(),
role: "match week",
})?;
let date = select_text(document.root_element(), &self.selector_date).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_MATCH_DATE,
role: "away team week",
},
)?;
let mut team_names = document.select(&self.selector_result_team);
let team_name_home = team_names
.next()
@ -133,6 +185,10 @@ impl Parser for MatchPageParser {
name: team_name_away.to_string(),
id: away_team_id,
},
week,
map: map.into(),
default_date: date.into(),
format,
})
}
}