more player honors data

This commit is contained in:
Robin Appelman 2025-04-16 09:41:39 +02:00
commit b70f2ee336
7 changed files with 491 additions and 213 deletions

View file

@ -1,5 +1,7 @@
use crate::{ParseError, Result};
use regex::Regex;
use scraper::{ElementRef, Selector};
use std::sync::OnceLock;
use steamid_ng::SteamID;
use time::format_description::FormatItem;
use time::macros::format_description;
@ -103,3 +105,8 @@ fn steam_id_from_link(link: &str) -> Result<SteamID, ParseError> {
})
.map(SteamID::from)
}
static WHITESPACE_REGEX: OnceLock<Regex> = OnceLock::new();
fn whitespace_regex() -> &'static Regex {
WHITESPACE_REGEX.get_or_init(|| Regex::new("[\n\t ]+").unwrap())
}

View file

@ -1,5 +1,5 @@
use super::{ElementExt, Parser};
use crate::data::{Class, Honors, Player, TeamMemberShip, TeamRef};
use crate::data::{Class, GameMode, Honors, Player, TeamMemberShip, TeamRef};
use crate::parser::{select_last_text, select_text, team_id_from_link, DATE_FORMAT};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
@ -10,12 +10,13 @@ use time::Date;
const SELECTOR_PLAYER_NAME: &str = ".container .col-md-4 > h3 > b";
const SELECTOR_PLAYER_ID: &str = r#"a[href*="steam://friends/add"]"#;
const SELECTOR_PLAYER_FLAG: &str = r#"img[data-cfsrc*="/images/flags/"]"#;
const SELECTOR_PLAYER_HONORS_GROUP: &str =
".container .col-md-6:nth-child(2) .white-row-small .row-fluid";
const SELECTOR_PLAYER_HONORS_HEADER: &str = "h5";
const SELECTOR_PLAYER_HONORS_LEAGUE: &str = "li div";
const SELECTOR_PLAYER_HONORS_TEAM: &str = "li small";
const SELECTOR_PLAYER_HONORS_TEAM: &str = "li small a";
const SELECTOR_PLAYER_TEAM_GROUP: &str =
".container .col-md-6:nth-child(1) .white-row-small .row-fluid";
@ -32,6 +33,7 @@ const SELECTOR_CLASS: &str =
pub struct PlayerParser {
selector_name: Selector,
selector_id: Selector,
selector_flag: Selector,
selector_honors_header: Selector,
selector_honors_group: Selector,
@ -59,6 +61,7 @@ impl PlayerParser {
PlayerParser {
selector_name: Selector::parse(SELECTOR_PLAYER_NAME).unwrap(),
selector_id: Selector::parse(SELECTOR_PLAYER_ID).unwrap(),
selector_flag: Selector::parse(SELECTOR_PLAYER_FLAG).unwrap(),
selector_honors_header: Selector::parse(SELECTOR_PLAYER_HONORS_HEADER).unwrap(),
selector_honors_group: Selector::parse(SELECTOR_PLAYER_HONORS_GROUP).unwrap(),
@ -106,6 +109,12 @@ impl Parser for PlayerParser {
.unwrap_or_default()
.to_string();
let country = document
.select(&self.selector_flag)
.next()
.and_then(|e| e.attr("title"))
.map(String::from);
let avatar_element =
document
.select(&self.selector_avatar)
@ -135,22 +144,44 @@ impl Parser for PlayerParser {
})
.map(|((format_res, season), team)| {
let format = format_res?;
let game_mode =
GameMode::from_str(format).map_err(|_| ParseError::InvalidText {
text: format.into(),
role: "player honors format",
})?;
let division = season.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_LEAGUE,
role: "player honors division",
})?;
let season = division
.split(' ')
.nth(1)
.unwrap_or_default()
.parse()
.map_err(|_| ParseError::InvalidText {
text: division.into(),
role: "player honors season",
})?;
let team_name = team.first_text().unwrap_or_default().to_string();
let team_link = team.attr("href").unwrap_or_default();
let team_id: u32 = team_link
.rsplit('=')
.next()
.unwrap_or_default()
.parse()
.map_err(|_| ParseError::InvalidLink {
link: team_link.into(),
role: "player honors team",
})?;
Ok(Honors {
format: format.to_string(),
season: season
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_LEAGUE,
role: "player honors season",
})?
.to_string(),
team: team
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_TEAM,
role: "player honors team",
})?
.to_string(),
format: game_mode,
division: division.splitn(3, ' ').last().unwrap_or_default().into(),
season,
team: TeamRef {
id: team_id,
name: team_name,
},
})
})
.collect::<Result<Vec<_>>>()?;
@ -178,12 +209,7 @@ impl Parser for PlayerParser {
})?
.attr("href")
.unwrap_or_default();
let name = select_text(item, &self.selector_team_name).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_NAME,
role: "players team name",
},
)?;
let name = select_text(item, &self.selector_team_name).unwrap_or_default();
let league = select_text(item, &self.selector_team_league).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_LEAGUE,
@ -198,8 +224,9 @@ impl Parser for PlayerParser {
)?;
let id = team_id_from_link(link)?;
let since = match since.rsplit_once('\n') {
let since = match since.rsplit_once(['\n', ' ', '\t']) {
Some((_, since)) => {
let since = since.trim();
Date::parse(since, DATE_FORMAT).map_err(|_| ParseError::InvalidDate {
role: "team join date",
date: since.to_string(),
@ -232,6 +259,7 @@ impl Parser for PlayerParser {
honors,
teams,
favorite_classes,
country,
})
}
}

View file

@ -1,13 +1,11 @@
use super::{select_text_empty, ElementExt, Parser};
use super::{select_text_empty, whitespace_regex, ElementExt, Parser};
use crate::data::{Membership, NameChange, Record, Team};
use crate::parser::{
select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT,
};
use crate::{ParseError, Result, ScrapeError};
use regex::Regex;
use scraper::{Html, Selector};
use std::str::FromStr;
use std::sync::OnceLock;
use time::{Date, PrimitiveDateTime, Time, UtcOffset};
use ugc_scraper_types::{GameMode, MembershipRole, Region};
@ -118,13 +116,11 @@ impl TeamParser {
}
}
static WHITESPACE_REGEX: OnceLock<Regex> = OnceLock::new();
impl Parser for TeamParser {
type Output = Team;
fn parse(&self, document: &str) -> Result<Self::Output> {
let whitespace_regex = WHITESPACE_REGEX.get_or_init(|| Regex::new("[\n\t ]+").unwrap());
let whitespace_regex = whitespace_regex();
let document = Html::parse_document(document);
let root = document.root_element();