more team match data:

This commit is contained in:
Robin Appelman 2025-04-20 17:13:51 +02:00
commit f3aadfe03d
5 changed files with 5316 additions and 5145 deletions

View file

@ -5,7 +5,7 @@ pub mod parser;
use crate::data::{
GameMode, MapHistory, MatchInfo, MembershipHistory, Player, Seasons, Team, TeamRef,
TeamRosterData, TeamSeason, Transaction,
TeamRosterData, Transaction,
};
use crate::parser::{
MapHistoryParser, MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser,
@ -18,6 +18,7 @@ use std::time::Duration;
pub use steamid_ng::SteamID;
use tokio::time::sleep;
use tracing::warn;
use ugc_scraper_types::TeamMatches;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
@ -136,7 +137,7 @@ impl UgcClient {
}
/// Retrieve team match history
pub async fn team_matches(&self, id: u32) -> Result<Vec<TeamSeason>> {
pub async fn team_matches(&self, id: u32) -> Result<TeamMatches> {
let body = self
.request(format!(
"https://www.ugcleague.com/team_page_matches.cfm?clan_id={}",

View file

@ -1,10 +1,13 @@
use super::Parser;
use crate::data::{MatchResult, TeamRef, TeamSeason, TeamSeasonMatch};
use crate::data::{GameMode, MatchResult, TeamRef, TeamSeason, TeamSeasonMatch};
use crate::parser::{match_id_from_link, select_text, team_id_from_link, ElementExt};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
use std::str::FromStr;
use ugc_scraper_types::{Side, TeamMatches};
/// Selector for the `h4` heading inside the `thead` of each season table.
/// The parser scans this heading's text for the season's game mode.
const SELECTOR_SEASON_TITLE: &str = ".container table.table.table-condensed.table-striped thead h4";
/// Selector for the `b` element inside the season heading, whose text is
/// expected to read `Season <number>`.
const SELECTOR_SEASON_SEASON: &str =
    ".container table.table.table-condensed.table-striped thead h4 b";
/// Selector for every `tbody` that is a 3rd, 6th, 9th, ... child of a season
/// table; the parser pairs these up with the season headings.
const SELECTOR_SEASON_MATCHES: &str =
    ".container table.table.table-condensed.table-striped tbody:nth-child(3n)";
@ -20,8 +23,12 @@ const SELECTOR_SEASON_POINTS: &str = "td:nth-child(9) small";
/// Selector for the `small` element inside the tenth `td` child; used by the
/// parser as its opponent-points selector.
const SELECTOR_SEASON_POINTS_OPPONENTS: &str = "td:nth-child(10) small";
/// Selector for links inside a `td` whose `href` starts with `matchpage`.
const SELECTOR_SEASON_MATCH_PAGE: &str = "td a[href^=\"matchpage\"]";
/// Selector for the `b` element directly inside the `h2` of `div.col-md-9`;
/// its text is read as the team name.
const SELECTOR_TEAM_NAME: &str = r#"div.col-md-9 > h2 > b"#;
/// Selector for the `team_page.cfm` link in the heading's `span.pull-right`;
/// the team id is extracted from its `href`.
const SELECTOR_TEAM_LINK: &str = r#"h2 > span.pull-right > a[href^="team_page.cfm"]"#;
pub struct TeamMatchesParser {
selector_title: Selector,
selector_season: Selector,
selector_matches: Selector,
selector_match: Selector,
selector_division: Selector,
@ -34,6 +41,9 @@ pub struct TeamMatchesParser {
selector_points: Selector,
selector_points_opponent: Selector,
selector_match_page: Selector,
selector_team_name: Selector,
selector_team_link: Selector,
}
impl Default for TeamMatchesParser {
@ -46,6 +56,7 @@ impl TeamMatchesParser {
pub fn new() -> Self {
TeamMatchesParser {
selector_title: Selector::parse(SELECTOR_SEASON_TITLE).unwrap(),
selector_season: Selector::parse(SELECTOR_SEASON_SEASON).unwrap(),
selector_matches: Selector::parse(SELECTOR_SEASON_MATCHES).unwrap(),
selector_match: Selector::parse(SELECTOR_SEASON_MATCH).unwrap(),
selector_division: Selector::parse(SELECTOR_SEASON_DIVISION).unwrap(),
@ -58,27 +69,44 @@ impl TeamMatchesParser {
selector_points: Selector::parse(SELECTOR_SEASON_POINTS).unwrap(),
selector_points_opponent: Selector::parse(SELECTOR_SEASON_POINTS_OPPONENTS).unwrap(),
selector_match_page: Selector::parse(SELECTOR_SEASON_MATCH_PAGE).unwrap(),
selector_team_name: Selector::parse(SELECTOR_TEAM_NAME).unwrap(),
selector_team_link: Selector::parse(SELECTOR_TEAM_LINK).unwrap(),
}
}
}
impl Parser for TeamMatchesParser {
type Output = Vec<TeamSeason>;
type Output = TeamMatches;
fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(document);
document
let seasons = document
.select(&self.selector_title)
.zip(document.select(&self.selector_season))
.zip(document.select(&self.selector_matches))
.map(|(title, matches)| {
let title = title.first_text().ok_or(ParseError::EmptyText {
.map(|((title, season), matches)| {
let format = title.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_SEASON_TITLE,
role: "season title",
})?;
let season: u32 = title.trim_start_matches("Season ").parse().map_err(|_| {
let format = format
.split(' ')
.find_map(|part| GameMode::from_str(part).ok())
.ok_or(ParseError::InvalidText {
text: format.into(),
role: "season format",
})?;
let season = season.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_SEASON_SEASON,
role: "season title",
})?;
let season: u32 = season.trim_start_matches("Season ").parse().map_err(|_| {
ParseError::InvalidText {
text: title.to_string(),
text: season.to_string(),
role: "season title",
}
})?;
@ -167,10 +195,7 @@ impl Parser for TeamMatchesParser {
let opponent = opponent_link
.map(|link| {
let name = link.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_SEASON_OPPONENT,
role: "match opponent",
})?;
let name = link.first_text().unwrap_or_default();
let id = team_id_from_link(link.attr("href").unwrap_or_default())?;
Result::<_, ParseError>::Ok(TeamRef {
name: name.to_string(),
@ -208,7 +233,12 @@ impl Parser for TeamMatchesParser {
Ok(TeamSeasonMatch {
week,
date: date.to_string(),
side: side.to_string(),
side: side.parse::<Side>().map_err(|error| {
ParseError::InvalidText {
text: error.text,
role: "match side",
}
})?,
map: map.to_string(),
division: division.to_string(),
result,
@ -216,8 +246,34 @@ impl Parser for TeamMatchesParser {
})
.collect::<Result<_>>()?;
Ok(TeamSeason { season, matches })
Ok(TeamSeason {
season,
matches,
format,
})
.collect::<Result<Vec<_>>>()
})
.collect::<Result<Vec<_>>>()?;
let team_id = document
.select(&self.selector_team_link)
.next()
.and_then(|link| team_id_from_link(link.attr("href").unwrap_or_default()).ok())
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_TEAM_LINK,
role: "match team link",
})?;
let team_name = select_text(document.root_element(), &self.selector_team_name).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_NAME,
role: "match team name",
},
)?;
let team = TeamRef {
id: team_id,
name: team_name.into(),
};
Ok(TeamMatches { team, seasons })
}
}

View file

@ -2,9 +2,15 @@
source: tests/snapshot.rs
expression: parsed
---
[
{
"team": {
"name": "Xenon",
"id": 7861
},
"seasons": [
{
"season": 41,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -113,6 +119,7 @@ expression: parsed
},
{
"season": 40,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -270,6 +277,7 @@ expression: parsed
},
{
"season": 39,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -437,6 +445,7 @@ expression: parsed
},
{
"season": 38,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -604,6 +613,7 @@ expression: parsed
},
{
"season": 37,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -800,6 +810,7 @@ expression: parsed
},
{
"season": 36,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -948,6 +959,7 @@ expression: parsed
},
{
"season": 35,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -1105,6 +1117,7 @@ expression: parsed
},
{
"season": 34,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -1281,6 +1294,7 @@ expression: parsed
},
{
"season": 32,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -1438,6 +1452,7 @@ expression: parsed
},
{
"season": 31,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -1595,6 +1610,7 @@ expression: parsed
},
{
"season": 30,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -1752,6 +1768,7 @@ expression: parsed
},
{
"season": 29,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -1909,6 +1926,7 @@ expression: parsed
},
{
"season": 28,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -2028,6 +2046,7 @@ expression: parsed
},
{
"season": 27,
"format": "9v9",
"matches": [
{
"division": "Premium EU",
@ -2195,6 +2214,7 @@ expression: parsed
},
{
"season": 26,
"format": "9v9",
"matches": [
{
"division": "Premium EU",
@ -2352,6 +2372,7 @@ expression: parsed
},
{
"season": 25,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -2566,6 +2587,7 @@ expression: parsed
},
{
"season": 24,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -2723,6 +2745,7 @@ expression: parsed
},
{
"season": 23,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -2928,6 +2951,7 @@ expression: parsed
},
{
"season": 22,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -3123,6 +3147,7 @@ expression: parsed
},
{
"season": 21,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -3271,6 +3296,7 @@ expression: parsed
},
{
"season": 20,
"format": "9v9",
"matches": [
{
"division": "Euro Platinum",
@ -3409,6 +3435,7 @@ expression: parsed
},
{
"season": 19,
"format": "9v9",
"matches": [
{
"division": "Euro Gold",
@ -3614,6 +3641,7 @@ expression: parsed
},
{
"season": 18,
"format": "9v9",
"matches": [
{
"division": "Euro Steel",
@ -3828,6 +3856,7 @@ expression: parsed
},
{
"season": 17,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -3976,6 +4005,7 @@ expression: parsed
},
{
"season": 16,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -4133,6 +4163,7 @@ expression: parsed
},
{
"season": 15,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -4290,6 +4321,7 @@ expression: parsed
},
{
"season": 14,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -4429,6 +4461,7 @@ expression: parsed
},
{
"season": 13,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -4624,6 +4657,7 @@ expression: parsed
},
{
"season": 12,
"format": "9v9",
"matches": [
{
"division": "Euro Silver",
@ -4819,6 +4853,7 @@ expression: parsed
},
{
"season": 11,
"format": "9v9",
"matches": [
{
"division": "Euro Steel",
@ -4993,4 +5028,5 @@ expression: parsed
}
]
}
]
]
}

View file

@ -2,9 +2,15 @@
source: tests/snapshot.rs
expression: parsed
---
[
{
"team": {
"name": "ChaosTheory",
"id": 2157
},
"seasons": [
{
"season": 1,
"format": "8v8",
"matches": [
{
"division": "Steel",
@ -144,4 +150,5 @@ expression: parsed
}
]
}
]
]
}

View file

@ -261,24 +261,96 @@ pub struct RosterHistory {
pub role: MembershipRole,
}
/// Match history of a single team: the team itself plus its seasons.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TeamMatches {
/// Reference (name and id) of the team the matches belong to.
pub team: TeamRef,
/// One entry per season found for the team.
pub seasons: Vec<TeamSeason>,
}
/// The matches a team has in one season.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TeamSeason {
/// Season number.
pub season: u32,
/// Game mode the season was played in.
pub format: GameMode,
/// Matches listed for this season.
pub matches: Vec<TeamSeasonMatch>,
}
/// Which side of a match a team is on.
///
/// With the `serde` feature enabled the variants serialize in snake case
/// (`"home"` / `"visiting"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum Side {
    /// The team is the home team.
    Home,
    /// The team is the visiting (away) team.
    Visiting,
}
/// Error returned when a string cannot be parsed into a [`Side`].
#[derive(Debug, Clone, Error)]
#[error("Invalid side {text}")]
pub struct InvalidSide {
/// The input text that was rejected.
pub text: String,
}
impl FromStr for Side {
    type Err = InvalidSide;

    /// Parses the exact labels `"home"` and `"visiting"`.
    ///
    /// # Errors
    ///
    /// Any other input (including different casing or surrounding whitespace)
    /// yields an [`InvalidSide`] carrying the rejected text.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "home" {
            return Ok(Side::Home);
        }
        if s == "visiting" {
            return Ok(Side::Visiting);
        }
        Err(InvalidSide { text: s.to_owned() })
    }
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TeamSeasonMatch {
pub division: String,
pub week: u8,
pub date: String,
pub side: String,
pub side: Side,
pub result: MatchResult,
pub map: String,
}
impl TeamSeasonMatch {
    /// Builds a [`MatchInfo`] for this entry, arranging `team` and the
    /// opponent into home/away order according to `self.side`.
    ///
    /// `team` is the team this entry was listed for and `format` is the game
    /// mode to record. `comment` and `comment_author` are left as `None`, and
    /// the entry's date is stored as `default_date`.
    ///
    /// Returns `None` when the result is neither `Played` nor `Pending`.
    pub fn match_info(&self, team: &TeamRef, format: GameMode) -> Option<MatchInfo> {
        // Only played and pending results carry an opponent and scores.
        let (opponent, own_score, opponent_score) = match &self.result {
            MatchResult::Played {
                opponent,
                score,
                score_opponent,
                ..
            }
            | MatchResult::Pending {
                opponent,
                score,
                score_opponent,
                ..
            } => (opponent, *score, *score_opponent),
            _ => return None,
        };

        // Map "our team / opponent" onto "home / away".
        let (team_home, team_away, score_home, score_away) = match self.side {
            Side::Home => (team.clone(), opponent.clone(), own_score, opponent_score),
            Side::Visiting => (opponent.clone(), team.clone(), opponent_score, own_score),
        };

        Some(MatchInfo {
            comment: None,
            comment_author: None,
            team_home,
            team_away,
            score_home,
            score_away,
            map: self.map.clone(),
            week: self.week,
            format,
            default_date: self.date.clone(),
        })
    }
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case", tag = "state"))]
@ -306,8 +378,7 @@ pub enum MatchResult {
impl MatchResult {
/// Returns the match id carried by a `Played` or `Pending` result, or `None`
/// for any other variant.
pub fn match_id(&self) -> Option<u32> {
    match self {
        // Both variants expose `id`; a single or-pattern arm covers them, so
        // no separate per-variant arms are needed.
        MatchResult::Played { id, .. } | MatchResult::Pending { id, .. } => Some(*id),
        _ => None,
    }
}