more team match data:

This commit is contained in:
Robin Appelman 2025-04-20 17:13:51 +02:00
commit f3aadfe03d
5 changed files with 5316 additions and 5145 deletions

View file

@ -5,7 +5,7 @@ pub mod parser;
use crate::data::{ use crate::data::{
GameMode, MapHistory, MatchInfo, MembershipHistory, Player, Seasons, Team, TeamRef, GameMode, MapHistory, MatchInfo, MembershipHistory, Player, Seasons, Team, TeamRef,
TeamRosterData, TeamSeason, Transaction, TeamRosterData, Transaction,
}; };
use crate::parser::{ use crate::parser::{
MapHistoryParser, MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, MapHistoryParser, MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser,
@ -18,6 +18,7 @@ use std::time::Duration;
pub use steamid_ng::SteamID; pub use steamid_ng::SteamID;
use tokio::time::sleep; use tokio::time::sleep;
use tracing::warn; use tracing::warn;
use ugc_scraper_types::TeamMatches;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>; pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
@ -136,7 +137,7 @@ impl UgcClient {
} }
/// Retrieve team match history /// Retrieve team match history
pub async fn team_matches(&self, id: u32) -> Result<Vec<TeamSeason>> { pub async fn team_matches(&self, id: u32) -> Result<TeamMatches> {
let body = self let body = self
.request(format!( .request(format!(
"https://www.ugcleague.com/team_page_matches.cfm?clan_id={}", "https://www.ugcleague.com/team_page_matches.cfm?clan_id={}",

View file

@ -1,10 +1,13 @@
use super::Parser; use super::Parser;
use crate::data::{MatchResult, TeamRef, TeamSeason, TeamSeasonMatch}; use crate::data::{GameMode, MatchResult, TeamRef, TeamSeason, TeamSeasonMatch};
use crate::parser::{match_id_from_link, select_text, team_id_from_link, ElementExt}; use crate::parser::{match_id_from_link, select_text, team_id_from_link, ElementExt};
use crate::{ParseError, Result}; use crate::{ParseError, Result};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use std::str::FromStr;
use ugc_scraper_types::{Side, TeamMatches};
const SELECTOR_SEASON_TITLE: &str = const SELECTOR_SEASON_TITLE: &str = ".container table.table.table-condensed.table-striped thead h4";
const SELECTOR_SEASON_SEASON: &str =
".container table.table.table-condensed.table-striped thead h4 b"; ".container table.table.table-condensed.table-striped thead h4 b";
const SELECTOR_SEASON_MATCHES: &str = const SELECTOR_SEASON_MATCHES: &str =
".container table.table.table-condensed.table-striped tbody:nth-child(3n)"; ".container table.table.table-condensed.table-striped tbody:nth-child(3n)";
@ -20,8 +23,12 @@ const SELECTOR_SEASON_POINTS: &str = "td:nth-child(9) small";
const SELECTOR_SEASON_POINTS_OPPONENTS: &str = "td:nth-child(10) small"; const SELECTOR_SEASON_POINTS_OPPONENTS: &str = "td:nth-child(10) small";
const SELECTOR_SEASON_MATCH_PAGE: &str = "td a[href^=\"matchpage\"]"; const SELECTOR_SEASON_MATCH_PAGE: &str = "td a[href^=\"matchpage\"]";
const SELECTOR_TEAM_NAME: &str = r#"div.col-md-9 > h2 > b"#;
const SELECTOR_TEAM_LINK: &str = r#"h2 > span.pull-right > a[href^="team_page.cfm"]"#;
pub struct TeamMatchesParser { pub struct TeamMatchesParser {
selector_title: Selector, selector_title: Selector,
selector_season: Selector,
selector_matches: Selector, selector_matches: Selector,
selector_match: Selector, selector_match: Selector,
selector_division: Selector, selector_division: Selector,
@ -34,6 +41,9 @@ pub struct TeamMatchesParser {
selector_points: Selector, selector_points: Selector,
selector_points_opponent: Selector, selector_points_opponent: Selector,
selector_match_page: Selector, selector_match_page: Selector,
selector_team_name: Selector,
selector_team_link: Selector,
} }
impl Default for TeamMatchesParser { impl Default for TeamMatchesParser {
@ -46,6 +56,7 @@ impl TeamMatchesParser {
pub fn new() -> Self { pub fn new() -> Self {
TeamMatchesParser { TeamMatchesParser {
selector_title: Selector::parse(SELECTOR_SEASON_TITLE).unwrap(), selector_title: Selector::parse(SELECTOR_SEASON_TITLE).unwrap(),
selector_season: Selector::parse(SELECTOR_SEASON_SEASON).unwrap(),
selector_matches: Selector::parse(SELECTOR_SEASON_MATCHES).unwrap(), selector_matches: Selector::parse(SELECTOR_SEASON_MATCHES).unwrap(),
selector_match: Selector::parse(SELECTOR_SEASON_MATCH).unwrap(), selector_match: Selector::parse(SELECTOR_SEASON_MATCH).unwrap(),
selector_division: Selector::parse(SELECTOR_SEASON_DIVISION).unwrap(), selector_division: Selector::parse(SELECTOR_SEASON_DIVISION).unwrap(),
@ -58,27 +69,44 @@ impl TeamMatchesParser {
selector_points: Selector::parse(SELECTOR_SEASON_POINTS).unwrap(), selector_points: Selector::parse(SELECTOR_SEASON_POINTS).unwrap(),
selector_points_opponent: Selector::parse(SELECTOR_SEASON_POINTS_OPPONENTS).unwrap(), selector_points_opponent: Selector::parse(SELECTOR_SEASON_POINTS_OPPONENTS).unwrap(),
selector_match_page: Selector::parse(SELECTOR_SEASON_MATCH_PAGE).unwrap(), selector_match_page: Selector::parse(SELECTOR_SEASON_MATCH_PAGE).unwrap(),
selector_team_name: Selector::parse(SELECTOR_TEAM_NAME).unwrap(),
selector_team_link: Selector::parse(SELECTOR_TEAM_LINK).unwrap(),
} }
} }
} }
impl Parser for TeamMatchesParser { impl Parser for TeamMatchesParser {
type Output = Vec<TeamSeason>; type Output = TeamMatches;
fn parse(&self, document: &str) -> Result<Self::Output> { fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(document); let document = Html::parse_document(document);
document let seasons = document
.select(&self.selector_title) .select(&self.selector_title)
.zip(document.select(&self.selector_season))
.zip(document.select(&self.selector_matches)) .zip(document.select(&self.selector_matches))
.map(|(title, matches)| { .map(|((title, season), matches)| {
let title = title.first_text().ok_or(ParseError::EmptyText { let format = title.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_SEASON_TITLE, selector: SELECTOR_SEASON_TITLE,
role: "season title", role: "season title",
})?; })?;
let season: u32 = title.trim_start_matches("Season ").parse().map_err(|_| {
let format = format
.split(' ')
.find_map(|part| GameMode::from_str(part).ok())
.ok_or(ParseError::InvalidText {
text: format.into(),
role: "season format",
})?;
let season = season.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_SEASON_SEASON,
role: "season title",
})?;
let season: u32 = season.trim_start_matches("Season ").parse().map_err(|_| {
ParseError::InvalidText { ParseError::InvalidText {
text: title.to_string(), text: season.to_string(),
role: "season title", role: "season title",
} }
})?; })?;
@ -167,10 +195,7 @@ impl Parser for TeamMatchesParser {
let opponent = opponent_link let opponent = opponent_link
.map(|link| { .map(|link| {
let name = link.first_text().ok_or(ParseError::EmptyText { let name = link.first_text().unwrap_or_default();
selector: SELECTOR_SEASON_OPPONENT,
role: "match opponent",
})?;
let id = team_id_from_link(link.attr("href").unwrap_or_default())?; let id = team_id_from_link(link.attr("href").unwrap_or_default())?;
Result::<_, ParseError>::Ok(TeamRef { Result::<_, ParseError>::Ok(TeamRef {
name: name.to_string(), name: name.to_string(),
@ -208,7 +233,12 @@ impl Parser for TeamMatchesParser {
Ok(TeamSeasonMatch { Ok(TeamSeasonMatch {
week, week,
date: date.to_string(), date: date.to_string(),
side: side.to_string(), side: side.parse::<Side>().map_err(|error| {
ParseError::InvalidText {
text: error.text,
role: "match side",
}
})?,
map: map.to_string(), map: map.to_string(),
division: division.to_string(), division: division.to_string(),
result, result,
@ -216,8 +246,34 @@ impl Parser for TeamMatchesParser {
}) })
.collect::<Result<_>>()?; .collect::<Result<_>>()?;
Ok(TeamSeason { season, matches }) Ok(TeamSeason {
season,
matches,
format,
})
}) })
.collect::<Result<Vec<_>>>() .collect::<Result<Vec<_>>>()?;
let team_id = document
.select(&self.selector_team_link)
.next()
.and_then(|link| team_id_from_link(link.attr("href").unwrap_or_default()).ok())
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_TEAM_LINK,
role: "match team link",
})?;
let team_name = select_text(document.root_element(), &self.selector_team_name).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_NAME,
role: "match team name",
},
)?;
let team = TeamRef {
id: team_id,
name: team_name.into(),
};
Ok(TeamMatches { team, seasons })
} }
} }

File diff suppressed because it is too large Load diff

View file

@ -2,146 +2,153 @@
source: tests/snapshot.rs source: tests/snapshot.rs
expression: parsed expression: parsed
--- ---
[ {
{ "team": {
"season": 1, "name": "ChaosTheory",
"matches": [ "id": 2157
{ },
"division": "Steel", "seasons": [
"week": 1, {
"date": "Sun Oct 28", "season": 1,
"side": "visiting", "format": "8v8",
"result": { "matches": [
"state": "unknown", {
"opponent": { "division": "Steel",
"name": "[$0.00]", "week": 1,
"id": 2137 "date": "Sun Oct 28",
} "side": "visiting",
"result": {
"state": "unknown",
"opponent": {
"name": "[$0.00]",
"id": 2137
}
},
"map": "cp_gravelpit"
}, },
"map": "cp_gravelpit" {
}, "division": "Platinum",
{ "week": 2,
"division": "Platinum", "date": "Sun Nov 04",
"week": 2, "side": "home",
"date": "Sun Nov 04", "result": {
"side": "home", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": "-DeX-",
"opponent": { "id": 2130
"name": "-DeX-", }
"id": 2130 },
} "map": "cp_gravelpit"
}, },
"map": "cp_gravelpit" {
}, "division": "Platinum",
{ "week": 3,
"division": "Platinum", "date": "Sun Nov 11",
"week": 3, "side": "home",
"date": "Sun Nov 11", "result": {
"side": "home", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": "[UGS]",
"opponent": { "id": 2127
"name": "[UGS]", }
"id": 2127 },
} "map": "cp_dustbowl"
}, },
"map": "cp_dustbowl" {
}, "division": "Platinum",
{ "week": 4,
"division": "Platinum", "date": "Sun Nov 18",
"week": 4, "side": "home",
"date": "Sun Nov 18", "result": {
"side": "home", "state": "bye_week"
"result": { },
"state": "bye_week" "map": "cp_dustbowl"
}, },
"map": "cp_dustbowl" {
}, "division": "Platinum",
{ "week": 6,
"division": "Platinum", "date": "Mon Dec 10",
"week": 6, "side": "home",
"date": "Mon Dec 10", "result": {
"side": "home", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": ".tKd",
"opponent": { "id": 2169
"name": ".tKd", }
"id": 2169 },
} "map": "ctf_impact"
}, },
"map": "ctf_impact" {
}, "division": "Platinum",
{ "week": 7,
"division": "Platinum", "date": "Mon Dec 17",
"week": 7, "side": "visiting",
"date": "Mon Dec 17", "result": {
"side": "visiting", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": "[FOoM]",
"opponent": { "id": 2172
"name": "[FOoM]", }
"id": 2172 },
} "map": "cp_science2"
}, },
"map": "cp_science2" {
}, "division": "Platinum",
{ "week": 11,
"division": "Platinum", "date": "Mon Jan 14",
"week": 11, "side": "home",
"date": "Mon Jan 14", "result": {
"side": "home", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": "[SWAT]",
"opponent": { "id": 2188
"name": "[SWAT]", }
"id": 2188 },
} "map": "cp_junction"
}, },
"map": "cp_junction" {
}, "division": "Platinum",
{ "week": 12,
"division": "Platinum", "date": "Mon Jan 21",
"week": 12, "side": "home",
"date": "Mon Jan 21", "result": {
"side": "home", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": "[FOoM]",
"opponent": { "id": 2172
"name": "[FOoM]", }
"id": 2172 },
} "map": "cp_castle3"
}, },
"map": "cp_castle3" {
}, "division": "Platinum",
{ "week": 13,
"division": "Platinum", "date": "Mon Jan 28",
"week": 13, "side": "home",
"date": "Mon Jan 28", "result": {
"side": "home", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": "[=1=]",
"opponent": { "id": 2197
"name": "[=1=]", }
"id": 2197 },
} "map": "cp_dustbowl"
}, },
"map": "cp_dustbowl" {
}, "division": "Platinum",
{ "week": 14,
"division": "Platinum", "date": "Mon Feb 04",
"week": 14, "side": "home",
"date": "Mon Feb 04", "result": {
"side": "home", "state": "unknown",
"result": { "opponent": {
"state": "unknown", "name": "[FOoM]",
"opponent": { "id": 2172
"name": "[FOoM]", }
"id": 2172 },
} "map": "cp_science"
}, }
"map": "cp_science" ]
} }
] ]
} }
]

View file

@ -261,24 +261,96 @@ pub struct RosterHistory {
pub role: MembershipRole, pub role: MembershipRole,
} }
/// Match history of a single team: the team it belongs to plus its matches grouped per season.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TeamMatches {
/// Reference (id and name) to the team this match history belongs to.
pub team: TeamRef,
/// Seasons the team has match data for, each carrying its own list of matches.
pub seasons: Vec<TeamSeason>,
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TeamSeason { pub struct TeamSeason {
pub season: u32, pub season: u32,
pub format: GameMode,
pub matches: Vec<TeamSeasonMatch>, pub matches: Vec<TeamSeasonMatch>,
} }
/// Side of a match a team played on.
///
/// With the `serde` feature enabled the variants are (de)serialized in
/// snake_case, i.e. `"home"` and `"visiting"`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum Side {
/// The team played as the home team.
Home,
/// The team played as the visiting (away) team.
Visiting,
}
/// Error produced when a string cannot be parsed into a [`Side`].
#[derive(Debug, Clone, Error)]
#[error("Invalid side {text}")]
pub struct InvalidSide {
/// The input text that was not recognised as a side.
pub text: String,
}
impl FromStr for Side {
    type Err = InvalidSide;

    /// Parse a side from its exact textual form.
    ///
    /// Only the literal strings `"home"` and `"visiting"` are accepted; the
    /// comparison is case-sensitive and performs no trimming.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidSide`] carrying the offending input for any other text.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "home" {
            return Ok(Side::Home);
        }
        if s == "visiting" {
            return Ok(Side::Visiting);
        }
        Err(InvalidSide { text: s.to_owned() })
    }
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TeamSeasonMatch { pub struct TeamSeasonMatch {
pub division: String, pub division: String,
pub week: u8, pub week: u8,
pub date: String, pub date: String,
pub side: String, pub side: Side,
pub result: MatchResult, pub result: MatchResult,
pub map: String, pub map: String,
} }
impl TeamSeasonMatch {
    /// Convert this season match into a [`MatchInfo`] expressed as home/away.
    ///
    /// `team` is the team this match entry belongs to and `format` is the
    /// game mode to record on the resulting [`MatchInfo`].
    ///
    /// Returns `None` unless the result is [`MatchResult::Played`] or
    /// [`MatchResult::Pending`]; only those variants are read for the
    /// opponent and the two scores.
    pub fn match_info(&self, team: &TeamRef, format: GameMode) -> Option<MatchInfo> {
        // Pull out the opponent and both scores, bailing out for every other result state.
        let (opponent, own_score, opponent_score) = match &self.result {
            MatchResult::Played {
                opponent,
                score,
                score_opponent,
                ..
            }
            | MatchResult::Pending {
                opponent,
                score,
                score_opponent,
                ..
            } => (opponent, *score, *score_opponent),
            _ => return None,
        };

        // Map the "us vs. opponent" view onto home/away based on the side we played.
        let (team_home, team_away, score_home, score_away) = match &self.side {
            Side::Home => (team.clone(), opponent.clone(), own_score, opponent_score),
            Side::Visiting => (opponent.clone(), team.clone(), opponent_score, own_score),
        };

        Some(MatchInfo {
            // The season overview carries no comment data.
            comment: None,
            comment_author: None,
            team_home,
            team_away,
            score_home,
            score_away,
            map: self.map.clone(),
            week: self.week,
            format,
            default_date: self.date.clone(),
        })
    }
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case", tag = "state"))] #[cfg_attr(feature = "serde", serde(rename_all = "snake_case", tag = "state"))]
@ -306,8 +378,7 @@ pub enum MatchResult {
impl MatchResult { impl MatchResult {
pub fn match_id(&self) -> Option<u32> { pub fn match_id(&self) -> Option<u32> {
match self { match self {
MatchResult::Played { id, .. } => Some(*id), MatchResult::Played { id, .. } | MatchResult::Pending { id, .. } => Some(*id),
MatchResult::Pending { id, .. } => Some(*id),
_ => None, _ => None,
} }
} }