mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
work
This commit is contained in:
parent
0f5ea2ebda
commit
53cc7822c4
26 changed files with 31748 additions and 73 deletions
93
src/data.rs
93
src/data.rs
|
|
@ -1,5 +1,5 @@
|
|||
use steamid_ng::SteamID;
|
||||
use time::Date;
|
||||
pub use steamid_ng::SteamID;
|
||||
use time::{Date, OffsetDateTime};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
|
|
@ -42,3 +42,92 @@ pub struct MembershipHistory {
|
|||
pub joined: Date,
|
||||
pub left: Option<Date>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct Team {
|
||||
pub name: String,
|
||||
pub tag: String,
|
||||
pub image: String,
|
||||
pub format: String,
|
||||
pub timezone: String,
|
||||
pub division: String,
|
||||
pub description: String,
|
||||
pub titles: Vec<String>,
|
||||
pub members: Vec<Membership>,
|
||||
pub results: Vec<Record>,
|
||||
pub name_changes: Vec<NameChange>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct NameChange {
|
||||
pub from_tag: String,
|
||||
pub from: String,
|
||||
pub to_tag: String,
|
||||
pub to: String,
|
||||
pub date: Date,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct Membership {
|
||||
pub name: String,
|
||||
pub steam_id: SteamID,
|
||||
pub role: String,
|
||||
pub since: OffsetDateTime,
|
||||
}
|
||||
|
||||
/// Win/loss record of a team for one season.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Record {
    /// Season number.
    pub season: u32,
    /// Division text listed for that season.
    pub division: String,
    /// Number of wins.
    pub wins: u8,
    /// Number of losses.
    pub losses: u8,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct RosterHistory {
|
||||
pub name: String,
|
||||
pub steam_id: SteamID,
|
||||
pub joined: Date,
|
||||
pub left: Option<Date>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct TeamSeason {
|
||||
pub season: u32,
|
||||
pub matches: Vec<TeamSeasonMatch>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct TeamSeasonMatch {
|
||||
pub division: String,
|
||||
pub week: u8,
|
||||
pub date: String,
|
||||
pub side: String,
|
||||
pub result: MatchResult,
|
||||
pub map: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub enum MatchResult {
|
||||
Played {
|
||||
opponent: TeamRef,
|
||||
score: u8,
|
||||
score_opponent: u8,
|
||||
match_points: f32,
|
||||
match_points_opponent: f32,
|
||||
},
|
||||
Pending {
|
||||
opponent: TeamRef,
|
||||
score: u8,
|
||||
score_opponent: u8,
|
||||
},
|
||||
ByeWeek,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,14 @@
|
|||
use miette::Diagnostic;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error, Diagnostic)]
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ScrapeError {
|
||||
#[error("Failed to request data: {0:#}")]
|
||||
Request(#[from] reqwest::Error),
|
||||
#[error(transparent)]
|
||||
#[diagnostic(transparent)]
|
||||
Parse(#[from] ParseError),
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, Diagnostic, Clone)]
|
||||
#[derive(Debug, Error, Clone)]
|
||||
pub enum ParseError {
|
||||
#[error("Couldn't find expected element '{selector}' for {role}")]
|
||||
ElementNotFound {
|
||||
|
|
@ -22,6 +20,8 @@ pub enum ParseError {
|
|||
selector: &'static str,
|
||||
role: &'static str,
|
||||
},
|
||||
#[error("Invalid text for {role}: {text}")]
|
||||
InvalidText { text: String, role: &'static str },
|
||||
#[error("Invalid link for {role}: {link}")]
|
||||
InvalidLink { link: String, role: &'static str },
|
||||
#[error("Invalid date for {role}: {date}")]
|
||||
|
|
|
|||
63
src/lib.rs
63
src/lib.rs
|
|
@ -3,11 +3,14 @@ mod error;
|
|||
#[doc(hidden)]
|
||||
pub mod parser;
|
||||
|
||||
use crate::data::{MembershipHistory, Player};
|
||||
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
|
||||
use crate::data::{MembershipHistory, Player, RosterHistory, Team, TeamSeason};
|
||||
use crate::parser::{
|
||||
Parser, PlayerDetailsParser, PlayerParser, TeamMatchesParser, TeamParser,
|
||||
TeamRosterHistoryParser,
|
||||
};
|
||||
pub use error::*;
|
||||
use reqwest::Client;
|
||||
use steamid_ng::SteamID;
|
||||
pub use steamid_ng::SteamID;
|
||||
|
||||
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
|
||||
|
||||
|
|
@ -16,17 +19,25 @@ pub struct UgcClient {
|
|||
client: Client,
|
||||
player_parser: PlayerParser,
|
||||
player_detail_parser: PlayerDetailsParser,
|
||||
team_parser: TeamParser,
|
||||
team_roster_history_parser: TeamRosterHistoryParser,
|
||||
team_matches_parser: TeamMatchesParser,
|
||||
}
|
||||
|
||||
/// "API client" for ugc by scraping the website
|
||||
impl UgcClient {
|
||||
pub fn new() -> Self {
|
||||
UgcClient {
|
||||
client: Client::default(),
|
||||
player_parser: PlayerParser::new(),
|
||||
player_detail_parser: PlayerDetailsParser::new(),
|
||||
team_parser: TeamParser::new(),
|
||||
team_roster_history_parser: TeamRosterHistoryParser::new(),
|
||||
team_matches_parser: TeamMatchesParser::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve player information
|
||||
pub async fn player(&self, steam_id: SteamID) -> Result<Player> {
|
||||
let body = self
|
||||
.client
|
||||
|
|
@ -41,6 +52,7 @@ impl UgcClient {
|
|||
self.player_parser.parse(&body)
|
||||
}
|
||||
|
||||
/// Retrieve team membership history for a player
|
||||
pub async fn player_team_history(&self, steam_id: SteamID) -> Result<Vec<MembershipHistory>> {
|
||||
let body = self
|
||||
.client
|
||||
|
|
@ -54,4 +66,49 @@ impl UgcClient {
|
|||
.await?;
|
||||
self.player_detail_parser.parse(&body)
|
||||
}
|
||||
|
||||
/// Retrieve team information
|
||||
pub async fn team(&self, id: u32) -> Result<Team> {
|
||||
let body = self
|
||||
.client
|
||||
.get(&format!(
|
||||
"https://www.ugcleague.com/team_page.cfm?clan_id={}",
|
||||
id
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
self.team_parser.parse(&body)
|
||||
}
|
||||
|
||||
/// Retrieve team roster history
|
||||
pub async fn team_roster_history(&self, id: u32) -> Result<Vec<RosterHistory>> {
|
||||
let body = self
|
||||
.client
|
||||
.get(&format!(
|
||||
"https://www.ugcleague.com/team_page_rosterhistory.cfm?clan_id={}",
|
||||
id
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
self.team_roster_history_parser.parse(&body)
|
||||
}
|
||||
|
||||
/// Retrieve team match history
|
||||
pub async fn team_matches(&self, id: u32) -> Result<Vec<TeamSeason>> {
|
||||
let body = self
|
||||
.client
|
||||
.get(&format!(
|
||||
"https://www.ugcleague.com/team_page_matches.cfm?clan_id={}",
|
||||
id
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
self.team_matches_parser.parse(&body)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,20 @@
|
|||
use crate::{ParseError, Result};
|
||||
use scraper::{ElementRef, Selector};
|
||||
use steamid_ng::SteamID;
|
||||
use time::format_description::FormatItem;
|
||||
use time::macros::format_description;
|
||||
|
||||
mod player;
|
||||
mod player_details;
|
||||
mod team;
|
||||
mod team_matches;
|
||||
mod team_roster_history;
|
||||
|
||||
pub use player::*;
|
||||
pub use player_details::*;
|
||||
pub use team::*;
|
||||
pub use team_matches::*;
|
||||
pub use team_roster_history::*;
|
||||
|
||||
pub trait Parser {
|
||||
type Output;
|
||||
|
|
@ -47,6 +54,11 @@ fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a s
|
|||
|
||||
const DATE_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
/// Format of the "member since" timestamp on team pages: abbreviated month, day and year,
/// a `/` on its own line, a clock time with AM/PM marker and a literal `(ET)` on the last line.
///
/// The hour uses `repr:12`. With the default 24-hour representation the AM/PM marker is
/// still consumed but has no influence on the resulting hour, so afternoon times would be
/// read as morning times.
const MEMBER_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
    "[month repr:short] [day padding:none], [year]\n/\n[hour repr:12 padding:none]:[minute] [period]\n(ET)"
);
|
||||
const ROSTER_HISTORY_DATE_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month repr:short] [day padding:none], [year]");
|
||||
|
||||
fn team_id_from_link(link: &str) -> Result<u32, ParseError> {
|
||||
link.rsplit_once('=')
|
||||
|
|
@ -56,3 +68,13 @@ fn team_id_from_link(link: &str) -> Result<u32, ParseError> {
|
|||
role: "team id",
|
||||
})
|
||||
}
|
||||
|
||||
fn steam_id_from_link(link: &str) -> Result<SteamID, ParseError> {
|
||||
link.rsplit_once('=')
|
||||
.and_then(|part| part.1.parse::<u64>().ok())
|
||||
.ok_or_else(|| ParseError::InvalidLink {
|
||||
link: link.to_string(),
|
||||
role: "user id",
|
||||
})
|
||||
.map(SteamID::from)
|
||||
}
|
||||
|
|
|
|||
341
src/parser/team.rs
Normal file
341
src/parser/team.rs
Normal file
|
|
@ -0,0 +1,341 @@
|
|||
use super::{ElementExt, Parser};
|
||||
use crate::data::{Membership, NameChange, Record, Team};
|
||||
use crate::parser::{select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_FORMAT};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
use time::{Date, PrimitiveDateTime, UtcOffset};
|
||||
|
||||
// Page heading: name, tag and image of the team.
const SELECTOR_TEAM_NAME: &str = ".container .col-md-12 h1 > b";
const SELECTOR_TEAM_TAG: &str = ".container .col-md-12 h1 > span";
const SELECTOR_TEAM_IMAGE: &str = ".container .col-md-12 a > img";

// Team info box.
const SELECTOR_TEAM_FORMAT: &str = ".container .col-md-3 .white-row-small h5 .text-danger b";
const SELECTOR_TEAM_DIVISION: &str = ".container .col-md-3 .white-row-small h5 > b";
const SELECTOR_TEAM_TIMEZONE: &str = ".container .col-md-3 .white-row-small p > small > b";
const SELECTOR_TEAM_DESCRIPTION: &str =
    ".container .col-md-3 .white-row-small p:nth-child(4) > small";
const SELECTOR_TEAM_TITLES: &str = ".container .col-md-3 .white-row-small p > .text-warning";

// Member list: one row per member, the remaining selectors are relative to a row.
const SELECTOR_TEAM_MEMBER_ROW: &str =
    ".container .white-row-small > .row-fluid > .col-md-12 > .white-row-light-small";
const SELECTOR_TEAM_MEMBER_LINK: &str = "b > a[href^=\"players_page\"]";
const SELECTOR_TEAM_MEMBER_ROLE: &str = ".tinytext";
const SELECTOR_TEAM_MEMBER_SINCE: &str = ".tinytext > em";

// Season records table: one row per record, the remaining selectors are relative to a row.
const SELECTOR_TEAM_RECORDS: &str =
    ".container .col-md-3 .white-row-small .table-responsive > table tbody tr";
const SELECTOR_TEAM_RECORD_SEASON: &str = "td:nth-child(1) small span b";
const SELECTOR_TEAM_RECORD_DIVISION: &str = "td:nth-child(2) small";
const SELECTOR_TEAM_RECORD_RESULT: &str = "td:nth-child(3)";

// Name change table: one row per change, the remaining selectors are relative to a row.
const SELECTOR_TEAM_NAME_CHANGE: &str =
    ".white-row-small:nth-child(3) .table-responsive table tbody tr";
const SELECTOR_TEAM_NAME_FROM_TAG: &str = "td:nth-child(1) small";
const SELECTOR_TEAM_NAME_FROM_NAME: &str = "td:nth-child(2) small";
const SELECTOR_TEAM_NAME_TO_TAG: &str = "td:nth-child(3) small";
const SELECTOR_TEAM_NAME_TO_NAME: &str = "td:nth-child(4) small";
const SELECTOR_TEAM_NAME_DATE: &str = "td:nth-child(5) small";
|
||||
|
||||
pub struct TeamParser {
|
||||
selector_name: Selector,
|
||||
selector_tag: Selector,
|
||||
selector_image: Selector,
|
||||
|
||||
selector_team_format: Selector,
|
||||
selector_team_division: Selector,
|
||||
selector_team_timezone: Selector,
|
||||
selector_team_description: Selector,
|
||||
selector_team_titles: Selector,
|
||||
|
||||
selector_team_member_row: Selector,
|
||||
selector_team_member_link: Selector,
|
||||
selector_team_member_role: Selector,
|
||||
selector_team_member_since: Selector,
|
||||
|
||||
selector_team_records: Selector,
|
||||
selector_team_record_season: Selector,
|
||||
selector_team_record_division: Selector,
|
||||
selector_team_record_result: Selector,
|
||||
|
||||
selector_team_name_item: Selector,
|
||||
selector_team_name_from_tag: Selector,
|
||||
selector_team_name_from_name: Selector,
|
||||
selector_team_name_to_tag: Selector,
|
||||
selector_team_name_to_name: Selector,
|
||||
selector_team_name_date: Selector,
|
||||
}
|
||||
|
||||
impl Default for TeamParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl TeamParser {
|
||||
pub fn new() -> Self {
|
||||
TeamParser {
|
||||
selector_name: Selector::parse(SELECTOR_TEAM_NAME).unwrap(),
|
||||
selector_tag: Selector::parse(SELECTOR_TEAM_TAG).unwrap(),
|
||||
selector_image: Selector::parse(SELECTOR_TEAM_IMAGE).unwrap(),
|
||||
|
||||
selector_team_format: Selector::parse(SELECTOR_TEAM_FORMAT).unwrap(),
|
||||
selector_team_division: Selector::parse(SELECTOR_TEAM_DIVISION).unwrap(),
|
||||
selector_team_timezone: Selector::parse(SELECTOR_TEAM_TIMEZONE).unwrap(),
|
||||
selector_team_description: Selector::parse(SELECTOR_TEAM_DESCRIPTION).unwrap(),
|
||||
selector_team_titles: Selector::parse(SELECTOR_TEAM_TITLES).unwrap(),
|
||||
|
||||
selector_team_member_row: Selector::parse(SELECTOR_TEAM_MEMBER_ROW).unwrap(),
|
||||
selector_team_member_link: Selector::parse(SELECTOR_TEAM_MEMBER_LINK).unwrap(),
|
||||
selector_team_member_role: Selector::parse(SELECTOR_TEAM_MEMBER_ROLE).unwrap(),
|
||||
selector_team_member_since: Selector::parse(SELECTOR_TEAM_MEMBER_SINCE).unwrap(),
|
||||
|
||||
selector_team_records: Selector::parse(SELECTOR_TEAM_RECORDS).unwrap(),
|
||||
selector_team_record_season: Selector::parse(SELECTOR_TEAM_RECORD_SEASON).unwrap(),
|
||||
selector_team_record_division: Selector::parse(SELECTOR_TEAM_RECORD_DIVISION).unwrap(),
|
||||
selector_team_record_result: Selector::parse(SELECTOR_TEAM_RECORD_RESULT).unwrap(),
|
||||
|
||||
selector_team_name_item: Selector::parse(SELECTOR_TEAM_NAME_CHANGE).unwrap(),
|
||||
selector_team_name_from_tag: Selector::parse(SELECTOR_TEAM_NAME_FROM_TAG).unwrap(),
|
||||
selector_team_name_from_name: Selector::parse(SELECTOR_TEAM_NAME_FROM_NAME).unwrap(),
|
||||
selector_team_name_to_tag: Selector::parse(SELECTOR_TEAM_NAME_TO_TAG).unwrap(),
|
||||
selector_team_name_to_name: Selector::parse(SELECTOR_TEAM_NAME_TO_NAME).unwrap(),
|
||||
selector_team_name_date: Selector::parse(SELECTOR_TEAM_NAME_DATE).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for TeamParser {
|
||||
type Output = Team;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(document);
|
||||
let root = document.root_element();
|
||||
let name = select_text(root, &self.selector_name)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_NAME,
|
||||
role: "team name",
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
let tag = select_text(root, &self.selector_tag)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_TAG,
|
||||
role: "team tag",
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
let image =
|
||||
document
|
||||
.select(&self.selector_image)
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_IMAGE,
|
||||
role: "team image",
|
||||
})?;
|
||||
let image = image
|
||||
.attr("data-cfsrc")
|
||||
.or_else(|| image.attr("src"))
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
|
||||
let format = select_text(root, &self.selector_team_format)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_FORMAT,
|
||||
role: "team format",
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
let division = select_text(root, &self.selector_team_division)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_DIVISION,
|
||||
role: "team division",
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
let timezone = select_text(root, &self.selector_team_timezone)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_TIMEZONE,
|
||||
role: "team timzone",
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
let description = select_text(root, &self.selector_team_description)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_DESCRIPTION,
|
||||
role: "team description",
|
||||
})?
|
||||
.replace('\n', " ");
|
||||
|
||||
let titles = document
|
||||
.select(&self.selector_team_titles)
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_TITLES,
|
||||
role: "team titles",
|
||||
})?
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
let results = document
|
||||
.select(&self.selector_team_records)
|
||||
.map(|record| {
|
||||
let season = select_text(record, &self.selector_team_record_season).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_RECORD_SEASON,
|
||||
role: "team record season",
|
||||
},
|
||||
)?;
|
||||
let division = select_text(record, &self.selector_team_record_division)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_RECORD_DIVISION,
|
||||
role: "team record division",
|
||||
})?
|
||||
.to_string();
|
||||
let result = select_text(record, &self.selector_team_record_result).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_RECORD_RESULT,
|
||||
role: "team record result",
|
||||
},
|
||||
)?;
|
||||
|
||||
let (wins, losses) =
|
||||
result
|
||||
.split_once('-')
|
||||
.ok_or_else(|| ParseError::InvalidText {
|
||||
text: result.to_string(),
|
||||
role: "team record result",
|
||||
})?;
|
||||
|
||||
Ok(Record {
|
||||
season: season.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: season.to_string(),
|
||||
role: "team record season",
|
||||
})?,
|
||||
division,
|
||||
wins: wins.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: wins.to_string(),
|
||||
role: "team record wins",
|
||||
})?,
|
||||
losses: losses.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: losses.to_string(),
|
||||
role: "team record losses",
|
||||
})?,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let members = document
|
||||
.select(&self.selector_team_member_row)
|
||||
.map(|row| {
|
||||
let link = row.select(&self.selector_team_member_link).next().ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_MEMBER_LINK,
|
||||
role: "team member link",
|
||||
},
|
||||
)?;
|
||||
let name = link
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
selector: SELECTOR_TEAM_MEMBER_LINK,
|
||||
role: "team member link",
|
||||
})?
|
||||
.to_string();
|
||||
let link = link.attr("href").unwrap_or_default();
|
||||
|
||||
let role = select_text(row, &self.selector_team_member_role)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_MEMBER_ROLE,
|
||||
role: "team member role",
|
||||
})?
|
||||
.split('\n')
|
||||
.next()
|
||||
.unwrap();
|
||||
let since = select_text(row, &self.selector_team_member_since).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_MEMBER_SINCE,
|
||||
role: "team member since",
|
||||
},
|
||||
)?;
|
||||
let role = role.trim().to_string();
|
||||
let since = since.trim();
|
||||
let since = PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
|
||||
.map_err(|_| ParseError::InvalidDate {
|
||||
role: "member join date",
|
||||
date: since.to_string(),
|
||||
})?
|
||||
.assume_offset(UtcOffset::from_hms(-5, 0, 0).unwrap());
|
||||
|
||||
Ok(Membership {
|
||||
name,
|
||||
steam_id: steam_id_from_link(link)?,
|
||||
role,
|
||||
since,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let name_changes = document
|
||||
.select(&self.selector_team_name_item)
|
||||
.map(|row| {
|
||||
let from_tag = select_text(row, &self.selector_team_name_from_tag).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_NAME_FROM_TAG,
|
||||
role: "team name change from tag",
|
||||
},
|
||||
)?;
|
||||
let from_name = select_text(row, &self.selector_team_name_from_name).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_NAME_FROM_NAME,
|
||||
role: "team name change from name",
|
||||
},
|
||||
)?;
|
||||
let to_tag = select_text(row, &self.selector_team_name_to_tag).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_NAME_TO_TAG,
|
||||
role: "team name change to tag",
|
||||
},
|
||||
)?;
|
||||
let to_name = select_text(row, &self.selector_team_name_to_name).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_NAME_TO_NAME,
|
||||
role: "team name change from name",
|
||||
},
|
||||
)?;
|
||||
let date = select_text(row, &self.selector_team_name_date).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_NAME_DATE,
|
||||
role: "team name change date",
|
||||
},
|
||||
)?;
|
||||
let date = Date::parse(date, DATE_FORMAT).map_err(|_| ParseError::InvalidDate {
|
||||
date: date.to_string(),
|
||||
role: "team name change date",
|
||||
})?;
|
||||
Ok(NameChange {
|
||||
from_tag: from_tag.to_string(),
|
||||
from: from_name.to_string(),
|
||||
to_tag: to_tag.to_string(),
|
||||
to: to_name.to_string(),
|
||||
date,
|
||||
})
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(Team {
|
||||
name,
|
||||
description,
|
||||
division,
|
||||
timezone,
|
||||
format,
|
||||
image,
|
||||
tag,
|
||||
titles,
|
||||
results,
|
||||
members,
|
||||
name_changes,
|
||||
})
|
||||
}
|
||||
}
|
||||
212
src/parser/team_matches.rs
Normal file
212
src/parser/team_matches.rs
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
use super::Parser;
|
||||
use crate::data::{MatchResult, TeamRef, TeamSeason, TeamSeasonMatch};
|
||||
use crate::parser::{select_text, team_id_from_link, ElementExt};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
// Season headings and the table bodies holding the matches; both are selected on the whole
// document and paired up in document order.
const SELECTOR_SEASON_TITLE: &str =
    ".container table.table.table-condensed.table-striped thead h4 b";
const SELECTOR_SEASON_MATCHES: &str =
    ".container table.table.table-condensed.table-striped tbody:nth-child(3n)";

// Every row of a season body except the last one is a match row.
const SELECTOR_SEASON_MATCH: &str = "tr:not(:last-child)";

// Columns of a match row, relative to the row.
const SELECTOR_SEASON_DIVISION: &str = "td:nth-child(1) small";
const SELECTOR_SEASON_WEEK: &str = "td:nth-child(2) small";
const SELECTOR_SEASON_DATE: &str = "td:nth-child(3) small";
const SELECTOR_SEASON_SIDE: &str = "td:nth-child(4) small";
const SELECTOR_SEASON_OPPONENT: &str = "td:nth-child(6) a";
const SELECTOR_SEASON_MAP: &str = "td:nth-child(7)";
const SELECTOR_SEASON_SCORES: &str = "td:nth-child(8)";
const SELECTOR_SEASON_POINTS: &str = "td:nth-child(9) small";
const SELECTOR_SEASON_POINTS_OPPONENTS: &str = "td:nth-child(10) small";
|
||||
|
||||
pub struct TeamMatchesParser {
|
||||
selector_title: Selector,
|
||||
selector_matches: Selector,
|
||||
selector_match: Selector,
|
||||
selector_division: Selector,
|
||||
selector_week: Selector,
|
||||
selector_date: Selector,
|
||||
selector_side: Selector,
|
||||
selector_opponent: Selector,
|
||||
selector_map: Selector,
|
||||
selector_scores: Selector,
|
||||
selector_points: Selector,
|
||||
selector_points_opponent: Selector,
|
||||
}
|
||||
|
||||
impl Default for TeamMatchesParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl TeamMatchesParser {
|
||||
pub fn new() -> Self {
|
||||
TeamMatchesParser {
|
||||
selector_title: Selector::parse(SELECTOR_SEASON_TITLE).unwrap(),
|
||||
selector_matches: Selector::parse(SELECTOR_SEASON_MATCHES).unwrap(),
|
||||
selector_match: Selector::parse(SELECTOR_SEASON_MATCH).unwrap(),
|
||||
selector_division: Selector::parse(SELECTOR_SEASON_DIVISION).unwrap(),
|
||||
selector_week: Selector::parse(SELECTOR_SEASON_WEEK).unwrap(),
|
||||
selector_date: Selector::parse(SELECTOR_SEASON_DATE).unwrap(),
|
||||
selector_side: Selector::parse(SELECTOR_SEASON_SIDE).unwrap(),
|
||||
selector_opponent: Selector::parse(SELECTOR_SEASON_OPPONENT).unwrap(),
|
||||
selector_map: Selector::parse(SELECTOR_SEASON_MAP).unwrap(),
|
||||
selector_scores: Selector::parse(SELECTOR_SEASON_SCORES).unwrap(),
|
||||
selector_points: Selector::parse(SELECTOR_SEASON_POINTS).unwrap(),
|
||||
selector_points_opponent: Selector::parse(SELECTOR_SEASON_POINTS_OPPONENTS).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for TeamMatchesParser {
|
||||
type Output = Vec<TeamSeason>;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(document);
|
||||
|
||||
document
|
||||
.select(&self.selector_title)
|
||||
.zip(document.select(&self.selector_matches))
|
||||
.map(|(title, matches)| {
|
||||
let title = title.first_text().ok_or(ParseError::EmptyText {
|
||||
selector: SELECTOR_SEASON_TITLE,
|
||||
role: "season title",
|
||||
})?;
|
||||
let season: u32 = title.trim_start_matches("Season ").parse().map_err(|_| {
|
||||
ParseError::InvalidText {
|
||||
text: title.to_string(),
|
||||
role: "season title",
|
||||
}
|
||||
})?;
|
||||
|
||||
let matches = matches
|
||||
.select(&self.selector_match)
|
||||
.map(|game| {
|
||||
let division = select_text(game, &self.selector_division).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_SEASON_DIVISION,
|
||||
role: "match division",
|
||||
},
|
||||
)?;
|
||||
let week = select_text(game, &self.selector_week).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_SEASON_WEEK,
|
||||
role: "match week",
|
||||
},
|
||||
)?;
|
||||
let week = week.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: week.to_string(),
|
||||
role: "match week",
|
||||
})?;
|
||||
let date = select_text(game, &self.selector_date).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_SEASON_DATE,
|
||||
role: "match date",
|
||||
},
|
||||
)?;
|
||||
let side = select_text(game, &self.selector_side).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_SEASON_SIDE,
|
||||
role: "match side",
|
||||
},
|
||||
)?;
|
||||
let opponent_link = game.select(&self.selector_opponent).next();
|
||||
let map = select_text(game, &self.selector_map).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_SEASON_MAP,
|
||||
role: "match map",
|
||||
},
|
||||
)?;
|
||||
let scores = select_text(game, &self.selector_scores)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_SEASON_SCORES,
|
||||
role: "match scores",
|
||||
})?
|
||||
.trim_start_matches('(')
|
||||
.trim_end_matches(')');
|
||||
let points = select_text(game, &self.selector_points);
|
||||
let points_opponent = select_text(game, &self.selector_points_opponent);
|
||||
|
||||
let points = points
|
||||
.map(|points| {
|
||||
points.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: points.to_string(),
|
||||
role: "match points",
|
||||
})
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
let points_opponent = points_opponent
|
||||
.map(|points| {
|
||||
points.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: points.to_string(),
|
||||
role: "match points opponent",
|
||||
})
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
let (score, score_opponent) =
|
||||
scores
|
||||
.split_once(" -\n")
|
||||
.ok_or_else(|| ParseError::InvalidText {
|
||||
text: scores.to_string(),
|
||||
role: "match scores",
|
||||
})?;
|
||||
let score = score.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: scores.to_string(),
|
||||
role: "match scores",
|
||||
});
|
||||
let score_opponent =
|
||||
score_opponent.parse().map_err(|_| ParseError::InvalidText {
|
||||
text: scores.to_string(),
|
||||
role: "match scores",
|
||||
});
|
||||
|
||||
let opponent = opponent_link
|
||||
.map(|link| {
|
||||
let name = link.first_text().ok_or(ParseError::EmptyText {
|
||||
selector: SELECTOR_SEASON_OPPONENT,
|
||||
role: "match opponent",
|
||||
})?;
|
||||
let id = team_id_from_link(link.attr("href").unwrap_or_default())?;
|
||||
Result::<_, ParseError>::Ok(TeamRef {
|
||||
name: name.to_string(),
|
||||
id,
|
||||
})
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
let result = match (opponent, points, points_opponent) {
|
||||
(Some(opponent), Some(point), Some(points_opponent)) => {
|
||||
MatchResult::Played {
|
||||
opponent,
|
||||
score: score?,
|
||||
score_opponent: score_opponent?,
|
||||
match_points: point,
|
||||
match_points_opponent: points_opponent,
|
||||
}
|
||||
}
|
||||
(Some(opponent), None, None) => MatchResult::Pending {
|
||||
opponent,
|
||||
score: score?,
|
||||
score_opponent: score_opponent?,
|
||||
},
|
||||
_ => MatchResult::ByeWeek,
|
||||
};
|
||||
Ok(TeamSeasonMatch {
|
||||
week,
|
||||
date: date.to_string(),
|
||||
side: side.to_string(),
|
||||
map: map.to_string(),
|
||||
division: division.to_string(),
|
||||
result,
|
||||
})
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(TeamSeason { season, matches })
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()
|
||||
}
|
||||
}
|
||||
97
src/parser/team_roster_history.rs
Normal file
97
src/parser/team_roster_history.rs
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
use super::Parser;
|
||||
use crate::data::RosterHistory;
|
||||
use crate::parser::{select_text, ROSTER_HISTORY_DATE_FORMAT};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
use steamid_ng::SteamID;
|
||||
use time::Date;
|
||||
|
||||
// One element per roster history entry.
const SELECTOR_ROSTER_ITEM: &str =
    ".container .white-row-small .row-fluid > .col-md-12 > .clearfix";

// Parts of an entry, relative to the entry element.
const SELECTOR_ROSTER_NAME: &str = "h5 b";
const SELECTOR_ROSTER_ID: &str = "h5 small";
const SELECTOR_ROSTER_JOINED: &str = "span.text-success small";
const SELECTOR_ROSTER_LEFT: &str = "span.text-danger small";
|
||||
|
||||
pub struct TeamRosterHistoryParser {
|
||||
selector_item: Selector,
|
||||
selector_name: Selector,
|
||||
selector_id: Selector,
|
||||
selector_joined: Selector,
|
||||
selector_left: Selector,
|
||||
}
|
||||
|
||||
impl Default for TeamRosterHistoryParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl TeamRosterHistoryParser {
|
||||
pub fn new() -> Self {
|
||||
TeamRosterHistoryParser {
|
||||
selector_item: Selector::parse(SELECTOR_ROSTER_ITEM).unwrap(),
|
||||
selector_name: Selector::parse(SELECTOR_ROSTER_NAME).unwrap(),
|
||||
selector_id: Selector::parse(SELECTOR_ROSTER_ID).unwrap(),
|
||||
selector_joined: Selector::parse(SELECTOR_ROSTER_JOINED).unwrap(),
|
||||
selector_left: Selector::parse(SELECTOR_ROSTER_LEFT).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for TeamRosterHistoryParser {
|
||||
type Output = Vec<RosterHistory>;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(document);
|
||||
|
||||
document
|
||||
.select(&self.selector_item)
|
||||
.map(|item| {
|
||||
let name =
|
||||
select_text(item, &self.selector_name).ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_ROSTER_NAME,
|
||||
role: "member name",
|
||||
})?;
|
||||
let steam_id =
|
||||
select_text(item, &self.selector_id).ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_ROSTER_ID,
|
||||
role: "member steam id",
|
||||
})?;
|
||||
let joined = select_text(item, &self.selector_joined).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_ROSTER_JOINED,
|
||||
role: "member joined date",
|
||||
},
|
||||
)?;
|
||||
let left = select_text(item, &self.selector_left);
|
||||
|
||||
Ok(RosterHistory {
|
||||
name: name.to_string(),
|
||||
steam_id: SteamID::from_steam3(steam_id).map_err(|_| {
|
||||
ParseError::InvalidText {
|
||||
text: steam_id.to_string(),
|
||||
role: "member steam id",
|
||||
}
|
||||
})?,
|
||||
joined: Date::parse(joined, ROSTER_HISTORY_DATE_FORMAT).map_err(|_| {
|
||||
ParseError::InvalidDate {
|
||||
date: steam_id.to_string(),
|
||||
role: "member join date",
|
||||
}
|
||||
})?,
|
||||
left: left
|
||||
.map(|left| {
|
||||
Date::parse(left, ROSTER_HISTORY_DATE_FORMAT).map_err(|_| {
|
||||
ParseError::InvalidDate {
|
||||
date: steam_id.to_string(),
|
||||
role: "member join date",
|
||||
}
|
||||
})
|
||||
})
|
||||
.transpose()?,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue