cleanups, tests, clippy

This commit is contained in:
Robin Appelman 2023-11-16 16:54:43 +01:00
commit a9a3751067
16 changed files with 5932 additions and 117 deletions

View file

@ -2,6 +2,7 @@ use steamid_ng::SteamID;
use time::Date;
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Player {
pub name: String,
pub steam_id: SteamID,
@ -10,6 +11,7 @@ pub struct Player {
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Honors {
pub format: String,
pub season: String,
@ -17,6 +19,7 @@ pub struct Honors {
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TeamMemberShip {
pub team: TeamRef,
pub league: String,
@ -24,13 +27,16 @@ pub struct TeamMemberShip {
}
/// Lightweight reference to a team: its name together with its numeric id.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TeamRef {
/// Name of the team.
pub name: String,
/// Numeric id of the team.
pub id: u32,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct MembershipHistory {
pub format: String,
pub team: TeamRef,
pub division: String,
pub joined: Date,

View file

@ -1,5 +1,5 @@
use thiserror::Error;
use miette::Diagnostic;
use thiserror::Error;
#[derive(Debug, Error, Diagnostic)]
pub enum ScrapeError {
@ -7,14 +7,23 @@ pub enum ScrapeError {
Request(#[from] reqwest::Error),
#[error(transparent)]
#[diagnostic(transparent)]
Parse(#[from] ParseError)
Parse(#[from] ParseError),
}
#[derive(Debug, Error, Diagnostic)]
#[derive(Debug, Error, Diagnostic, Clone)]
pub enum ParseError {
#[error("Couldn't find expected element '{selector}' for {role}")]
ElementNotFound {
selector: &'static str,
role: &'static str
}
}
role: &'static str,
},
#[error("Element '{selector}' does contain text for {role}")]
EmptyText {
selector: &'static str,
role: &'static str,
},
#[error("Invalid link for {role}: {link}")]
InvalidLink { link: String, role: &'static str },
#[error("Invalid date for {role}: {date}")]
InvalidDate { date: String, role: &'static str },
}

57
src/lib.rs Normal file
View file

@ -0,0 +1,57 @@
pub mod data;
mod error;
#[doc(hidden)]
pub mod parser;
use crate::data::{MembershipHistory, Player};
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
pub use error::*;
use reqwest::Client;
use steamid_ng::SteamID;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
/// Client for fetching player pages from ugcleague.com and parsing them.
///
/// Bundles the HTTP client with the two page parsers so they are built once
/// and shared by every request made through this value.
#[derive(Default)]
pub struct UgcClient {
/// HTTP client used for every request.
client: Client,
/// Parser for the `players_page.cfm` page.
player_parser: PlayerParser,
/// Parser for the `players_page_details.cfm` page.
player_detail_parser: PlayerDetailsParser,
}
impl UgcClient {
    /// Creates a client with a default HTTP client and freshly built parsers.
    pub fn new() -> Self {
        UgcClient {
            client: Client::default(),
            player_parser: PlayerParser::new(),
            player_detail_parser: PlayerDetailsParser::new(),
        }
    }

    /// Downloads `https://www.ugcleague.com/<page>?player_id=<steam_id>` and
    /// returns the response body as text.
    ///
    /// The steam id is sent in its 64-bit numeric form.
    ///
    /// # Errors
    ///
    /// Returns [`ScrapeError::Request`] when sending the request or reading
    /// the body fails.
    // NOTE(review): the HTTP status is not inspected here, so the body of an
    // error response is handed to the parser like any other page.
    async fn fetch_player_page(&self, page: &str, steam_id: SteamID) -> Result<String> {
        let url = format!(
            "https://www.ugcleague.com/{}?player_id={}",
            page,
            u64::from(steam_id)
        );
        let body = self.client.get(url).send().await?.text().await?;
        Ok(body)
    }

    /// Fetches the player page for `steam_id` and parses it into a [`Player`].
    ///
    /// # Errors
    ///
    /// Returns [`ScrapeError::Request`] when the page can't be downloaded, or
    /// the parser's error when the page doesn't have the expected structure.
    pub async fn player(&self, steam_id: SteamID) -> Result<Player> {
        let body = self.fetch_player_page("players_page.cfm", steam_id).await?;
        self.player_parser.parse(&body)
    }

    /// Fetches the player details page for `steam_id` and parses the team
    /// membership history listed on it.
    ///
    /// # Errors
    ///
    /// Returns [`ScrapeError::Request`] when the page can't be downloaded, or
    /// the parser's error when the page doesn't have the expected structure.
    pub async fn player_team_history(&self, steam_id: SteamID) -> Result<Vec<MembershipHistory>> {
        let body = self
            .fetch_player_page("players_page_details.cfm", steam_id)
            .await?;
        self.player_detail_parser.parse(&body)
    }
}

View file

@ -1,22 +0,0 @@
pub mod data;
mod error;
mod parser;
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
pub use error::*;
use main_error::MainResult;
use reqwest::get;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
/// Downloads the player details page for one hard-coded player id, parses it
/// with `PlayerDetailsParser` and debug-prints the parsed result.
#[tokio::main]
async fn main() -> MainResult {
// Request and body-read failures are propagated to the caller via `?`.
let body =
get("https://www.ugcleague.com/players_page_details.cfm?player_id=76561198024494988")
.await?
.text()
.await?;
let parser = PlayerDetailsParser::new();
// `dbg!` prints the parsed value to stderr; a parse error is returned instead.
dbg!(parser.parse(&body)?);
Ok(())
}

View file

@ -1,5 +1,7 @@
use crate::Result;
use crate::{ParseError, Result};
use scraper::{ElementRef, Selector};
use time::format_description::FormatItem;
use time::macros::format_description;
mod player;
mod player_details;
@ -19,29 +21,38 @@ trait ElementExt<'a> {
impl<'a> ElementExt<'a> for ElementRef<'a> {
fn first_text(&self) -> Option<&'a str> {
self.text().filter(|s| !s.trim().is_empty()).next()
self.text().map(str::trim).find(|s| !s.is_empty())
}
fn nth_text(&self, n: usize) -> Option<&'a str> {
self.text()
.filter(|s| !s.trim().is_empty())
.skip(n - 1)
.next()
.map(|s| s.trim())
.nth(n - 1)
.map(str::trim)
}
}
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
el.select(selector)
.next()
.and_then(|item| item.text().filter(|s| !s.trim().is_empty()).next())
.unwrap_or(default)
.trim()
.and_then(|item| item.text().find(|s| !s.trim().is_empty()))
.map(str::trim)
}
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
el.select(selector)
.next()
.and_then(|item| item.text().last())
.unwrap_or(default)
.trim()
.map(str::trim)
}
/// Date layout shared by the page parsers: `month/day/year`, with month and
/// day written without padding (for example `3/7/2021`).
const DATE_FORMAT: &[FormatItem<'static>] =
format_description!("[month padding:none]/[day padding:none]/[year]");
/// Extracts the numeric team id from a team link.
///
/// The id is whatever follows the last `=` in `link`, parsed as a `u32`.
///
/// # Errors
///
/// Returns [`ParseError::InvalidLink`] when `link` contains no `=` or the
/// text after the last `=` is not a valid `u32`.
fn team_id_from_link(link: &str) -> Result<u32, ParseError> {
    let parsed_id = match link.rsplit_once('=') {
        Some((_, raw_id)) => raw_id.parse::<u32>().ok(),
        None => None,
    };

    match parsed_id {
        Some(id) => Ok(id),
        None => Err(ParseError::InvalidLink {
            link: link.to_string(),
            role: "team id",
        }),
    }
}

View file

@ -1,11 +1,11 @@
use super::{ElementExt, Parser};
use crate::data::{Honors, Player, TeamMemberShip, TeamRef};
use crate::parser::{select_last_text, select_text};
use crate::parser::{select_last_text, select_text, team_id_from_link, DATE_FORMAT};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
use std::iter::repeat;
use steamid_ng::SteamID;
use time::{macros::format_description, Date};
use time::Date;
const SELECTOR_PLAYER_NAME: &str = ".container .col-md-4 > h3 > b";
const SELECTOR_PLAYER_ID: &str = ".container .col-md-4 > p.nomargin";
@ -39,6 +39,12 @@ pub struct PlayerParser {
selector_team_since: Selector,
}
impl Default for PlayerParser {
fn default() -> Self {
Self::new()
}
}
impl PlayerParser {
pub fn new() -> Self {
PlayerParser {
@ -63,9 +69,7 @@ impl Parser for PlayerParser {
type Output = Player;
fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(&document);
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
let document = Html::parse_document(document);
let name = document
.select(&self.selector_name)
.next()
@ -91,19 +95,37 @@ impl Parser for PlayerParser {
let honors = document
.select(&self.selector_honors_group)
.flat_map(|group| {
let format =
select_text(group, &self.selector_honors_header, "format not detected")
.trim_end_matches(" Medals");
let format = select_text(group, &self.selector_honors_header)
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_HONORS_HEADER,
role: "player honors format",
})
.map(|format| format.trim_end_matches(" Medals"));
let leagues = group.select(&self.selector_honors_league);
let teams = group.select(&self.selector_honors_team);
repeat(format).zip(leagues).zip(teams)
})
.map(|((format, season), team)| Honors {
format: format.to_string(),
season: season.text().next().unwrap_or_default().trim().to_string(),
team: team.text().next().unwrap_or_default().trim().to_string(),
.map(|((format_res, season), team)| {
let format = format_res?;
Ok(Honors {
format: format.to_string(),
season: season
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_LEAGUE,
role: "player honors season",
})?
.to_string(),
team: team
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_TEAM,
role: "player honors team",
})?
.to_string(),
})
})
.collect();
.collect::<Result<Vec<_>>>()?;
let teams = document
.select(&self.selector_team_group)
@ -112,31 +134,58 @@ impl Parser for PlayerParser {
let link = item
.select(&self.selector_team_link)
.next()
.and_then(|link| link.attr("href"))
.unwrap_or("=0");
let name = select_text(item, &self.selector_team_name, "failed to find name");
let league = select_text(item, &self.selector_team_league, "failed to find league");
let since = select_last_text(item, &self.selector_team_since, "");
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_LINK,
role: "players team link",
})?
.attr("href")
.unwrap_or_default();
let name = select_text(item, &self.selector_team_name).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_NAME,
role: "players team name",
},
)?;
let league = select_text(item, &self.selector_team_league).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_LEAGUE,
role: "players team league",
},
)?;
let since = select_last_text(item, &self.selector_team_since).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_SINCE,
role: "players team joined",
},
)?;
let id = match link.rsplit_once("=") {
Some((_, id)) => id.parse().unwrap_or_default(),
_ => 0,
};
let since = match since.rsplit_once("\n") {
Some((_, since)) => Date::parse(since, &format).unwrap_or(Date::MIN),
_ => Date::MIN,
let id = team_id_from_link(link)?;
let since = match since.rsplit_once('\n') {
Some((_, since)) => {
Date::parse(since, DATE_FORMAT).map_err(|_| ParseError::InvalidDate {
role: "team join date",
date: since.to_string(),
})?
}
_ => {
return Err(ParseError::InvalidDate {
role: "team join date",
date: since.to_string(),
}
.into())
}
};
TeamMemberShip {
Ok(TeamMemberShip {
team: TeamRef {
name: name.to_string(),
id,
},
league: league.to_string(),
since,
}
})
})
.collect();
.collect::<Result<Vec<_>>>()?;
Ok(Player {
name,

View file

@ -1,9 +1,9 @@
use super::{ElementExt, Parser};
use crate::data::{MembershipHistory, TeamRef};
use crate::parser::select_text;
use crate::Result;
use crate::parser::{select_text, team_id_from_link, DATE_FORMAT};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
use time::{macros::format_description, Date};
use time::Date;
const SELECTOR_TEAM_FORMAT: &str = ".container .white-row-small thead h4";
const SELECTOR_TEAM_GROUP: &str = ".container .white-row-small tbody";
@ -37,13 +37,19 @@ impl PlayerDetailsParser {
}
}
impl Default for PlayerDetailsParser {
fn default() -> Self {
Self::new()
}
}
impl Parser for PlayerDetailsParser {
type Output = Vec<MembershipHistory>;
fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(&document);
let document = Html::parse_document(document);
Ok(document
document
.select(&self.selector_team_format)
.zip(document.select(&self.selector_team_group))
.flat_map(|(format, history)| {
@ -52,34 +58,57 @@ impl Parser for PlayerDetailsParser {
.map(move |row| (format, row))
})
.map(|(format, team)| {
let format = format.first_text();
let format = format.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_TEAM_FORMAT,
role: "team format",
})?;
let link = team
.select(&self.selector_team_link)
.next()
.and_then(|link| link.attr("href"))
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_TEAM_LINK,
role: "team link",
})?
.attr("href")
.unwrap_or_default();
let name = select_text(team, &self.selector_team_link, "failed to find team name");
let division =
select_text(team, &self.selector_team_joined, "failed to find division");
let joined = select_text(team, &self.selector_team_joined, "");
let left = select_text(team, &self.selector_team_left, "");
let name = select_text(team, &self.selector_team_link).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_LINK,
role: "team link",
},
)?;
let division = select_text(team, &self.selector_team_division).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_DIVISION,
role: "team division",
},
)?;
let joined = select_text(team, &self.selector_team_joined).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_JOINED,
role: "team join date",
},
)?;
let left = select_text(team, &self.selector_team_left).unwrap_or_default();
let id = match link.rsplit_once("=") {
Some((_, id)) => id.parse().unwrap_or_default(),
_ => 0,
};
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
let id = team_id_from_link(link)?;
MembershipHistory {
joined: Date::parse(joined, format).unwrap_or(Date::MIN),
left: Date::parse(left, format).ok(),
Ok(MembershipHistory {
format: format.to_string(),
joined: Date::parse(joined, DATE_FORMAT).map_err(|_| {
ParseError::InvalidDate {
role: "team join date",
date: joined.to_string(),
}
})?,
left: Date::parse(left, DATE_FORMAT).ok(),
team: TeamRef {
name: name.to_string(),
id,
},
division: division.to_string(),
}
})
})
.collect())
.collect()
}
}