mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
cleanups, tests, clippy
This commit is contained in:
parent
d937dbbb5e
commit
a9a3751067
16 changed files with 5932 additions and 117 deletions
|
|
@ -2,6 +2,7 @@ use steamid_ng::SteamID;
|
|||
use time::Date;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct Player {
|
||||
pub name: String,
|
||||
pub steam_id: SteamID,
|
||||
|
|
@ -10,6 +11,7 @@ pub struct Player {
|
|||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct Honors {
|
||||
pub format: String,
|
||||
pub season: String,
|
||||
|
|
@ -17,6 +19,7 @@ pub struct Honors {
|
|||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct TeamMemberShip {
|
||||
pub team: TeamRef,
|
||||
pub league: String,
|
||||
|
|
@ -24,13 +27,16 @@ pub struct TeamMemberShip {
|
|||
}
|
||||
|
||||
/// Lightweight reference to a team: its name together with its numeric id.
///
/// Equality and hashing are derived so references can be compared directly
/// (for example in tests) and used as keys in maps or sets.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TeamRef {
    /// Name of the team.
    pub name: String,
    /// Numeric id of the team.
    pub id: u32,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct MembershipHistory {
|
||||
pub format: String,
|
||||
pub team: TeamRef,
|
||||
pub division: String,
|
||||
pub joined: Date,
|
||||
|
|
|
|||
21
src/error.rs
21
src/error.rs
|
|
@ -1,5 +1,5 @@
|
|||
use thiserror::Error;
|
||||
use miette::Diagnostic;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error, Diagnostic)]
|
||||
pub enum ScrapeError {
|
||||
|
|
@ -7,14 +7,23 @@ pub enum ScrapeError {
|
|||
Request(#[from] reqwest::Error),
|
||||
#[error(transparent)]
|
||||
#[diagnostic(transparent)]
|
||||
Parse(#[from] ParseError)
|
||||
Parse(#[from] ParseError),
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, Diagnostic)]
|
||||
#[derive(Debug, Error, Diagnostic, Clone)]
|
||||
pub enum ParseError {
|
||||
#[error("Couldn't find expected element '{selector}' for {role}")]
|
||||
ElementNotFound {
|
||||
selector: &'static str,
|
||||
role: &'static str
|
||||
}
|
||||
}
|
||||
role: &'static str,
|
||||
},
|
||||
#[error("Element '{selector}' does contain text for {role}")]
|
||||
EmptyText {
|
||||
selector: &'static str,
|
||||
role: &'static str,
|
||||
},
|
||||
#[error("Invalid link for {role}: {link}")]
|
||||
InvalidLink { link: String, role: &'static str },
|
||||
#[error("Invalid date for {role}: {date}")]
|
||||
InvalidDate { date: String, role: &'static str },
|
||||
}
|
||||
|
|
|
|||
57
src/lib.rs
Normal file
57
src/lib.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
pub mod data;
|
||||
mod error;
|
||||
#[doc(hidden)]
|
||||
pub mod parser;
|
||||
|
||||
use crate::data::{MembershipHistory, Player};
|
||||
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
|
||||
pub use error::*;
|
||||
use reqwest::Client;
|
||||
use steamid_ng::SteamID;
|
||||
|
||||
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct UgcClient {
|
||||
client: Client,
|
||||
player_parser: PlayerParser,
|
||||
player_detail_parser: PlayerDetailsParser,
|
||||
}
|
||||
|
||||
impl UgcClient {
|
||||
pub fn new() -> Self {
|
||||
UgcClient {
|
||||
client: Client::default(),
|
||||
player_parser: PlayerParser::new(),
|
||||
player_detail_parser: PlayerDetailsParser::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn player(&self, steam_id: SteamID) -> Result<Player> {
|
||||
let body = self
|
||||
.client
|
||||
.get(&format!(
|
||||
"https://www.ugcleague.com/players_page.cfm?player_id={}",
|
||||
u64::from(steam_id)
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
self.player_parser.parse(&body)
|
||||
}
|
||||
|
||||
pub async fn player_team_history(&self, steam_id: SteamID) -> Result<Vec<MembershipHistory>> {
|
||||
let body = self
|
||||
.client
|
||||
.get(&format!(
|
||||
"https://www.ugcleague.com/players_page_details.cfm?player_id={}",
|
||||
u64::from(steam_id)
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
self.player_detail_parser.parse(&body)
|
||||
}
|
||||
}
|
||||
22
src/main.rs
22
src/main.rs
|
|
@ -1,22 +0,0 @@
|
|||
pub mod data;
|
||||
mod error;
|
||||
mod parser;
|
||||
|
||||
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
|
||||
pub use error::*;
|
||||
use main_error::MainResult;
|
||||
use reqwest::get;
|
||||
|
||||
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> MainResult {
|
||||
let body =
|
||||
get("https://www.ugcleague.com/players_page_details.cfm?player_id=76561198024494988")
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
let parser = PlayerDetailsParser::new();
|
||||
dbg!(parser.parse(&body)?);
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
use crate::Result;
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{ElementRef, Selector};
|
||||
use time::format_description::FormatItem;
|
||||
use time::macros::format_description;
|
||||
|
||||
mod player;
|
||||
mod player_details;
|
||||
|
|
@ -19,29 +21,38 @@ trait ElementExt<'a> {
|
|||
|
||||
impl<'a> ElementExt<'a> for ElementRef<'a> {
    /// Returns the first text node that is non-empty after trimming, with the
    /// surrounding whitespace removed.
    fn first_text(&self) -> Option<&'a str> {
        self.text().map(str::trim).find(|s| !s.is_empty())
    }

    /// Returns the `n`-th non-blank text node (counting from 1), trimmed.
    ///
    /// Returns `None` when there are fewer than `n` non-blank text nodes, and
    /// also for `n == 0`, which has no corresponding node.
    fn nth_text(&self, n: usize) -> Option<&'a str> {
        // `n - 1` would underflow for `n == 0`; bail out with `None` instead.
        let index = n.checked_sub(1)?;
        self.text()
            .filter(|s| !s.trim().is_empty())
            .nth(index)
            .map(str::trim)
    }
}
|
||||
|
||||
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
|
||||
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
|
||||
el.select(selector)
|
||||
.next()
|
||||
.and_then(|item| item.text().filter(|s| !s.trim().is_empty()).next())
|
||||
.unwrap_or(default)
|
||||
.trim()
|
||||
.and_then(|item| item.text().find(|s| !s.trim().is_empty()))
|
||||
.map(str::trim)
|
||||
}
|
||||
|
||||
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
|
||||
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
|
||||
el.select(selector)
|
||||
.next()
|
||||
.and_then(|item| item.text().last())
|
||||
.unwrap_or(default)
|
||||
.trim()
|
||||
.map(str::trim)
|
||||
}
|
||||
|
||||
const DATE_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
|
||||
fn team_id_from_link(link: &str) -> Result<u32, ParseError> {
|
||||
link.rsplit_once('=')
|
||||
.and_then(|part| part.1.parse().ok())
|
||||
.ok_or_else(|| ParseError::InvalidLink {
|
||||
link: link.to_string(),
|
||||
role: "team id",
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
use super::{ElementExt, Parser};
|
||||
use crate::data::{Honors, Player, TeamMemberShip, TeamRef};
|
||||
use crate::parser::{select_last_text, select_text};
|
||||
use crate::parser::{select_last_text, select_text, team_id_from_link, DATE_FORMAT};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
use std::iter::repeat;
|
||||
use steamid_ng::SteamID;
|
||||
use time::{macros::format_description, Date};
|
||||
use time::Date;
|
||||
|
||||
const SELECTOR_PLAYER_NAME: &str = ".container .col-md-4 > h3 > b";
|
||||
const SELECTOR_PLAYER_ID: &str = ".container .col-md-4 > p.nomargin";
|
||||
|
|
@ -39,6 +39,12 @@ pub struct PlayerParser {
|
|||
selector_team_since: Selector,
|
||||
}
|
||||
|
||||
impl Default for PlayerParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl PlayerParser {
|
||||
pub fn new() -> Self {
|
||||
PlayerParser {
|
||||
|
|
@ -63,9 +69,7 @@ impl Parser for PlayerParser {
|
|||
type Output = Player;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(&document);
|
||||
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
|
||||
let document = Html::parse_document(document);
|
||||
let name = document
|
||||
.select(&self.selector_name)
|
||||
.next()
|
||||
|
|
@ -91,19 +95,37 @@ impl Parser for PlayerParser {
|
|||
let honors = document
|
||||
.select(&self.selector_honors_group)
|
||||
.flat_map(|group| {
|
||||
let format =
|
||||
select_text(group, &self.selector_honors_header, "format not detected")
|
||||
.trim_end_matches(" Medals");
|
||||
let format = select_text(group, &self.selector_honors_header)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_PLAYER_HONORS_HEADER,
|
||||
role: "player honors format",
|
||||
})
|
||||
.map(|format| format.trim_end_matches(" Medals"));
|
||||
let leagues = group.select(&self.selector_honors_league);
|
||||
let teams = group.select(&self.selector_honors_team);
|
||||
repeat(format).zip(leagues).zip(teams)
|
||||
})
|
||||
.map(|((format, season), team)| Honors {
|
||||
format: format.to_string(),
|
||||
season: season.text().next().unwrap_or_default().trim().to_string(),
|
||||
team: team.text().next().unwrap_or_default().trim().to_string(),
|
||||
.map(|((format_res, season), team)| {
|
||||
let format = format_res?;
|
||||
Ok(Honors {
|
||||
format: format.to_string(),
|
||||
season: season
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
selector: SELECTOR_PLAYER_HONORS_LEAGUE,
|
||||
role: "player honors season",
|
||||
})?
|
||||
.to_string(),
|
||||
team: team
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
selector: SELECTOR_PLAYER_HONORS_TEAM,
|
||||
role: "player honors team",
|
||||
})?
|
||||
.to_string(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let teams = document
|
||||
.select(&self.selector_team_group)
|
||||
|
|
@ -112,31 +134,58 @@ impl Parser for PlayerParser {
|
|||
let link = item
|
||||
.select(&self.selector_team_link)
|
||||
.next()
|
||||
.and_then(|link| link.attr("href"))
|
||||
.unwrap_or("=0");
|
||||
let name = select_text(item, &self.selector_team_name, "failed to find name");
|
||||
let league = select_text(item, &self.selector_team_league, "failed to find league");
|
||||
let since = select_last_text(item, &self.selector_team_since, "");
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_PLAYER_TEAM_LINK,
|
||||
role: "players team link",
|
||||
})?
|
||||
.attr("href")
|
||||
.unwrap_or_default();
|
||||
let name = select_text(item, &self.selector_team_name).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_PLAYER_TEAM_NAME,
|
||||
role: "players team name",
|
||||
},
|
||||
)?;
|
||||
let league = select_text(item, &self.selector_team_league).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_PLAYER_TEAM_LEAGUE,
|
||||
role: "players team league",
|
||||
},
|
||||
)?;
|
||||
let since = select_last_text(item, &self.selector_team_since).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_PLAYER_TEAM_SINCE,
|
||||
role: "players team joined",
|
||||
},
|
||||
)?;
|
||||
|
||||
let id = match link.rsplit_once("=") {
|
||||
Some((_, id)) => id.parse().unwrap_or_default(),
|
||||
_ => 0,
|
||||
};
|
||||
let since = match since.rsplit_once("\n") {
|
||||
Some((_, since)) => Date::parse(since, &format).unwrap_or(Date::MIN),
|
||||
_ => Date::MIN,
|
||||
let id = team_id_from_link(link)?;
|
||||
let since = match since.rsplit_once('\n') {
|
||||
Some((_, since)) => {
|
||||
Date::parse(since, DATE_FORMAT).map_err(|_| ParseError::InvalidDate {
|
||||
role: "team join date",
|
||||
date: since.to_string(),
|
||||
})?
|
||||
}
|
||||
_ => {
|
||||
return Err(ParseError::InvalidDate {
|
||||
role: "team join date",
|
||||
date: since.to_string(),
|
||||
}
|
||||
.into())
|
||||
}
|
||||
};
|
||||
|
||||
TeamMemberShip {
|
||||
Ok(TeamMemberShip {
|
||||
team: TeamRef {
|
||||
name: name.to_string(),
|
||||
id,
|
||||
},
|
||||
league: league.to_string(),
|
||||
since,
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(Player {
|
||||
name,
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use super::{ElementExt, Parser};
|
||||
use crate::data::{MembershipHistory, TeamRef};
|
||||
use crate::parser::select_text;
|
||||
use crate::Result;
|
||||
use crate::parser::{select_text, team_id_from_link, DATE_FORMAT};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
use time::{macros::format_description, Date};
|
||||
use time::Date;
|
||||
|
||||
const SELECTOR_TEAM_FORMAT: &str = ".container .white-row-small thead h4";
|
||||
const SELECTOR_TEAM_GROUP: &str = ".container .white-row-small tbody";
|
||||
|
|
@ -37,13 +37,19 @@ impl PlayerDetailsParser {
|
|||
}
|
||||
}
|
||||
|
||||
impl Default for PlayerDetailsParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for PlayerDetailsParser {
|
||||
type Output = Vec<MembershipHistory>;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(&document);
|
||||
let document = Html::parse_document(document);
|
||||
|
||||
Ok(document
|
||||
document
|
||||
.select(&self.selector_team_format)
|
||||
.zip(document.select(&self.selector_team_group))
|
||||
.flat_map(|(format, history)| {
|
||||
|
|
@ -52,34 +58,57 @@ impl Parser for PlayerDetailsParser {
|
|||
.map(move |row| (format, row))
|
||||
})
|
||||
.map(|(format, team)| {
|
||||
let format = format.first_text();
|
||||
let format = format.first_text().ok_or(ParseError::EmptyText {
|
||||
selector: SELECTOR_TEAM_FORMAT,
|
||||
role: "team format",
|
||||
})?;
|
||||
let link = team
|
||||
.select(&self.selector_team_link)
|
||||
.next()
|
||||
.and_then(|link| link.attr("href"))
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_LINK,
|
||||
role: "team link",
|
||||
})?
|
||||
.attr("href")
|
||||
.unwrap_or_default();
|
||||
let name = select_text(team, &self.selector_team_link, "failed to find team name");
|
||||
let division =
|
||||
select_text(team, &self.selector_team_joined, "failed to find division");
|
||||
let joined = select_text(team, &self.selector_team_joined, "");
|
||||
let left = select_text(team, &self.selector_team_left, "");
|
||||
let name = select_text(team, &self.selector_team_link).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_LINK,
|
||||
role: "team link",
|
||||
},
|
||||
)?;
|
||||
let division = select_text(team, &self.selector_team_division).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_DIVISION,
|
||||
role: "team division",
|
||||
},
|
||||
)?;
|
||||
let joined = select_text(team, &self.selector_team_joined).ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TEAM_JOINED,
|
||||
role: "team join date",
|
||||
},
|
||||
)?;
|
||||
let left = select_text(team, &self.selector_team_left).unwrap_or_default();
|
||||
|
||||
let id = match link.rsplit_once("=") {
|
||||
Some((_, id)) => id.parse().unwrap_or_default(),
|
||||
_ => 0,
|
||||
};
|
||||
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
let id = team_id_from_link(link)?;
|
||||
|
||||
MembershipHistory {
|
||||
joined: Date::parse(joined, format).unwrap_or(Date::MIN),
|
||||
left: Date::parse(left, format).ok(),
|
||||
Ok(MembershipHistory {
|
||||
format: format.to_string(),
|
||||
joined: Date::parse(joined, DATE_FORMAT).map_err(|_| {
|
||||
ParseError::InvalidDate {
|
||||
role: "team join date",
|
||||
date: joined.to_string(),
|
||||
}
|
||||
})?,
|
||||
left: Date::parse(left, DATE_FORMAT).ok(),
|
||||
team: TeamRef {
|
||||
name: name.to_string(),
|
||||
id,
|
||||
},
|
||||
division: division.to_string(),
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect())
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue