cleanups, tests, clippy

This commit is contained in:
Robin Appelman 2023-11-16 16:54:43 +01:00
commit a9a3751067
16 changed files with 5932 additions and 117 deletions

156
Cargo.lock generated
View file

@ -111,6 +111,18 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "console"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"windows-sys 0.45.0",
]
[[package]]
name = "core-foundation"
version = "0.9.3"
@ -157,6 +169,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3"
dependencies = [
"powerfmt",
"serde",
]
[[package]]
@ -191,6 +204,12 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "encoding_rs"
version = "0.8.33"
@ -222,7 +241,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c18ee0ed65a5f1f81cac6b1d213b69c35fa47d4252ad41f1486dbd8226fe36e"
dependencies = [
"libc",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -481,6 +500,20 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "insta"
version = "1.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d64600be34b2fcfc267740a243fa7744441bb4947a619ac4e5bb6507f35fbfc"
dependencies = [
"console",
"lazy_static",
"linked-hash-map",
"serde",
"similar",
"yaml-rust",
]
[[package]]
name = "ipnet"
version = "2.9.0"
@ -514,6 +547,12 @@ version = "0.2.150"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-raw-sys"
version = "0.4.11"
@ -614,7 +653,7 @@ checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
dependencies = [
"libc",
"wasi",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -815,7 +854,7 @@ dependencies = [
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
"windows-targets 0.48.5",
]
[[package]]
@ -1080,7 +1119,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -1095,7 +1134,7 @@ version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88"
dependencies = [
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -1214,6 +1253,12 @@ dependencies = [
"stable_deref_trait",
]
[[package]]
name = "similar"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aeaf503862c419d66959f5d7ca015337d864e9c49485d771b732e2a20453597"
[[package]]
name = "siphasher"
version = "0.3.11"
@ -1252,7 +1297,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
dependencies = [
"libc",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -1355,7 +1400,7 @@ dependencies = [
"fastrand",
"redox_syscall",
"rustix",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -1446,7 +1491,7 @@ dependencies = [
"pin-project-lite",
"socket2 0.5.5",
"tokio-macros",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -1519,10 +1564,12 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
name = "ugc-scraper"
version = "0.1.0"
dependencies = [
"insta",
"main_error",
"miette",
"reqwest",
"scraper",
"serde",
"steamid-ng",
"thiserror",
"time",
@ -1698,13 +1745,37 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets 0.42.2",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
"windows-targets 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
]
[[package]]
@ -1713,51 +1784,93 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
@ -1771,7 +1884,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [
"cfg-if",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]
[[package]]

View file

@ -2,14 +2,27 @@
name = "ugc-scraper"
version = "0.1.0"
edition = "2021"
rust-version = "1.67"
rust-version = "1.67.0"
[dependencies]
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
tokio = "1.34.0"
reqwest = "0.11.22"
scraper = "0.18.1"
miette = "5.10.0"
thiserror = "1.0.50"
main_error = "0.1.2"
time = { version = "0.3.30", features = ["parsing", "macros"] }
steamid-ng = "1.0.0"
serde = { version = "1.0.192", features = ["derive"], optional = true }
[dev-dependencies]
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
main_error = "0.1.2"
insta = { version = "1.34.0", features = ["json"] }
[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3
[features]
serde = ["dep:serde", "time/serde"]
default = ["serde"]

18
examples/test.rs Normal file
View file

@ -0,0 +1,18 @@
use main_error::MainResult;
use std::env::args;
use steamid_ng::SteamID;
use ugc_scraper::UgcClient;
#[tokio::main]
async fn main() -> MainResult {
    let client = UgcClient::new();

    // The first command line argument is the steam id to look up.
    let raw_id = args().nth(1).expect("no steam id provided");
    let steam_id = SteamID::try_from(raw_id.as_str()).expect("invalid steam id provided");

    // Print the teams listed on the player page.
    let player = client.player(steam_id).await?;
    dbg!(player.teams);

    // Print the membership history from the player details page.
    let membership = client.player_team_history(steam_id).await?;
    dbg!(membership);

    Ok(())
}

View file

@ -38,10 +38,17 @@
cargo = msrvToolchain;
};
src = sourceByRegex ./. ["Cargo.*" "(src|derive|benches|tests|examples|koth_bagel.*)(/.*)?"];
src = sourceByRegex ./. ["Cargo.*" "(src|derive|benches|tests|examples)(/.*)?"];
deps = with pkgs; [
pkg-config
openssl
];
# Build arguments shared by the package definitions below.
# NOTE(review): presumably consumed by naersk's buildPackage; confirm at the call sites.
nearskOpt = {
  # Was "vbsp", apparently copied from another project; the crate built here
  # is named "ugc-scraper" in Cargo.toml.
  pname = "ugc-scraper";
  root = src;
  # pkg-config and openssl (see `deps`) are needed at build time.
  nativeBuildInputs = deps;
};
in rec {
packages = {
@ -67,16 +74,13 @@
devShells = let
tools = with pkgs; [
bacon
cargo-insta
cargo-edit
cargo-outdated
cargo-audit
cargo-msrv
cargo-semver-checks
];
deps = with pkgs; [
pkg-config
openssl
];
in {
default = mkShell {
nativeBuildInputs = [toolchain] ++ tools ++ deps;

View file

@ -2,6 +2,7 @@ use steamid_ng::SteamID;
use time::Date;
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Player {
pub name: String,
pub steam_id: SteamID,
@ -10,6 +11,7 @@ pub struct Player {
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Honors {
pub format: String,
pub season: String,
@ -17,6 +19,7 @@ pub struct Honors {
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TeamMemberShip {
pub team: TeamRef,
pub league: String,
@ -24,13 +27,16 @@ pub struct TeamMemberShip {
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TeamRef {
pub name: String,
pub id: u32,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct MembershipHistory {
pub format: String,
pub team: TeamRef,
pub division: String,
pub joined: Date,

View file

@ -1,5 +1,5 @@
use thiserror::Error;
use miette::Diagnostic;
use thiserror::Error;
#[derive(Debug, Error, Diagnostic)]
pub enum ScrapeError {
@ -7,14 +7,23 @@ pub enum ScrapeError {
Request(#[from] reqwest::Error),
#[error(transparent)]
#[diagnostic(transparent)]
Parse(#[from] ParseError)
Parse(#[from] ParseError),
}
#[derive(Debug, Error, Diagnostic)]
#[derive(Debug, Error, Diagnostic, Clone)]
pub enum ParseError {
#[error("Couldn't find expected element '{selector}' for {role}")]
ElementNotFound {
selector: &'static str,
role: &'static str
}
role: &'static str,
},
/// The selected element was found but contained no non-whitespace text.
#[error("Element '{selector}' doesn't contain text for {role}")]
EmptyText {
    /// CSS selector of the element that was expected to contain text.
    selector: &'static str,
    /// Human readable description of what the text was being read for.
    role: &'static str,
},
#[error("Invalid link for {role}: {link}")]
InvalidLink { link: String, role: &'static str },
#[error("Invalid date for {role}: {date}")]
InvalidDate { date: String, role: &'static str },
}

57
src/lib.rs Normal file
View file

@ -0,0 +1,57 @@
pub mod data;
mod error;
#[doc(hidden)]
pub mod parser;
use crate::data::{MembershipHistory, Player};
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
pub use error::*;
use reqwest::Client;
use steamid_ng::SteamID;
/// Result alias used by this crate; the error type defaults to [`ScrapeError`].
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
/// Client that downloads player pages from `www.ugcleague.com` and parses
/// them into the types from [`crate::data`].
#[derive(Default)]
pub struct UgcClient {
// HTTP client used for every request made by this instance.
client: Client,
// Parser for the player page (`players_page.cfm`).
player_parser: PlayerParser,
// Parser for the player details page (`players_page_details.cfm`).
player_detail_parser: PlayerDetailsParser,
}
impl UgcClient {
    /// Creates a client with a default HTTP client and newly constructed parsers.
    pub fn new() -> Self {
        UgcClient {
            client: Client::default(),
            player_parser: PlayerParser::new(),
            player_detail_parser: PlayerDetailsParser::new(),
        }
    }

    /// Fetches the player page for `steam_id` and parses it into a [`Player`].
    ///
    /// # Errors
    ///
    /// Returns [`ScrapeError::Request`] if the request fails or the server
    /// answers with an error status, and [`ScrapeError::Parse`] if the page
    /// doesn't have the expected structure.
    pub async fn player(&self, steam_id: SteamID) -> Result<Player> {
        let body = self.fetch_page("players_page.cfm", steam_id).await?;
        self.player_parser.parse(&body)
    }

    /// Fetches the player details page for `steam_id` and parses the team
    /// membership history listed on it.
    ///
    /// # Errors
    ///
    /// Returns [`ScrapeError::Request`] if the request fails or the server
    /// answers with an error status, and [`ScrapeError::Parse`] if the page
    /// doesn't have the expected structure.
    pub async fn player_team_history(&self, steam_id: SteamID) -> Result<Vec<MembershipHistory>> {
        let body = self
            .fetch_page("players_page_details.cfm", steam_id)
            .await?;
        self.player_detail_parser.parse(&body)
    }

    /// Downloads `https://www.ugcleague.com/<page>?player_id=<steam id>` and
    /// returns the response body as text.
    async fn fetch_page(&self, page: &str, steam_id: SteamID) -> Result<String> {
        let url = format!(
            "https://www.ugcleague.com/{page}?player_id={}",
            u64::from(steam_id)
        );
        let body = self
            .client
            .get(url)
            .send()
            .await?
            // Report 4xx/5xx responses as request errors instead of feeding an
            // error page to the parsers, which would fail with a misleading
            // "element not found" parse error.
            .error_for_status()?
            .text()
            .await?;
        Ok(body)
    }
}

View file

@ -1,22 +0,0 @@
pub mod data;
mod error;
mod parser;
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
pub use error::*;
use main_error::MainResult;
use reqwest::get;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
#[tokio::main]
async fn main() -> MainResult {
let body =
get("https://www.ugcleague.com/players_page_details.cfm?player_id=76561198024494988")
.await?
.text()
.await?;
let parser = PlayerDetailsParser::new();
dbg!(parser.parse(&body)?);
Ok(())
}

View file

@ -1,5 +1,7 @@
use crate::Result;
use crate::{ParseError, Result};
use scraper::{ElementRef, Selector};
use time::format_description::FormatItem;
use time::macros::format_description;
mod player;
mod player_details;
@ -19,29 +21,38 @@ trait ElementExt<'a> {
impl<'a> ElementExt<'a> for ElementRef<'a> {
fn first_text(&self) -> Option<&'a str> {
self.text().filter(|s| !s.trim().is_empty()).next()
self.text().map(str::trim).find(|s| !s.is_empty())
}
fn nth_text(&self, n: usize) -> Option<&'a str> {
self.text()
.filter(|s| !s.trim().is_empty())
.skip(n - 1)
.next()
.map(|s| s.trim())
.nth(n - 1)
.map(str::trim)
}
}
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
el.select(selector)
.next()
.and_then(|item| item.text().filter(|s| !s.trim().is_empty()).next())
.unwrap_or(default)
.trim()
.and_then(|item| item.text().find(|s| !s.trim().is_empty()))
.map(str::trim)
}
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
el.select(selector)
.next()
.and_then(|item| item.text().last())
.unwrap_or(default)
.trim()
.map(str::trim)
}
const DATE_FORMAT: &[FormatItem<'static>] =
format_description!("[month padding:none]/[day padding:none]/[year]");
fn team_id_from_link(link: &str) -> Result<u32, ParseError> {
link.rsplit_once('=')
.and_then(|part| part.1.parse().ok())
.ok_or_else(|| ParseError::InvalidLink {
link: link.to_string(),
role: "team id",
})
}

View file

@ -1,11 +1,11 @@
use super::{ElementExt, Parser};
use crate::data::{Honors, Player, TeamMemberShip, TeamRef};
use crate::parser::{select_last_text, select_text};
use crate::parser::{select_last_text, select_text, team_id_from_link, DATE_FORMAT};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
use std::iter::repeat;
use steamid_ng::SteamID;
use time::{macros::format_description, Date};
use time::Date;
const SELECTOR_PLAYER_NAME: &str = ".container .col-md-4 > h3 > b";
const SELECTOR_PLAYER_ID: &str = ".container .col-md-4 > p.nomargin";
@ -39,6 +39,12 @@ pub struct PlayerParser {
selector_team_since: Selector,
}
impl Default for PlayerParser {
fn default() -> Self {
Self::new()
}
}
impl PlayerParser {
pub fn new() -> Self {
PlayerParser {
@ -63,9 +69,7 @@ impl Parser for PlayerParser {
type Output = Player;
fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(&document);
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
let document = Html::parse_document(document);
let name = document
.select(&self.selector_name)
.next()
@ -91,19 +95,37 @@ impl Parser for PlayerParser {
let honors = document
.select(&self.selector_honors_group)
.flat_map(|group| {
let format =
select_text(group, &self.selector_honors_header, "format not detected")
.trim_end_matches(" Medals");
let format = select_text(group, &self.selector_honors_header)
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_HONORS_HEADER,
role: "player honors format",
})
.map(|format| format.trim_end_matches(" Medals"));
let leagues = group.select(&self.selector_honors_league);
let teams = group.select(&self.selector_honors_team);
repeat(format).zip(leagues).zip(teams)
})
.map(|((format, season), team)| Honors {
.map(|((format_res, season), team)| {
let format = format_res?;
Ok(Honors {
format: format.to_string(),
season: season.text().next().unwrap_or_default().trim().to_string(),
team: team.text().next().unwrap_or_default().trim().to_string(),
season: season
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_LEAGUE,
role: "player honors season",
})?
.to_string(),
team: team
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_TEAM,
role: "player honors team",
})?
.to_string(),
})
.collect();
})
.collect::<Result<Vec<_>>>()?;
let teams = document
.select(&self.selector_team_group)
@ -112,31 +134,58 @@ impl Parser for PlayerParser {
let link = item
.select(&self.selector_team_link)
.next()
.and_then(|link| link.attr("href"))
.unwrap_or("=0");
let name = select_text(item, &self.selector_team_name, "failed to find name");
let league = select_text(item, &self.selector_team_league, "failed to find league");
let since = select_last_text(item, &self.selector_team_since, "");
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_LINK,
role: "players team link",
})?
.attr("href")
.unwrap_or_default();
let name = select_text(item, &self.selector_team_name).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_NAME,
role: "players team name",
},
)?;
let league = select_text(item, &self.selector_team_league).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_LEAGUE,
role: "players team league",
},
)?;
let since = select_last_text(item, &self.selector_team_since).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_SINCE,
role: "players team joined",
},
)?;
let id = match link.rsplit_once("=") {
Some((_, id)) => id.parse().unwrap_or_default(),
_ => 0,
};
let since = match since.rsplit_once("\n") {
Some((_, since)) => Date::parse(since, &format).unwrap_or(Date::MIN),
_ => Date::MIN,
let id = team_id_from_link(link)?;
let since = match since.rsplit_once('\n') {
Some((_, since)) => {
Date::parse(since, DATE_FORMAT).map_err(|_| ParseError::InvalidDate {
role: "team join date",
date: since.to_string(),
})?
}
_ => {
return Err(ParseError::InvalidDate {
role: "team join date",
date: since.to_string(),
}
.into())
}
};
TeamMemberShip {
Ok(TeamMemberShip {
team: TeamRef {
name: name.to_string(),
id,
},
league: league.to_string(),
since,
}
})
.collect();
})
.collect::<Result<Vec<_>>>()?;
Ok(Player {
name,

View file

@ -1,9 +1,9 @@
use super::{ElementExt, Parser};
use crate::data::{MembershipHistory, TeamRef};
use crate::parser::select_text;
use crate::Result;
use crate::parser::{select_text, team_id_from_link, DATE_FORMAT};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
use time::{macros::format_description, Date};
use time::Date;
const SELECTOR_TEAM_FORMAT: &str = ".container .white-row-small thead h4";
const SELECTOR_TEAM_GROUP: &str = ".container .white-row-small tbody";
@ -37,13 +37,19 @@ impl PlayerDetailsParser {
}
}
impl Default for PlayerDetailsParser {
fn default() -> Self {
Self::new()
}
}
impl Parser for PlayerDetailsParser {
type Output = Vec<MembershipHistory>;
fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(&document);
let document = Html::parse_document(document);
Ok(document
document
.select(&self.selector_team_format)
.zip(document.select(&self.selector_team_group))
.flat_map(|(format, history)| {
@ -52,34 +58,57 @@ impl Parser for PlayerDetailsParser {
.map(move |row| (format, row))
})
.map(|(format, team)| {
let format = format.first_text();
let format = format.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_TEAM_FORMAT,
role: "team format",
})?;
let link = team
.select(&self.selector_team_link)
.next()
.and_then(|link| link.attr("href"))
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_TEAM_LINK,
role: "team link",
})?
.attr("href")
.unwrap_or_default();
let name = select_text(team, &self.selector_team_link, "failed to find team name");
let division =
select_text(team, &self.selector_team_joined, "failed to find division");
let joined = select_text(team, &self.selector_team_joined, "");
let left = select_text(team, &self.selector_team_left, "");
let name = select_text(team, &self.selector_team_link).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_LINK,
role: "team link",
},
)?;
let division = select_text(team, &self.selector_team_division).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_DIVISION,
role: "team division",
},
)?;
let joined = select_text(team, &self.selector_team_joined).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_JOINED,
role: "team join date",
},
)?;
let left = select_text(team, &self.selector_team_left).unwrap_or_default();
let id = match link.rsplit_once("=") {
Some((_, id)) => id.parse().unwrap_or_default(),
_ => 0,
};
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
let id = team_id_from_link(link)?;
MembershipHistory {
joined: Date::parse(joined, format).unwrap_or(Date::MIN),
left: Date::parse(left, format).ok(),
Ok(MembershipHistory {
format: format.to_string(),
joined: Date::parse(joined, DATE_FORMAT).map_err(|_| {
ParseError::InvalidDate {
role: "team join date",
date: joined.to_string(),
}
})?,
left: Date::parse(left, DATE_FORMAT).ok(),
team: TeamRef {
name: name.to_string(),
id,
},
division: division.to_string(),
}
})
.collect())
})
.collect()
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

19
tests/player.rs Normal file
View file

@ -0,0 +1,19 @@
use insta::assert_json_snapshot;
use std::fs::read_to_string;
use ugc_scraper::parser::{Parser, PlayerDetailsParser, PlayerParser};
#[test]
fn test_parse_player_html() {
let body = read_to_string("tests/data/player_76561198024494988.html").unwrap();
let parser = PlayerParser::new();
let parsed = parser.parse(&body).unwrap();
assert_json_snapshot!(parsed);
}
#[test]
fn test_parse_player_details_html() {
let body = read_to_string("tests/data/player_details_76561198024494988.html").unwrap();
let parser = PlayerDetailsParser::new();
let parsed = parser.parse(&body).unwrap();
assert_json_snapshot!(parsed);
}

View file

@ -0,0 +1,128 @@
---
source: tests/player.rs
expression: parsed
---
[
{
"format": "TF2 Highlander",
"team": {
"name": "Xenon",
"id": 7861
},
"division": "Euro Platinum",
"joined": [
2013,
221
],
"left": null
},
{
"format": "TF2 6vs6",
"team": {
"name": "UGC 6s",
"id": 6929
},
"division": "Europe",
"joined": [
2017,
52
],
"left": null
},
{
"format": "TF2 6vs6",
"team": {
"name": "sExy eSports",
"id": 17736
},
"division": "Euro Steel",
"joined": [
2015,
253
],
"left": [
2017,
22
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "BigHorseDong",
"id": 16277
},
"division": "Euro Steel",
"joined": [
2015,
157
],
"left": [
2015,
253
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "sExy eSports",
"id": 17736
},
"division": "Euro Steel",
"joined": [
2015,
153
],
"left": [
2015,
157
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "BigHorseDong",
"id": 16277
},
"division": "Euro Steel",
"joined": [
2015,
32
],
"left": [
2015,
153
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "Necronoms",
"id": 8622
},
"division": "Euro Steel",
"joined": [
2013,
250
],
"left": [
2014,
357
]
},
{
"format": "TF2 4vs4",
"team": {
"name": "sExy eSports",
"id": 17790
},
"division": "Silver Euro",
"joined": [
2015,
153
],
"left": [
2017,
227
]
}
]

View file

@ -0,0 +1,234 @@
---
source: tests/player.rs
expression: parsed
---
{
"name": "Icewind demostf",
"steam_id": 0,
"honors": [
{
"format": "TF2 Highlander",
"season": "Season 32 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 31 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 30 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 29 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 28 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 27 Premium EU",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 26 Premium EU",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 25 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 24 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 23 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 22 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 21 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 20 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 19 Euro Gold",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 18 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 17 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 16 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 15 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 14 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 13 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 12 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 11 Euro Steel",
"team": "Xenon"
},
{
"format": "TF2 6vs6",
"season": "Season 35 Europe",
"team": "Controller Gamers"
},
{
"format": "TF2 6vs6",
"season": "Season 34 Europe",
"team": "Bye week"
},
{
"format": "TF2 6vs6",
"season": "Season 33 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 32 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 31 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 30 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 29 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 28 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 27 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 26 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 25 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 24 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 23 Europe",
"team": "Giel and the 9wiels"
},
{
"format": "TF2 6vs6",
"season": "Season 19 Euro Platinum",
"team": "sExy eSports"
},
{
"format": "TF2 6vs6",
"season": "Season 18 Euro Steel",
"team": "BigHorseDong"
},
{
"format": "TF2 6vs6",
"season": "Season 13 Euro Steel",
"team": "Necronoms"
},
{
"format": "TF2 4vs4",
"season": "Season 6 Silver Euro",
"team": "sExy eSports"
},
{
"format": "TF2 4vs4",
"season": "Season 5 Silver Euro",
"team": "sExy eSports"
}
],
"teams": [
{
"team": {
"name": "UGC 6s",
"id": 6929
},
"league": "TF2 6vs6 - Europe",
"since": [
2017,
52
]
},
{
"team": {
"name": "Xenon",
"id": 7861
},
"league": "TF2 Highlander - Euro Platinum",
"since": [
2013,
221
]
}
]
}