cleanups, tests, clippy

This commit is contained in:
Robin Appelman 2023-11-16 16:54:43 +01:00
commit a9a3751067
16 changed files with 5932 additions and 117 deletions

156
Cargo.lock generated
View file

@ -111,6 +111,18 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "console"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"windows-sys 0.45.0",
]
[[package]] [[package]]
name = "core-foundation" name = "core-foundation"
version = "0.9.3" version = "0.9.3"
@ -157,6 +169,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3"
dependencies = [ dependencies = [
"powerfmt", "powerfmt",
"serde",
] ]
[[package]] [[package]]
@ -191,6 +204,12 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]] [[package]]
name = "encoding_rs" name = "encoding_rs"
version = "0.8.33" version = "0.8.33"
@ -222,7 +241,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c18ee0ed65a5f1f81cac6b1d213b69c35fa47d4252ad41f1486dbd8226fe36e" checksum = "7c18ee0ed65a5f1f81cac6b1d213b69c35fa47d4252ad41f1486dbd8226fe36e"
dependencies = [ dependencies = [
"libc", "libc",
"windows-sys", "windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -481,6 +500,20 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "insta"
version = "1.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d64600be34b2fcfc267740a243fa7744441bb4947a619ac4e5bb6507f35fbfc"
dependencies = [
"console",
"lazy_static",
"linked-hash-map",
"serde",
"similar",
"yaml-rust",
]
[[package]] [[package]]
name = "ipnet" name = "ipnet"
version = "2.9.0" version = "2.9.0"
@ -514,6 +547,12 @@ version = "0.2.150"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.4.11" version = "0.4.11"
@ -614,7 +653,7 @@ checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
dependencies = [ dependencies = [
"libc", "libc",
"wasi", "wasi",
"windows-sys", "windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -815,7 +854,7 @@ dependencies = [
"libc", "libc",
"redox_syscall", "redox_syscall",
"smallvec", "smallvec",
"windows-targets", "windows-targets 0.48.5",
] ]
[[package]] [[package]]
@ -1080,7 +1119,7 @@ dependencies = [
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
"windows-sys", "windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -1095,7 +1134,7 @@ version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88"
dependencies = [ dependencies = [
"windows-sys", "windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -1214,6 +1253,12 @@ dependencies = [
"stable_deref_trait", "stable_deref_trait",
] ]
[[package]]
name = "similar"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aeaf503862c419d66959f5d7ca015337d864e9c49485d771b732e2a20453597"
[[package]] [[package]]
name = "siphasher" name = "siphasher"
version = "0.3.11" version = "0.3.11"
@ -1252,7 +1297,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
dependencies = [ dependencies = [
"libc", "libc",
"windows-sys", "windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -1355,7 +1400,7 @@ dependencies = [
"fastrand", "fastrand",
"redox_syscall", "redox_syscall",
"rustix", "rustix",
"windows-sys", "windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -1446,7 +1491,7 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
"socket2 0.5.5", "socket2 0.5.5",
"tokio-macros", "tokio-macros",
"windows-sys", "windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -1519,10 +1564,12 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
name = "ugc-scraper" name = "ugc-scraper"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"insta",
"main_error", "main_error",
"miette", "miette",
"reqwest", "reqwest",
"scraper", "scraper",
"serde",
"steamid-ng", "steamid-ng",
"thiserror", "thiserror",
"time", "time",
@ -1698,13 +1745,37 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets 0.42.2",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [ dependencies = [
"windows-targets", "windows-targets 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
] ]
[[package]] [[package]]
@ -1713,51 +1784,93 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [ dependencies = [
"windows_aarch64_gnullvm", "windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc", "windows_aarch64_msvc 0.48.5",
"windows_i686_gnu", "windows_i686_gnu 0.48.5",
"windows_i686_msvc", "windows_i686_msvc 0.48.5",
"windows_x86_64_gnu", "windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm", "windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc", "windows_x86_64_msvc 0.48.5",
] ]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]] [[package]]
name = "windows_aarch64_gnullvm" name = "windows_aarch64_gnullvm"
version = "0.48.5" version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.48.5" version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.48.5" version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.48.5" version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.48.5" version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]] [[package]]
name = "windows_x86_64_gnullvm" name = "windows_x86_64_gnullvm"
version = "0.48.5" version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.48.5" version = "0.48.5"
@ -1771,7 +1884,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"windows-sys", "windows-sys 0.48.0",
]
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
] ]
[[package]] [[package]]

View file

@ -2,14 +2,27 @@
name = "ugc-scraper" name = "ugc-scraper"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
rust-version = "1.67" rust-version = "1.67.0"
[dependencies] [dependencies]
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] } tokio = "1.34.0"
reqwest = "0.11.22" reqwest = "0.11.22"
scraper = "0.18.1" scraper = "0.18.1"
miette = "5.10.0" miette = "5.10.0"
thiserror = "1.0.50" thiserror = "1.0.50"
main_error = "0.1.2"
time = { version = "0.3.30", features = ["parsing", "macros"] } time = { version = "0.3.30", features = ["parsing", "macros"] }
steamid-ng = "1.0.0" steamid-ng = "1.0.0"
serde = { version = "1.0.192", features = ["derive"], optional = true }
[dev-dependencies]
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
main_error = "0.1.2"
insta = { version = "1.34.0", features = ["json"] }
[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3
[features]
serde = ["dep:serde", "time/serde"]
default = ["serde"]

18
examples/test.rs Normal file
View file

@ -0,0 +1,18 @@
use main_error::MainResult;
use std::env::args;
use steamid_ng::SteamID;
use ugc_scraper::UgcClient;
/// Example binary: takes a steam id as the first command line argument,
/// then debug-prints the player's teams and team membership history.
#[tokio::main]
async fn main() -> MainResult {
    let client = UgcClient::new();

    // The steam id is read from the first positional argument.
    let raw_id = args().nth(1).expect("no steam id provided");
    let steam_id = SteamID::try_from(raw_id.as_str()).expect("invalid steam id provided");

    let player = client.player(steam_id).await?;
    dbg!(player.teams);

    let membership = client.player_team_history(steam_id).await?;
    dbg!(membership);

    Ok(())
}

View file

@ -38,10 +38,17 @@
cargo = msrvToolchain; cargo = msrvToolchain;
}; };
src = sourceByRegex ./. ["Cargo.*" "(src|derive|benches|tests|examples|koth_bagel.*)(/.*)?"]; src = sourceByRegex ./. ["Cargo.*" "(src|derive|benches|tests|examples)(/.*)?"];
deps = with pkgs; [
pkg-config
openssl
];
nearskOpt = { nearskOpt = {
pname = "vbsp"; pname = "vbsp";
root = src; root = src;
nativeBuildInputs = deps;
}; };
in rec { in rec {
packages = { packages = {
@ -67,16 +74,13 @@
devShells = let devShells = let
tools = with pkgs; [ tools = with pkgs; [
bacon bacon
cargo-insta
cargo-edit cargo-edit
cargo-outdated cargo-outdated
cargo-audit cargo-audit
cargo-msrv cargo-msrv
cargo-semver-checks cargo-semver-checks
]; ];
deps = with pkgs; [
pkg-config
openssl
];
in { in {
default = mkShell { default = mkShell {
nativeBuildInputs = [toolchain] ++ tools ++ deps; nativeBuildInputs = [toolchain] ++ tools ++ deps;

View file

@ -2,6 +2,7 @@ use steamid_ng::SteamID;
use time::Date; use time::Date;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Player { pub struct Player {
pub name: String, pub name: String,
pub steam_id: SteamID, pub steam_id: SteamID,
@ -10,6 +11,7 @@ pub struct Player {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Honors { pub struct Honors {
pub format: String, pub format: String,
pub season: String, pub season: String,
@ -17,6 +19,7 @@ pub struct Honors {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TeamMemberShip { pub struct TeamMemberShip {
pub team: TeamRef, pub team: TeamRef,
pub league: String, pub league: String,
@ -24,13 +27,16 @@ pub struct TeamMemberShip {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct TeamRef { pub struct TeamRef {
pub name: String, pub name: String,
pub id: u32, pub id: u32,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct MembershipHistory { pub struct MembershipHistory {
pub format: String,
pub team: TeamRef, pub team: TeamRef,
pub division: String, pub division: String,
pub joined: Date, pub joined: Date,

View file

@ -1,5 +1,5 @@
use thiserror::Error;
use miette::Diagnostic; use miette::Diagnostic;
use thiserror::Error;
#[derive(Debug, Error, Diagnostic)] #[derive(Debug, Error, Diagnostic)]
pub enum ScrapeError { pub enum ScrapeError {
@ -7,14 +7,23 @@ pub enum ScrapeError {
Request(#[from] reqwest::Error), Request(#[from] reqwest::Error),
#[error(transparent)] #[error(transparent)]
#[diagnostic(transparent)] #[diagnostic(transparent)]
Parse(#[from] ParseError) Parse(#[from] ParseError),
} }
#[derive(Debug, Error, Diagnostic)] #[derive(Debug, Error, Diagnostic, Clone)]
pub enum ParseError { pub enum ParseError {
#[error("Couldn't find expected element '{selector}' for {role}")] #[error("Couldn't find expected element '{selector}' for {role}")]
ElementNotFound { ElementNotFound {
selector: &'static str, selector: &'static str,
role: &'static str role: &'static str,
} },
} #[error("Element '{selector}' does contain text for {role}")]
EmptyText {
selector: &'static str,
role: &'static str,
},
#[error("Invalid link for {role}: {link}")]
InvalidLink { link: String, role: &'static str },
#[error("Invalid date for {role}: {date}")]
InvalidDate { date: String, role: &'static str },
}

57
src/lib.rs Normal file
View file

@ -0,0 +1,57 @@
pub mod data;
mod error;
#[doc(hidden)]
pub mod parser;
use crate::data::{MembershipHistory, Player};
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
pub use error::*;
use reqwest::Client;
use steamid_ng::SteamID;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
/// Client that downloads ugcleague.com player pages and parses them.
#[derive(Default)]
pub struct UgcClient {
// HTTP client used for all page requests
client: Client,
// parser applied to the body returned for the player page
player_parser: PlayerParser,
// parser applied to the body returned for the player details page
player_detail_parser: PlayerDetailsParser,
}
impl UgcClient {
    /// Creates a client with a default HTTP client and newly constructed parsers.
    pub fn new() -> Self {
        UgcClient {
            client: Client::default(),
            player_parser: PlayerParser::new(),
            player_detail_parser: PlayerDetailsParser::new(),
        }
    }

    /// Downloads `players_page.cfm` for `steam_id` and parses it into a [`Player`].
    ///
    /// # Errors
    ///
    /// Returns a [`ScrapeError`] when the request fails or the page cannot be parsed.
    pub async fn player(&self, steam_id: SteamID) -> Result<Player> {
        let body = self.fetch_page("players_page.cfm", steam_id).await?;
        self.player_parser.parse(&body)
    }

    /// Downloads `players_page_details.cfm` for `steam_id` and parses it into the
    /// player's list of [`MembershipHistory`] entries.
    ///
    /// # Errors
    ///
    /// Returns a [`ScrapeError`] when the request fails or the page cannot be parsed.
    pub async fn player_team_history(&self, steam_id: SteamID) -> Result<Vec<MembershipHistory>> {
        let body = self
            .fetch_page("players_page_details.cfm", steam_id)
            .await?;
        self.player_detail_parser.parse(&body)
    }

    /// Requests `page` on ugcleague.com for the given player and returns the response body.
    async fn fetch_page(&self, page: &str, steam_id: SteamID) -> Result<String> {
        // The site addresses players by the 64 bit form of the steam id.
        let url = format!(
            "https://www.ugcleague.com/{page}?player_id={}",
            u64::from(steam_id)
        );
        // The owned `String` is passed directly; borrowing it first is unnecessary.
        let body = self.client.get(url).send().await?.text().await?;
        Ok(body)
    }
}

View file

@ -1,22 +0,0 @@
pub mod data;
mod error;
mod parser;
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
pub use error::*;
use main_error::MainResult;
use reqwest::get;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
/// Downloads the player details page for one hard-coded player id and
/// debug-prints the parsed result.
#[tokio::main]
async fn main() -> MainResult {
// fetch the raw HTML body of the details page
let body =
get("https://www.ugcleague.com/players_page_details.cfm?player_id=76561198024494988")
.await?
.text()
.await?;
let parser = PlayerDetailsParser::new();
// parse errors are propagated to the caller through `?`
dbg!(parser.parse(&body)?);
Ok(())
}

View file

@ -1,5 +1,7 @@
use crate::Result; use crate::{ParseError, Result};
use scraper::{ElementRef, Selector}; use scraper::{ElementRef, Selector};
use time::format_description::FormatItem;
use time::macros::format_description;
mod player; mod player;
mod player_details; mod player_details;
@ -19,29 +21,38 @@ trait ElementExt<'a> {
impl<'a> ElementExt<'a> for ElementRef<'a> { impl<'a> ElementExt<'a> for ElementRef<'a> {
fn first_text(&self) -> Option<&'a str> { fn first_text(&self) -> Option<&'a str> {
self.text().filter(|s| !s.trim().is_empty()).next() self.text().map(str::trim).find(|s| !s.is_empty())
} }
fn nth_text(&self, n: usize) -> Option<&'a str> { fn nth_text(&self, n: usize) -> Option<&'a str> {
self.text() self.text()
.filter(|s| !s.trim().is_empty()) .filter(|s| !s.trim().is_empty())
.skip(n - 1) .nth(n - 1)
.next() .map(str::trim)
.map(|s| s.trim())
} }
} }
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str { fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
el.select(selector) el.select(selector)
.next() .next()
.and_then(|item| item.text().filter(|s| !s.trim().is_empty()).next()) .and_then(|item| item.text().find(|s| !s.trim().is_empty()))
.unwrap_or(default) .map(str::trim)
.trim()
} }
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str { fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
el.select(selector) el.select(selector)
.next() .next()
.and_then(|item| item.text().last()) .and_then(|item| item.text().last())
.unwrap_or(default) .map(str::trim)
.trim() }
/// Date layout shared by the parsers: `month/day/year`, with `padding:none`
/// set on both the month and the day component.
const DATE_FORMAT: &[FormatItem<'static>] =
format_description!("[month padding:none]/[day padding:none]/[year]");
fn team_id_from_link(link: &str) -> Result<u32, ParseError> {
link.rsplit_once('=')
.and_then(|part| part.1.parse().ok())
.ok_or_else(|| ParseError::InvalidLink {
link: link.to_string(),
role: "team id",
})
} }

View file

@ -1,11 +1,11 @@
use super::{ElementExt, Parser}; use super::{ElementExt, Parser};
use crate::data::{Honors, Player, TeamMemberShip, TeamRef}; use crate::data::{Honors, Player, TeamMemberShip, TeamRef};
use crate::parser::{select_last_text, select_text}; use crate::parser::{select_last_text, select_text, team_id_from_link, DATE_FORMAT};
use crate::{ParseError, Result}; use crate::{ParseError, Result};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use std::iter::repeat; use std::iter::repeat;
use steamid_ng::SteamID; use steamid_ng::SteamID;
use time::{macros::format_description, Date}; use time::Date;
const SELECTOR_PLAYER_NAME: &str = ".container .col-md-4 > h3 > b"; const SELECTOR_PLAYER_NAME: &str = ".container .col-md-4 > h3 > b";
const SELECTOR_PLAYER_ID: &str = ".container .col-md-4 > p.nomargin"; const SELECTOR_PLAYER_ID: &str = ".container .col-md-4 > p.nomargin";
@ -39,6 +39,12 @@ pub struct PlayerParser {
selector_team_since: Selector, selector_team_since: Selector,
} }
/// `Default` delegates to [`PlayerParser::new`].
impl Default for PlayerParser {
fn default() -> Self {
Self::new()
}
}
impl PlayerParser { impl PlayerParser {
pub fn new() -> Self { pub fn new() -> Self {
PlayerParser { PlayerParser {
@ -63,9 +69,7 @@ impl Parser for PlayerParser {
type Output = Player; type Output = Player;
fn parse(&self, document: &str) -> Result<Self::Output> { fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(&document); let document = Html::parse_document(document);
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
let name = document let name = document
.select(&self.selector_name) .select(&self.selector_name)
.next() .next()
@ -91,19 +95,37 @@ impl Parser for PlayerParser {
let honors = document let honors = document
.select(&self.selector_honors_group) .select(&self.selector_honors_group)
.flat_map(|group| { .flat_map(|group| {
let format = let format = select_text(group, &self.selector_honors_header)
select_text(group, &self.selector_honors_header, "format not detected") .ok_or(ParseError::ElementNotFound {
.trim_end_matches(" Medals"); selector: SELECTOR_PLAYER_HONORS_HEADER,
role: "player honors format",
})
.map(|format| format.trim_end_matches(" Medals"));
let leagues = group.select(&self.selector_honors_league); let leagues = group.select(&self.selector_honors_league);
let teams = group.select(&self.selector_honors_team); let teams = group.select(&self.selector_honors_team);
repeat(format).zip(leagues).zip(teams) repeat(format).zip(leagues).zip(teams)
}) })
.map(|((format, season), team)| Honors { .map(|((format_res, season), team)| {
format: format.to_string(), let format = format_res?;
season: season.text().next().unwrap_or_default().trim().to_string(), Ok(Honors {
team: team.text().next().unwrap_or_default().trim().to_string(), format: format.to_string(),
season: season
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_LEAGUE,
role: "player honors season",
})?
.to_string(),
team: team
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_PLAYER_HONORS_TEAM,
role: "player honors team",
})?
.to_string(),
})
}) })
.collect(); .collect::<Result<Vec<_>>>()?;
let teams = document let teams = document
.select(&self.selector_team_group) .select(&self.selector_team_group)
@ -112,31 +134,58 @@ impl Parser for PlayerParser {
let link = item let link = item
.select(&self.selector_team_link) .select(&self.selector_team_link)
.next() .next()
.and_then(|link| link.attr("href")) .ok_or(ParseError::ElementNotFound {
.unwrap_or("=0"); selector: SELECTOR_PLAYER_TEAM_LINK,
let name = select_text(item, &self.selector_team_name, "failed to find name"); role: "players team link",
let league = select_text(item, &self.selector_team_league, "failed to find league"); })?
let since = select_last_text(item, &self.selector_team_since, ""); .attr("href")
.unwrap_or_default();
let name = select_text(item, &self.selector_team_name).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_NAME,
role: "players team name",
},
)?;
let league = select_text(item, &self.selector_team_league).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_LEAGUE,
role: "players team league",
},
)?;
let since = select_last_text(item, &self.selector_team_since).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PLAYER_TEAM_SINCE,
role: "players team joined",
},
)?;
let id = match link.rsplit_once("=") { let id = team_id_from_link(link)?;
Some((_, id)) => id.parse().unwrap_or_default(), let since = match since.rsplit_once('\n') {
_ => 0, Some((_, since)) => {
}; Date::parse(since, DATE_FORMAT).map_err(|_| ParseError::InvalidDate {
let since = match since.rsplit_once("\n") { role: "team join date",
Some((_, since)) => Date::parse(since, &format).unwrap_or(Date::MIN), date: since.to_string(),
_ => Date::MIN, })?
}
_ => {
return Err(ParseError::InvalidDate {
role: "team join date",
date: since.to_string(),
}
.into())
}
}; };
TeamMemberShip { Ok(TeamMemberShip {
team: TeamRef { team: TeamRef {
name: name.to_string(), name: name.to_string(),
id, id,
}, },
league: league.to_string(), league: league.to_string(),
since, since,
} })
}) })
.collect(); .collect::<Result<Vec<_>>>()?;
Ok(Player { Ok(Player {
name, name,

View file

@ -1,9 +1,9 @@
use super::{ElementExt, Parser}; use super::{ElementExt, Parser};
use crate::data::{MembershipHistory, TeamRef}; use crate::data::{MembershipHistory, TeamRef};
use crate::parser::select_text; use crate::parser::{select_text, team_id_from_link, DATE_FORMAT};
use crate::Result; use crate::{ParseError, Result};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use time::{macros::format_description, Date}; use time::Date;
const SELECTOR_TEAM_FORMAT: &str = ".container .white-row-small thead h4"; const SELECTOR_TEAM_FORMAT: &str = ".container .white-row-small thead h4";
const SELECTOR_TEAM_GROUP: &str = ".container .white-row-small tbody"; const SELECTOR_TEAM_GROUP: &str = ".container .white-row-small tbody";
@ -37,13 +37,19 @@ impl PlayerDetailsParser {
} }
} }
/// `Default` delegates to [`PlayerDetailsParser::new`].
impl Default for PlayerDetailsParser {
fn default() -> Self {
Self::new()
}
}
impl Parser for PlayerDetailsParser { impl Parser for PlayerDetailsParser {
type Output = Vec<MembershipHistory>; type Output = Vec<MembershipHistory>;
fn parse(&self, document: &str) -> Result<Self::Output> { fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(&document); let document = Html::parse_document(document);
Ok(document document
.select(&self.selector_team_format) .select(&self.selector_team_format)
.zip(document.select(&self.selector_team_group)) .zip(document.select(&self.selector_team_group))
.flat_map(|(format, history)| { .flat_map(|(format, history)| {
@ -52,34 +58,57 @@ impl Parser for PlayerDetailsParser {
.map(move |row| (format, row)) .map(move |row| (format, row))
}) })
.map(|(format, team)| { .map(|(format, team)| {
let format = format.first_text(); let format = format.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_TEAM_FORMAT,
role: "team format",
})?;
let link = team let link = team
.select(&self.selector_team_link) .select(&self.selector_team_link)
.next() .next()
.and_then(|link| link.attr("href")) .ok_or(ParseError::ElementNotFound {
selector: SELECTOR_TEAM_LINK,
role: "team link",
})?
.attr("href")
.unwrap_or_default(); .unwrap_or_default();
let name = select_text(team, &self.selector_team_link, "failed to find team name"); let name = select_text(team, &self.selector_team_link).ok_or(
let division = ParseError::ElementNotFound {
select_text(team, &self.selector_team_joined, "failed to find division"); selector: SELECTOR_TEAM_LINK,
let joined = select_text(team, &self.selector_team_joined, ""); role: "team link",
let left = select_text(team, &self.selector_team_left, ""); },
)?;
let division = select_text(team, &self.selector_team_division).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_DIVISION,
role: "team division",
},
)?;
let joined = select_text(team, &self.selector_team_joined).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TEAM_JOINED,
role: "team join date",
},
)?;
let left = select_text(team, &self.selector_team_left).unwrap_or_default();
let id = match link.rsplit_once("=") { let id = team_id_from_link(link)?;
Some((_, id)) => id.parse().unwrap_or_default(),
_ => 0,
};
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
MembershipHistory { Ok(MembershipHistory {
joined: Date::parse(joined, format).unwrap_or(Date::MIN), format: format.to_string(),
left: Date::parse(left, format).ok(), joined: Date::parse(joined, DATE_FORMAT).map_err(|_| {
ParseError::InvalidDate {
role: "team join date",
date: joined.to_string(),
}
})?,
left: Date::parse(left, DATE_FORMAT).ok(),
team: TeamRef { team: TeamRef {
name: name.to_string(), name: name.to_string(),
id, id,
}, },
division: division.to_string(), division: division.to_string(),
} })
}) })
.collect()) .collect()
} }
} }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

19
tests/player.rs Normal file
View file

@ -0,0 +1,19 @@
use insta::assert_json_snapshot;
use std::fs::read_to_string;
use ugc_scraper::parser::{Parser, PlayerDetailsParser, PlayerParser};
/// Parses the stored player page fixture and compares the result with the
/// recorded JSON snapshot.
#[test]
fn test_parse_player_html() {
// the fixture path is relative to the working directory of the test run
let body = read_to_string("tests/data/player_76561198024494988.html").unwrap();
let parser = PlayerParser::new();
let parsed = parser.parse(&body).unwrap();
assert_json_snapshot!(parsed);
}
/// Parses the stored player details page fixture and compares the result
/// with the recorded JSON snapshot.
#[test]
fn test_parse_player_details_html() {
// the fixture path is relative to the working directory of the test run
let body = read_to_string("tests/data/player_details_76561198024494988.html").unwrap();
let parser = PlayerDetailsParser::new();
let parsed = parser.parse(&body).unwrap();
assert_json_snapshot!(parsed);
}

View file

@ -0,0 +1,128 @@
---
source: tests/player.rs
expression: parsed
---
[
{
"format": "TF2 Highlander",
"team": {
"name": "Xenon",
"id": 7861
},
"division": "Euro Platinum",
"joined": [
2013,
221
],
"left": null
},
{
"format": "TF2 6vs6",
"team": {
"name": "UGC 6s",
"id": 6929
},
"division": "Europe",
"joined": [
2017,
52
],
"left": null
},
{
"format": "TF2 6vs6",
"team": {
"name": "sExy eSports",
"id": 17736
},
"division": "Euro Steel",
"joined": [
2015,
253
],
"left": [
2017,
22
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "BigHorseDong",
"id": 16277
},
"division": "Euro Steel",
"joined": [
2015,
157
],
"left": [
2015,
253
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "sExy eSports",
"id": 17736
},
"division": "Euro Steel",
"joined": [
2015,
153
],
"left": [
2015,
157
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "BigHorseDong",
"id": 16277
},
"division": "Euro Steel",
"joined": [
2015,
32
],
"left": [
2015,
153
]
},
{
"format": "TF2 6vs6",
"team": {
"name": "Necronoms",
"id": 8622
},
"division": "Euro Steel",
"joined": [
2013,
250
],
"left": [
2014,
357
]
},
{
"format": "TF2 4vs4",
"team": {
"name": "sExy eSports",
"id": 17790
},
"division": "Silver Euro",
"joined": [
2015,
153
],
"left": [
2017,
227
]
}
]

View file

@ -0,0 +1,234 @@
---
source: tests/player.rs
expression: parsed
---
{
"name": "Icewind demostf",
"steam_id": 0,
"honors": [
{
"format": "TF2 Highlander",
"season": "Season 32 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 31 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 30 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 29 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 28 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 27 Premium EU",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 26 Premium EU",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 25 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 24 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 23 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 22 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 21 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 20 Euro Platinum",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 19 Euro Gold",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 18 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 17 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 16 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 15 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 14 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 13 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 12 Euro Silver",
"team": "Xenon"
},
{
"format": "TF2 Highlander",
"season": "Season 11 Euro Steel",
"team": "Xenon"
},
{
"format": "TF2 6vs6",
"season": "Season 35 Europe",
"team": "Controller Gamers"
},
{
"format": "TF2 6vs6",
"season": "Season 34 Europe",
"team": "Bye week"
},
{
"format": "TF2 6vs6",
"season": "Season 33 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 32 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 31 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 30 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 29 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 28 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 27 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 26 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 25 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 24 Europe",
"team": "meta.tf"
},
{
"format": "TF2 6vs6",
"season": "Season 23 Europe",
"team": "Giel and the 9wiels"
},
{
"format": "TF2 6vs6",
"season": "Season 19 Euro Platinum",
"team": "sExy eSports"
},
{
"format": "TF2 6vs6",
"season": "Season 18 Euro Steel",
"team": "BigHorseDong"
},
{
"format": "TF2 6vs6",
"season": "Season 13 Euro Steel",
"team": "Necronoms"
},
{
"format": "TF2 4vs4",
"season": "Season 6 Silver Euro",
"team": "sExy eSports"
},
{
"format": "TF2 4vs4",
"season": "Season 5 Silver Euro",
"team": "sExy eSports"
}
],
"teams": [
{
"team": {
"name": "UGC 6s",
"id": 6929
},
"league": "TF2 6vs6 - Europe",
"since": [
2017,
52
]
},
{
"team": {
"name": "Xenon",
"id": 7861
},
"league": "TF2 Highlander - Euro Platinum",
"since": [
2013,
221
]
}
]
}