mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
membership history
This commit is contained in:
parent
7a1b207d66
commit
d937dbbb5e
5 changed files with 125 additions and 25 deletions
|
|
@ -28,3 +28,11 @@ pub struct TeamRef {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub id: u32,
|
pub id: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct MembershipHistory {
|
||||||
|
pub team: TeamRef,
|
||||||
|
pub division: String,
|
||||||
|
pub joined: Date,
|
||||||
|
pub left: Option<Date>,
|
||||||
|
}
|
||||||
|
|
|
||||||
14
src/main.rs
14
src/main.rs
|
|
@ -1,18 +1,22 @@
|
||||||
|
pub mod data;
|
||||||
mod error;
|
mod error;
|
||||||
mod parser;
|
mod parser;
|
||||||
pub mod data;
|
|
||||||
|
|
||||||
|
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
|
||||||
|
pub use error::*;
|
||||||
use main_error::MainResult;
|
use main_error::MainResult;
|
||||||
use reqwest::get;
|
use reqwest::get;
|
||||||
pub use error::*;
|
|
||||||
use crate::parser::{PlayerParser, Parser};
|
|
||||||
|
|
||||||
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
|
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> MainResult {
|
async fn main() -> MainResult {
|
||||||
let body = get("https://www.ugcleague.com/players_page.cfm?player_id=76561198024494988").await?.text().await?;
|
let body =
|
||||||
let parser = PlayerParser::new();
|
get("https://www.ugcleague.com/players_page_details.cfm?player_id=76561198024494988")
|
||||||
|
.await?
|
||||||
|
.text()
|
||||||
|
.await?;
|
||||||
|
let parser = PlayerDetailsParser::new();
|
||||||
dbg!(parser.parse(&body)?);
|
dbg!(parser.parse(&body)?);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,11 @@
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use scraper::ElementRef;
|
use scraper::{ElementRef, Selector};
|
||||||
|
|
||||||
mod player;
|
mod player;
|
||||||
|
mod player_details;
|
||||||
|
|
||||||
pub use player::*;
|
pub use player::*;
|
||||||
|
pub use player_details::*;
|
||||||
|
|
||||||
pub trait Parser {
|
pub trait Parser {
|
||||||
type Output;
|
type Output;
|
||||||
|
|
@ -27,3 +29,19 @@ impl<'a> ElementExt<'a> for ElementRef<'a> {
|
||||||
.map(|s| s.trim())
|
.map(|s| s.trim())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the first non-blank text node of the first element under `el` that
/// matches `selector`, with surrounding whitespace removed.
///
/// Falls back to `default` (also trimmed) when nothing matches or when the
/// matched element has no non-blank text node.
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
    let first_match = el.select(selector).next();
    let text = match first_match {
        // Skip whitespace-only text nodes so leading indentation is ignored.
        Some(node) => node.text().find(|chunk| !chunk.trim().is_empty()),
        None => None,
    };
    text.unwrap_or(default).trim()
}
|
||||||
|
|
||||||
|
/// Returns the last text node of the first element under `el` that matches
/// `selector`, with surrounding whitespace removed.
///
/// Unlike `select_text`, blank text nodes are not skipped, so the result may
/// be empty. Falls back to `default` (also trimmed) when nothing matches or
/// the matched element contains no text nodes.
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
    match el.select(selector).next().and_then(|node| node.text().last()) {
        Some(text) => text.trim(),
        None => default.trim(),
    }
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
use super::{ElementExt, Parser};
|
use super::{ElementExt, Parser};
|
||||||
use crate::data::{Honors, Player, TeamMemberShip, TeamRef};
|
use crate::data::{Honors, Player, TeamMemberShip, TeamRef};
|
||||||
|
use crate::parser::{select_last_text, select_text};
|
||||||
use crate::{ParseError, Result};
|
use crate::{ParseError, Result};
|
||||||
use scraper::{ElementRef, Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use std::iter::repeat;
|
use std::iter::repeat;
|
||||||
use steamid_ng::SteamID;
|
use steamid_ng::SteamID;
|
||||||
use time::{macros::format_description, Date};
|
use time::{macros::format_description, Date};
|
||||||
|
|
@ -58,27 +59,12 @@ impl PlayerParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
|
|
||||||
el.select(selector)
|
|
||||||
.next()
|
|
||||||
.and_then(|item| item.text().filter(|s| !s.trim().is_empty()).next())
|
|
||||||
.unwrap_or(default)
|
|
||||||
.trim()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
|
|
||||||
el.select(selector)
|
|
||||||
.next()
|
|
||||||
.and_then(|item| item.text().last())
|
|
||||||
.unwrap_or(default)
|
|
||||||
.trim()
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Parser for PlayerParser {
|
impl Parser for PlayerParser {
|
||||||
type Output = Player;
|
type Output = Player;
|
||||||
|
|
||||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||||
let document = Html::parse_document(&document);
|
let document = Html::parse_document(&document);
|
||||||
|
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||||
|
|
||||||
let name = document
|
let name = document
|
||||||
.select(&self.selector_name)
|
.select(&self.selector_name)
|
||||||
|
|
@ -122,7 +108,7 @@ impl Parser for PlayerParser {
|
||||||
let teams = document
|
let teams = document
|
||||||
.select(&self.selector_team_group)
|
.select(&self.selector_team_group)
|
||||||
.filter(|item| item.select(&self.selector_team_link).next().is_some())
|
.filter(|item| item.select(&self.selector_team_link).next().is_some())
|
||||||
.map(|item| {
|
.map(move |item| {
|
||||||
let link = item
|
let link = item
|
||||||
.select(&self.selector_team_link)
|
.select(&self.selector_team_link)
|
||||||
.next()
|
.next()
|
||||||
|
|
@ -136,7 +122,6 @@ impl Parser for PlayerParser {
|
||||||
Some((_, id)) => id.parse().unwrap_or_default(),
|
Some((_, id)) => id.parse().unwrap_or_default(),
|
||||||
_ => 0,
|
_ => 0,
|
||||||
};
|
};
|
||||||
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
|
||||||
let since = match since.rsplit_once("\n") {
|
let since = match since.rsplit_once("\n") {
|
||||||
Some((_, since)) => Date::parse(since, &format).unwrap_or(Date::MIN),
|
Some((_, since)) => Date::parse(since, &format).unwrap_or(Date::MIN),
|
||||||
_ => Date::MIN,
|
_ => Date::MIN,
|
||||||
|
|
|
||||||
85
src/parser/player_details.rs
Normal file
85
src/parser/player_details.rs
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
use super::{ElementExt, Parser};
|
||||||
|
use crate::data::{MembershipHistory, TeamRef};
|
||||||
|
use crate::parser::select_text;
|
||||||
|
use crate::Result;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
use time::{macros::format_description, Date};
|
||||||
|
|
||||||
|
// CSS selectors used on the player details page. Each one is compiled once in
// `PlayerDetailsParser::new`.

/// `h4` heading inside the `thead` of a history table.
const SELECTOR_TEAM_FORMAT: &str = ".container .white-row-small thead h4";
/// `tbody` of a history table; its rows are the individual memberships.
const SELECTOR_TEAM_GROUP: &str = ".container .white-row-small tbody";
/// Every row of a table body except its first child
/// (presumably a column-header row; confirm against the page markup).
const TEAM_ROW: &str = "tr:not(:first-child)";
/// Anchor in the third cell of a row: its text is the team name and its
/// `href` carries the team id.
const SELECTOR_TEAM_LINK: &str = "td:nth-child(3) a";
/// `small` element in the third cell of a row (the division label, going by the name).
const SELECTOR_TEAM_DIVISION: &str = "td:nth-child(3) small";
/// `span` in the fifth cell of a row, read as the join date.
const SELECTOR_TEAM_JOINED: &str = "td:nth-child(5) span";
/// `span` in the sixth cell of a row, read as the leave date.
const SELECTOR_TEAM_LEFT: &str = "td:nth-child(6) span";
|
||||||
|
|
||||||
|
pub struct PlayerDetailsParser {
|
||||||
|
selector_team_format: Selector,
|
||||||
|
selector_team_group: Selector,
|
||||||
|
selector_team_row: Selector,
|
||||||
|
selector_team_link: Selector,
|
||||||
|
selector_team_division: Selector,
|
||||||
|
selector_team_joined: Selector,
|
||||||
|
selector_team_left: Selector,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PlayerDetailsParser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
PlayerDetailsParser {
|
||||||
|
selector_team_format: Selector::parse(SELECTOR_TEAM_FORMAT).unwrap(),
|
||||||
|
selector_team_group: Selector::parse(SELECTOR_TEAM_GROUP).unwrap(),
|
||||||
|
selector_team_row: Selector::parse(TEAM_ROW).unwrap(),
|
||||||
|
selector_team_link: Selector::parse(SELECTOR_TEAM_LINK).unwrap(),
|
||||||
|
selector_team_division: Selector::parse(SELECTOR_TEAM_DIVISION).unwrap(),
|
||||||
|
selector_team_joined: Selector::parse(SELECTOR_TEAM_JOINED).unwrap(),
|
||||||
|
selector_team_left: Selector::parse(SELECTOR_TEAM_LEFT).unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Parser for PlayerDetailsParser {
|
||||||
|
type Output = Vec<MembershipHistory>;
|
||||||
|
|
||||||
|
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||||
|
let document = Html::parse_document(&document);
|
||||||
|
|
||||||
|
Ok(document
|
||||||
|
.select(&self.selector_team_format)
|
||||||
|
.zip(document.select(&self.selector_team_group))
|
||||||
|
.flat_map(|(format, history)| {
|
||||||
|
history
|
||||||
|
.select(&self.selector_team_row)
|
||||||
|
.map(move |row| (format, row))
|
||||||
|
})
|
||||||
|
.map(|(format, team)| {
|
||||||
|
let format = format.first_text();
|
||||||
|
let link = team
|
||||||
|
.select(&self.selector_team_link)
|
||||||
|
.next()
|
||||||
|
.and_then(|link| link.attr("href"))
|
||||||
|
.unwrap_or_default();
|
||||||
|
let name = select_text(team, &self.selector_team_link, "failed to find team name");
|
||||||
|
let division =
|
||||||
|
select_text(team, &self.selector_team_joined, "failed to find division");
|
||||||
|
let joined = select_text(team, &self.selector_team_joined, "");
|
||||||
|
let left = select_text(team, &self.selector_team_left, "");
|
||||||
|
|
||||||
|
let id = match link.rsplit_once("=") {
|
||||||
|
Some((_, id)) => id.parse().unwrap_or_default(),
|
||||||
|
_ => 0,
|
||||||
|
};
|
||||||
|
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||||
|
|
||||||
|
MembershipHistory {
|
||||||
|
joined: Date::parse(joined, format).unwrap_or(Date::MIN),
|
||||||
|
left: Date::parse(left, format).ok(),
|
||||||
|
team: TeamRef {
|
||||||
|
name: name.to_string(),
|
||||||
|
id,
|
||||||
|
},
|
||||||
|
division: division.to_string(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue