mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
membership history
This commit is contained in:
parent
7a1b207d66
commit
d937dbbb5e
5 changed files with 125 additions and 25 deletions
|
|
@ -28,3 +28,11 @@ pub struct TeamRef {
|
|||
pub name: String,
|
||||
pub id: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MembershipHistory {
|
||||
pub team: TeamRef,
|
||||
pub division: String,
|
||||
pub joined: Date,
|
||||
pub left: Option<Date>,
|
||||
}
|
||||
|
|
|
|||
14
src/main.rs
14
src/main.rs
|
|
@ -1,18 +1,22 @@
|
|||
pub mod data;
|
||||
mod error;
|
||||
mod parser;
|
||||
pub mod data;
|
||||
|
||||
use crate::parser::{Parser, PlayerDetailsParser, PlayerParser};
|
||||
pub use error::*;
|
||||
use main_error::MainResult;
|
||||
use reqwest::get;
|
||||
pub use error::*;
|
||||
use crate::parser::{PlayerParser, Parser};
|
||||
|
||||
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> MainResult {
|
||||
let body = get("https://www.ugcleague.com/players_page.cfm?player_id=76561198024494988").await?.text().await?;
|
||||
let parser = PlayerParser::new();
|
||||
let body =
|
||||
get("https://www.ugcleague.com/players_page_details.cfm?player_id=76561198024494988")
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
let parser = PlayerDetailsParser::new();
|
||||
dbg!(parser.parse(&body)?);
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
use crate::Result;
|
||||
use scraper::ElementRef;
|
||||
use scraper::{ElementRef, Selector};
|
||||
|
||||
mod player;
|
||||
mod player_details;
|
||||
|
||||
pub use player::*;
|
||||
pub use player_details::*;
|
||||
|
||||
pub trait Parser {
|
||||
type Output;
|
||||
|
|
@ -27,3 +29,19 @@ impl<'a> ElementExt<'a> for ElementRef<'a> {
|
|||
.map(|s| s.trim())
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the trimmed text of the first non-blank text node found inside the
/// first descendant of `el` that matches `selector`.
///
/// When nothing matches, or the matched element contains only whitespace text,
/// the (trimmed) `default` is returned instead.
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
    // Only the first matching element is ever inspected.
    let first_match = el.select(selector).next();
    // Skip whitespace-only text nodes so surrounding markup noise is ignored.
    let text = first_match.and_then(|node| node.text().find(|chunk| !chunk.trim().is_empty()));
    text.unwrap_or(default).trim()
}
|
||||
|
||||
/// Returns the trimmed text of the last text node inside the first descendant
/// of `el` that matches `selector`.
///
/// Unlike `select_text`, blank text nodes are not skipped: if the last text
/// node is whitespace-only the result is an empty string. `default` (trimmed)
/// is returned only when nothing matches or the match has no text nodes.
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
    let trailing = match el.select(selector).next() {
        Some(node) => node.text().last(),
        None => None,
    };
    trailing.unwrap_or(default).trim()
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use super::{ElementExt, Parser};
|
||||
use crate::data::{Honors, Player, TeamMemberShip, TeamRef};
|
||||
use crate::parser::{select_last_text, select_text};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use scraper::{Html, Selector};
|
||||
use std::iter::repeat;
|
||||
use steamid_ng::SteamID;
|
||||
use time::{macros::format_description, Date};
|
||||
|
|
@ -58,27 +59,12 @@ impl PlayerParser {
|
|||
}
|
||||
}
|
||||
|
||||
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
|
||||
el.select(selector)
|
||||
.next()
|
||||
.and_then(|item| item.text().filter(|s| !s.trim().is_empty()).next())
|
||||
.unwrap_or(default)
|
||||
.trim()
|
||||
}
|
||||
|
||||
fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector, default: &'static str) -> &'a str {
|
||||
el.select(selector)
|
||||
.next()
|
||||
.and_then(|item| item.text().last())
|
||||
.unwrap_or(default)
|
||||
.trim()
|
||||
}
|
||||
|
||||
impl Parser for PlayerParser {
|
||||
type Output = Player;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(&document);
|
||||
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
|
||||
let name = document
|
||||
.select(&self.selector_name)
|
||||
|
|
@ -122,7 +108,7 @@ impl Parser for PlayerParser {
|
|||
let teams = document
|
||||
.select(&self.selector_team_group)
|
||||
.filter(|item| item.select(&self.selector_team_link).next().is_some())
|
||||
.map(|item| {
|
||||
.map(move |item| {
|
||||
let link = item
|
||||
.select(&self.selector_team_link)
|
||||
.next()
|
||||
|
|
@ -136,7 +122,6 @@ impl Parser for PlayerParser {
|
|||
Some((_, id)) => id.parse().unwrap_or_default(),
|
||||
_ => 0,
|
||||
};
|
||||
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
let since = match since.rsplit_once("\n") {
|
||||
Some((_, since)) => Date::parse(since, &format).unwrap_or(Date::MIN),
|
||||
_ => Date::MIN,
|
||||
|
|
|
|||
85
src/parser/player_details.rs
Normal file
85
src/parser/player_details.rs
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
use super::{ElementExt, Parser};
|
||||
use crate::data::{MembershipHistory, TeamRef};
|
||||
use crate::parser::select_text;
|
||||
use crate::Result;
|
||||
use scraper::{Html, Selector};
|
||||
use time::{macros::format_description, Date};
|
||||
|
||||
const SELECTOR_TEAM_FORMAT: &str = ".container .white-row-small thead h4";
|
||||
const SELECTOR_TEAM_GROUP: &str = ".container .white-row-small tbody";
|
||||
const TEAM_ROW: &str = "tr:not(:first-child)";
|
||||
const SELECTOR_TEAM_LINK: &str = "td:nth-child(3) a";
|
||||
const SELECTOR_TEAM_DIVISION: &str = "td:nth-child(3) small";
|
||||
const SELECTOR_TEAM_JOINED: &str = "td:nth-child(5) span";
|
||||
const SELECTOR_TEAM_LEFT: &str = "td:nth-child(6) span";
|
||||
|
||||
pub struct PlayerDetailsParser {
|
||||
selector_team_format: Selector,
|
||||
selector_team_group: Selector,
|
||||
selector_team_row: Selector,
|
||||
selector_team_link: Selector,
|
||||
selector_team_division: Selector,
|
||||
selector_team_joined: Selector,
|
||||
selector_team_left: Selector,
|
||||
}
|
||||
|
||||
impl PlayerDetailsParser {
|
||||
pub fn new() -> Self {
|
||||
PlayerDetailsParser {
|
||||
selector_team_format: Selector::parse(SELECTOR_TEAM_FORMAT).unwrap(),
|
||||
selector_team_group: Selector::parse(SELECTOR_TEAM_GROUP).unwrap(),
|
||||
selector_team_row: Selector::parse(TEAM_ROW).unwrap(),
|
||||
selector_team_link: Selector::parse(SELECTOR_TEAM_LINK).unwrap(),
|
||||
selector_team_division: Selector::parse(SELECTOR_TEAM_DIVISION).unwrap(),
|
||||
selector_team_joined: Selector::parse(SELECTOR_TEAM_JOINED).unwrap(),
|
||||
selector_team_left: Selector::parse(SELECTOR_TEAM_LEFT).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for PlayerDetailsParser {
|
||||
type Output = Vec<MembershipHistory>;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(&document);
|
||||
|
||||
Ok(document
|
||||
.select(&self.selector_team_format)
|
||||
.zip(document.select(&self.selector_team_group))
|
||||
.flat_map(|(format, history)| {
|
||||
history
|
||||
.select(&self.selector_team_row)
|
||||
.map(move |row| (format, row))
|
||||
})
|
||||
.map(|(format, team)| {
|
||||
let format = format.first_text();
|
||||
let link = team
|
||||
.select(&self.selector_team_link)
|
||||
.next()
|
||||
.and_then(|link| link.attr("href"))
|
||||
.unwrap_or_default();
|
||||
let name = select_text(team, &self.selector_team_link, "failed to find team name");
|
||||
let division =
|
||||
select_text(team, &self.selector_team_joined, "failed to find division");
|
||||
let joined = select_text(team, &self.selector_team_joined, "");
|
||||
let left = select_text(team, &self.selector_team_left, "");
|
||||
|
||||
let id = match link.rsplit_once("=") {
|
||||
Some((_, id)) => id.parse().unwrap_or_default(),
|
||||
_ => 0,
|
||||
};
|
||||
let format = format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
|
||||
MembershipHistory {
|
||||
joined: Date::parse(joined, format).unwrap_or(Date::MIN),
|
||||
left: Date::parse(left, format).ok(),
|
||||
team: TeamRef {
|
||||
name: name.to_string(),
|
||||
id,
|
||||
},
|
||||
division: division.to_string(),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue