mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
add transactions
This commit is contained in:
parent
0cada6847a
commit
bd2cd8afe6
9 changed files with 2907 additions and 32 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -1540,7 +1540,7 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
|
|||
|
||||
[[package]]
|
||||
name = "ugc-scraper"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"main_error",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "ugc-scraper"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
rust-version = "1.67.0"
|
||||
description = "Scraper for ugcleague.com"
|
||||
|
|
|
|||
65
src/data.rs
65
src/data.rs
|
|
@ -1,3 +1,5 @@
|
|||
use crate::ParseError;
|
||||
use std::str::FromStr;
|
||||
pub use steamid_ng::SteamID;
|
||||
use time::{Date, OffsetDateTime};
|
||||
|
||||
|
|
@ -200,3 +202,66 @@ pub struct MatchInfo {
|
|||
pub score_home: u8,
|
||||
pub score_away: u8,
|
||||
}
|
||||
|
||||
/// Game formats, named after their team size.
///
/// The type is `Copy` so one value can be handed to several by-value
/// consumers, and `Debug`/`PartialEq` so it can be logged and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GameMode {
    /// Parsed from `"9v9"`, letter `'h'`
    Highlander,
    /// Parsed from `"6v6"`, letter `'6'`
    Sixes,
    /// Parsed from `"4v4"`, letter `'4'`
    Fours,
    /// Parsed from `"2v2"`, letter `'2'`
    Ultiduo,
}

impl FromStr for GameMode {
    type Err = ();

    /// Parses the `"<n>v<n>"` spelling of a game mode.
    ///
    /// Matching is exact: no trimming or case folding is applied.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` for any input other than `"9v9"`, `"6v6"`, `"4v4"` or `"2v2"`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "9v9" => Ok(GameMode::Highlander),
            "6v6" => Ok(GameMode::Sixes),
            "4v4" => Ok(GameMode::Fours),
            "2v2" => Ok(GameMode::Ultiduo),
            _ => Err(()),
        }
    }
}

impl GameMode {
    /// Returns the single character that identifies this game mode.
    pub fn letter(&self) -> char {
        match self {
            GameMode::Highlander => 'h',
            GameMode::Sixes => '6',
            GameMode::Fours => '4',
            GameMode::Ultiduo => '2',
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct Transaction {
|
||||
pub name: String,
|
||||
pub steam_id: SteamID,
|
||||
pub action: TranactionAction,
|
||||
pub team: TeamRef,
|
||||
}
|
||||
|
||||
/// The direction of a roster transaction.
// NOTE(review): the name is misspelled ("Tranaction"); it is public API, so it is kept as-is.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum TranactionAction {
    /// The player joined the team
    Joined,
    /// The player left the team
    Left,
}
|
||||
|
||||
impl FromStr for TranactionAction {
|
||||
type Err = ParseError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"Joined" => Ok(TranactionAction::Joined),
|
||||
"Left" => Ok(TranactionAction::Left),
|
||||
_ => Err(ParseError::InvalidText {
|
||||
role: "transaction action",
|
||||
text: s.to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
46
src/lib.rs
46
src/lib.rs
|
|
@ -4,11 +4,12 @@ mod error;
|
|||
pub mod parser;
|
||||
|
||||
use crate::data::{
|
||||
MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason,
|
||||
GameMode, MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef,
|
||||
TeamSeason, Transaction,
|
||||
};
|
||||
use crate::parser::{
|
||||
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser, TransactionParser,
|
||||
};
|
||||
pub use error::*;
|
||||
use reqwest::redirect::Policy;
|
||||
|
|
@ -28,6 +29,7 @@ pub struct UgcClient {
|
|||
seasons_parser: SeasonsParser,
|
||||
team_lookup_parser: TeamLookupParser,
|
||||
match_page_parser: MatchPageParser,
|
||||
transaction_parser: TransactionParser,
|
||||
}
|
||||
|
||||
/// "API client" for ugc by scraping the website
|
||||
|
|
@ -43,6 +45,7 @@ impl UgcClient {
|
|||
seasons_parser: SeasonsParser::new(),
|
||||
team_lookup_parser: TeamLookupParser::new(),
|
||||
match_page_parser: MatchPageParser::new(),
|
||||
transaction_parser: TransactionParser::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -137,35 +140,15 @@ impl UgcClient {
|
|||
self.seasons_parser.parse(&body)
|
||||
}
|
||||
|
||||
async fn teams(&self, link: &str) -> Result<Vec<TeamRef>> {
|
||||
/// Get a list of all teams for the given game mode
pub async fn teams(&self, format: GameMode) -> Result<Vec<TeamRef>> {
    // The lookup page is selected by the game mode's letter
    let mode_letter = format.letter();
    let url = format!(
        "https://www.ugcleague.com/team_lookup_tf2{}.cfm",
        mode_letter
    );

    let response = self.client.get(url).send().await?;
    let html = response.text().await?;
    self.team_lookup_parser.parse(&html)
}
|
||||
|
||||
/// Get a list of all 9v9 teams
|
||||
pub async fn teams_9v9(&self) -> Result<Vec<TeamRef>> {
|
||||
self.teams("https://www.ugcleague.com/team_lookup_tf2h.cfm")
|
||||
.await
|
||||
}
|
||||
|
||||
/// Get a list of all 6v6 teams
|
||||
pub async fn teams_6v6(&self) -> Result<Vec<TeamRef>> {
|
||||
self.teams("https://www.ugcleague.com/team_lookup_tf26.cfm")
|
||||
.await
|
||||
}
|
||||
|
||||
/// Get a list of all 4v4 teams
|
||||
pub async fn teams_4v4(&self) -> Result<Vec<TeamRef>> {
|
||||
self.teams("https://www.ugcleague.com/team_lookup_tf24.cfm")
|
||||
.await
|
||||
}
|
||||
|
||||
/// Get a list of all 2v2 teams
|
||||
pub async fn teams_2v2(&self) -> Result<Vec<TeamRef>> {
|
||||
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
|
||||
.await
|
||||
}
|
||||
|
||||
/// Get match page info
|
||||
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
|
||||
let body = self
|
||||
|
|
@ -180,4 +163,13 @@ impl UgcClient {
|
|||
.await?;
|
||||
self.match_page_parser.parse(&body)
|
||||
}
|
||||
|
||||
/// Get the roster transactions listed for the given game mode
pub async fn transactions(&self, format: GameMode) -> Result<Vec<Transaction>> {
    // The transactions page is selected by the game mode's letter
    let mode_letter = format.letter();
    let url = format!(
        "https://www.ugcleague.com/rostertransactions_tf2{}_all.cfm",
        mode_letter
    );

    let response = self.client.get(url).send().await?;
    let html = response.text().await?;
    self.transaction_parser.parse(&html)
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ mod team;
|
|||
mod team_lookup;
|
||||
mod team_matches;
|
||||
mod team_roster_history;
|
||||
mod transactions;
|
||||
|
||||
pub use match_page::*;
|
||||
pub use player::*;
|
||||
|
|
@ -21,6 +22,7 @@ pub use team::*;
|
|||
pub use team_lookup::*;
|
||||
pub use team_matches::*;
|
||||
pub use team_roster_history::*;
|
||||
pub use transactions::*;
|
||||
|
||||
pub trait Parser {
|
||||
type Output;
|
||||
|
|
@ -28,8 +30,9 @@ pub trait Parser {
|
|||
}
|
||||
|
||||
trait ElementExt<'a> {
|
||||
fn first_text(&'a self) -> Option<&'a str>;
|
||||
fn nth_text(&'a self, n: usize) -> Option<&'a str>;
|
||||
fn first_text(&self) -> Option<&'a str>;
|
||||
fn nth_text(&self, n: usize) -> Option<&'a str>;
|
||||
fn last_text(&self) -> Option<&'a str>;
|
||||
}
|
||||
|
||||
impl<'a> ElementExt<'a> for ElementRef<'a> {
|
||||
|
|
@ -42,6 +45,9 @@ impl<'a> ElementExt<'a> for ElementRef<'a> {
|
|||
.nth(n - 1)
|
||||
.map(str::trim)
|
||||
}
|
||||
/// Returns the last text node that is non-empty after trimming, already trimmed.
fn last_text(&self) -> Option<&'a str> {
    let mut latest = None;
    for piece in self.text() {
        let piece = piece.trim();
        // Whitespace-only nodes never replace an earlier match
        if !piece.is_empty() {
            latest = Some(piece);
        }
    }
    latest
}
|
||||
}
|
||||
|
||||
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
|
||||
|
|
|
|||
96
src/parser/transactions.rs
Normal file
96
src/parser/transactions.rs
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
use super::Parser;
|
||||
use crate::data::{TeamRef, Transaction};
|
||||
use crate::parser::{
|
||||
select_last_text, select_text, steam_id_from_link, team_id_from_link, ElementExt,
|
||||
};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
/// Candidate rows of the transactions table
const SELECTOR_TRANSACTION_ROW: &str = "table.table.table-condensed.table-striped tr";
/// Link to the player's page inside a row
const SELECTOR_TRANSACTION_PLAYER_LINK: &str = r#"a[href^="players_page"][title^="Roster"]"#;
/// Bold label in the fourth cell holding the action text
const SELECTOR_TRANSACTION_ACTION: &str = "td:nth-child(4) span b";
/// Link to the team's page inside a row
const SELECTOR_TRANSACTION_TEAM_LINK: &str = r#"a[href^="team_page"]"#;
/// Fifth cell, which the team name is read from
const SELECTOR_TRANSACTION_TEAM_NAME: &str = "td:nth-child(5)";
|
||||
|
||||
pub struct TransactionParser {
|
||||
selector_row: Selector,
|
||||
selector_player: Selector,
|
||||
selector_action: Selector,
|
||||
selector_team_link: Selector,
|
||||
selector_team_name: Selector,
|
||||
}
|
||||
|
||||
impl Default for TransactionParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl TransactionParser {
|
||||
pub fn new() -> Self {
|
||||
TransactionParser {
|
||||
selector_row: Selector::parse(SELECTOR_TRANSACTION_ROW).unwrap(),
|
||||
selector_player: Selector::parse(SELECTOR_TRANSACTION_PLAYER_LINK).unwrap(),
|
||||
selector_action: Selector::parse(SELECTOR_TRANSACTION_ACTION).unwrap(),
|
||||
selector_team_link: Selector::parse(SELECTOR_TRANSACTION_TEAM_LINK).unwrap(),
|
||||
selector_team_name: Selector::parse(SELECTOR_TRANSACTION_TEAM_NAME).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for TransactionParser {
|
||||
type Output = Vec<Transaction>;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(document);
|
||||
|
||||
document
|
||||
.select(&self.selector_row)
|
||||
.filter(|row| row.select(&self.selector_player).next().is_some())
|
||||
.map(|row| {
|
||||
let player_link = row.select(&self.selector_player).next().ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TRANSACTION_PLAYER_LINK,
|
||||
role: "player link",
|
||||
},
|
||||
)?;
|
||||
let name = player_link.first_text().ok_or(ParseError::EmptyText {
|
||||
selector: SELECTOR_TRANSACTION_PLAYER_LINK,
|
||||
role: "player name",
|
||||
})?;
|
||||
let steam_id = steam_id_from_link(player_link.attr("href").unwrap_or_default())?;
|
||||
|
||||
let action = select_text(row, &self.selector_action)
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TRANSACTION_ACTION,
|
||||
role: "transaction action",
|
||||
})?
|
||||
.parse()?;
|
||||
|
||||
let team_link = row.select(&self.selector_team_link).next().ok_or(
|
||||
ParseError::ElementNotFound {
|
||||
selector: SELECTOR_TRANSACTION_TEAM_LINK,
|
||||
role: "team link",
|
||||
},
|
||||
)?;
|
||||
let team_id = team_id_from_link(team_link.attr("href").unwrap_or_default())?;
|
||||
let team_name = select_last_text(row, &self.selector_team_name).ok_or(
|
||||
ParseError::EmptyText {
|
||||
selector: SELECTOR_TRANSACTION_TEAM_LINK,
|
||||
role: "team link",
|
||||
},
|
||||
)?;
|
||||
|
||||
Ok(Transaction {
|
||||
name: name.to_string(),
|
||||
steam_id,
|
||||
action,
|
||||
team: TeamRef {
|
||||
id: team_id,
|
||||
name: team_name.to_string(),
|
||||
},
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
2639
tests/data/transactions_4v4.html
Normal file
2639
tests/data/transactions_4v4.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -2,7 +2,7 @@ use insta::assert_json_snapshot;
|
|||
use std::fs::read_to_string;
|
||||
use ugc_scraper::parser::{
|
||||
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser, TransactionParser,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
|
@ -100,3 +100,11 @@ fn test_parse_match_html() {
|
|||
let parsed = parser.parse(&body).unwrap();
|
||||
assert_json_snapshot!(parsed);
|
||||
}
|
||||
|
||||
/// Snapshot test: parses the stored 4v4 transactions page.
#[test]
fn test_parse_transaction_html() {
    let body = read_to_string("tests/data/transactions_4v4.html").unwrap();
    // The binding stays named `parsed`: the snapshot records it as the expression
    let parsed = TransactionParser::new().parse(&body).unwrap();
    assert_json_snapshot!(parsed);
}
|
||||
|
|
|
|||
69
tests/snapshots/snapshot__parse_transaction_html.snap
Normal file
69
tests/snapshots/snapshot__parse_transaction_html.snap
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
---
|
||||
source: tests/snapshot.rs
|
||||
expression: parsed
|
||||
---
|
||||
[
|
||||
{
|
||||
"name": "L9 ivan",
|
||||
"steam_id": 76561199017754044,
|
||||
"action": "Joined",
|
||||
"team": {
|
||||
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||
"id": 33233
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Rhythm",
|
||||
"steam_id": 76561198062801366,
|
||||
"action": "Joined",
|
||||
"team": {
|
||||
"name": "#FreeTheBlackHeavy",
|
||||
"id": 33011
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "G Punish",
|
||||
"steam_id": 76561198207987376,
|
||||
"action": "Left",
|
||||
"team": {
|
||||
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||
"id": 33233
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "hellboy",
|
||||
"steam_id": 76561199089196165,
|
||||
"action": "Joined",
|
||||
"team": {
|
||||
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||
"id": 33233
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "realhedgehog2082",
|
||||
"steam_id": 76561198168931330,
|
||||
"action": "Left",
|
||||
"team": {
|
||||
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||
"id": 33233
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Rhythm",
|
||||
"steam_id": 76561198062801366,
|
||||
"action": "Left",
|
||||
"team": {
|
||||
"name": "el gato",
|
||||
"id": 33014
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Pope Sonder",
|
||||
"steam_id": 76561198214202059,
|
||||
"action": "Left",
|
||||
"team": {
|
||||
"name": "Gaelic Gladiators",
|
||||
"id": 29916
|
||||
}
|
||||
}
|
||||
]
|
||||
Loading…
Add table
Add a link
Reference in a new issue