add transactions

This commit is contained in:
Robin Appelman 2023-11-19 15:04:09 +01:00
commit bd2cd8afe6
9 changed files with 2907 additions and 32 deletions

2
Cargo.lock generated
View file

@ -1540,7 +1540,7 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
[[package]]
name = "ugc-scraper"
version = "0.1.1"
version = "0.2.0"
dependencies = [
"insta",
"main_error",

View file

@ -1,6 +1,6 @@
[package]
name = "ugc-scraper"
version = "0.1.1"
version = "0.2.0"
edition = "2021"
rust-version = "1.67.0"
description = "Scraper for ugcleague.com"

View file

@ -1,3 +1,5 @@
use crate::ParseError;
use std::str::FromStr;
pub use steamid_ng::SteamID;
use time::{Date, OffsetDateTime};
@ -200,3 +202,66 @@ pub struct MatchInfo {
pub score_home: u8,
pub score_away: u8,
}
/// Game format of a UGC league, named after the team size.
///
/// The derives make the mode cheap to pass around: it is taken by value by the
/// client methods, so `Copy` lets callers reuse the same value for several calls,
/// while `PartialEq`/`Eq`/`Hash` allow comparing modes or using them as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GameMode {
    /// 9v9
    Highlander,
    /// 6v6
    Sixes,
    /// 4v4
    Fours,
    /// 2v2
    Ultiduo,
}
impl FromStr for GameMode {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"9v9" => Ok(GameMode::Highlander),
"6v6" => Ok(GameMode::Sixes),
"4v4" => Ok(GameMode::Fours),
"2v2" => Ok(GameMode::Ultiduo),
_ => Err(()),
}
}
}
impl GameMode {
pub fn letter(&self) -> char {
match self {
GameMode::Highlander => 'h',
GameMode::Sixes => '6',
GameMode::Fours => '4',
GameMode::Ultiduo => '2',
}
}
}
/// A roster transaction: one player joining or leaving one team.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Transaction {
/// Name of the player
pub name: String,
/// Steam id of the player
pub steam_id: SteamID,
/// Whether the player joined or left the team
pub action: TranactionAction,
/// The team the player joined or left
pub team: TeamRef,
}
/// Direction of a roster transaction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum TransactionAction {
    /// The player was added to the team's roster
    Joined,
    /// The player was removed from the team's roster
    Left,
}

/// Original, misspelled name of [`TransactionAction`].
///
/// Kept as an alias so code written against the old spelling keeps compiling;
/// variants remain reachable through it (`TranactionAction::Joined`).
pub type TranactionAction = TransactionAction;
/// Parses the action label of a transaction.
///
/// # Errors
///
/// Anything other than exactly `"Joined"` or `"Left"` is rejected with
/// [`ParseError::InvalidText`] carrying the offending text.
impl FromStr for TranactionAction {
    type Err = ParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let action = match s {
            "Joined" => Self::Joined,
            "Left" => Self::Left,
            other => {
                return Err(ParseError::InvalidText {
                    role: "transaction action",
                    text: other.to_string(),
                })
            }
        };
        Ok(action)
    }
}

View file

@ -4,11 +4,12 @@ mod error;
pub mod parser;
use crate::data::{
MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason,
GameMode, MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef,
TeamSeason, Transaction,
};
use crate::parser::{
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
TeamMatchesParser, TeamParser, TeamRosterHistoryParser, TransactionParser,
};
pub use error::*;
use reqwest::redirect::Policy;
@ -28,6 +29,7 @@ pub struct UgcClient {
seasons_parser: SeasonsParser,
team_lookup_parser: TeamLookupParser,
match_page_parser: MatchPageParser,
transaction_parser: TransactionParser,
}
/// "API client" for ugc by scraping the website
@ -43,6 +45,7 @@ impl UgcClient {
seasons_parser: SeasonsParser::new(),
team_lookup_parser: TeamLookupParser::new(),
match_page_parser: MatchPageParser::new(),
transaction_parser: TransactionParser::new(),
}
}
@ -137,35 +140,15 @@ impl UgcClient {
self.seasons_parser.parse(&body)
}
async fn teams(&self, link: &str) -> Result<Vec<TeamRef>> {
/// Get a list of all teams for the given game mode
pub async fn teams(&self, format: GameMode) -> Result<Vec<TeamRef>> {
    let url = format!(
        "https://www.ugcleague.com/team_lookup_tf2{}.cfm",
        format.letter()
    );
    let response = self.client.get(url).send().await?;
    let html = response.text().await?;
    self.team_lookup_parser.parse(&html)
}
/// Get a list of all 9v9 teams
pub async fn teams_9v9(&self) -> Result<Vec<TeamRef>> {
self.teams("https://www.ugcleague.com/team_lookup_tf2h.cfm")
.await
}
/// Get a list of all 6v6 teams
pub async fn teams_6v6(&self) -> Result<Vec<TeamRef>> {
self.teams("https://www.ugcleague.com/team_lookup_tf26.cfm")
.await
}
/// Get a list of all 4v4 teams
pub async fn teams_4v4(&self) -> Result<Vec<TeamRef>> {
self.teams("https://www.ugcleague.com/team_lookup_tf24.cfm")
.await
}
/// Get a list of all 2v2 teams
pub async fn teams_2v2(&self) -> Result<Vec<TeamRef>> {
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
.await
}
/// Get match page info
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
let body = self
@ -180,4 +163,13 @@ impl UgcClient {
.await?;
self.match_page_parser.parse(&body)
}
/// Get the roster transactions listed for the given game mode
pub async fn transactions(&self, format: GameMode) -> Result<Vec<Transaction>> {
    let url = format!(
        "https://www.ugcleague.com/rostertransactions_tf2{}_all.cfm",
        format.letter()
    );
    let response = self.client.get(url).send().await?;
    let html = response.text().await?;
    self.transaction_parser.parse(&html)
}
}

View file

@ -12,6 +12,7 @@ mod team;
mod team_lookup;
mod team_matches;
mod team_roster_history;
mod transactions;
pub use match_page::*;
pub use player::*;
@ -21,6 +22,7 @@ pub use team::*;
pub use team_lookup::*;
pub use team_matches::*;
pub use team_roster_history::*;
pub use transactions::*;
pub trait Parser {
type Output;
@ -28,8 +30,9 @@ pub trait Parser {
}
trait ElementExt<'a> {
fn first_text(&'a self) -> Option<&'a str>;
fn nth_text(&'a self, n: usize) -> Option<&'a str>;
fn first_text(&self) -> Option<&'a str>;
fn nth_text(&self, n: usize) -> Option<&'a str>;
fn last_text(&self) -> Option<&'a str>;
}
impl<'a> ElementExt<'a> for ElementRef<'a> {
@ -42,6 +45,9 @@ impl<'a> ElementExt<'a> for ElementRef<'a> {
.nth(n - 1)
.map(str::trim)
}
/// Last text node of the element that is non-empty after trimming whitespace
fn last_text(&self) -> Option<&'a str> {
    self.text()
        .filter_map(|chunk| {
            let trimmed = chunk.trim();
            (!trimmed.is_empty()).then_some(trimmed)
        })
        .last()
}
}
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {

View file

@ -0,0 +1,96 @@
use super::Parser;
use crate::data::{TeamRef, Transaction};
use crate::parser::{
select_last_text, select_text, steam_id_from_link, team_id_from_link, ElementExt,
};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
// CSS selectors used by `TransactionParser`. The row selector is applied to the
// whole document, the remaining ones are applied to a single row.

/// Every `tr` inside a `table` with the classes `table`, `table-condensed` and
/// `table-striped`. Rows that contain no player link are skipped by the parser.
const SELECTOR_TRANSACTION_ROW: &str = "table.table.table-condensed.table-striped tr";
/// Link whose `href` starts with `players_page` and whose `title` starts with
/// `Roster`. Its first text is the player name, its `href` yields the steam id.
const SELECTOR_TRANSACTION_PLAYER_LINK: &str = "a[href^=\"players_page\"][title^=\"Roster\"]";
/// `b` inside a `span` inside the 4th cell; its text is parsed as the transaction action.
const SELECTOR_TRANSACTION_ACTION: &str = "td:nth-child(4) span b";
/// Link whose `href` starts with `team_page`; the team id is taken from its `href`.
const SELECTOR_TRANSACTION_TEAM_LINK: &str = "a[href^=\"team_page\"]";
/// The 5th cell; passed to `select_last_text` to obtain the team name.
const SELECTOR_TRANSACTION_TEAM_NAME: &str = "td:nth-child(5)";
/// Parser turning the HTML of a roster transactions page into a list of [`Transaction`]s.
///
/// Each field holds the compiled form of the matching `SELECTOR_TRANSACTION_*` constant.
pub struct TransactionParser {
// compiled `SELECTOR_TRANSACTION_ROW`
selector_row: Selector,
// compiled `SELECTOR_TRANSACTION_PLAYER_LINK`
selector_player: Selector,
// compiled `SELECTOR_TRANSACTION_ACTION`
selector_action: Selector,
// compiled `SELECTOR_TRANSACTION_TEAM_LINK`
selector_team_link: Selector,
// compiled `SELECTOR_TRANSACTION_TEAM_NAME`
selector_team_name: Selector,
}
impl Default for TransactionParser {
fn default() -> Self {
Self::new()
}
}
impl TransactionParser {
    /// Create a parser by compiling all `SELECTOR_TRANSACTION_*` selectors.
    ///
    /// # Panics
    ///
    /// Panics if one of the selector constants fails to parse.
    pub fn new() -> Self {
        let compile = |css: &'static str| Selector::parse(css).unwrap();
        Self {
            selector_row: compile(SELECTOR_TRANSACTION_ROW),
            selector_player: compile(SELECTOR_TRANSACTION_PLAYER_LINK),
            selector_action: compile(SELECTOR_TRANSACTION_ACTION),
            selector_team_link: compile(SELECTOR_TRANSACTION_TEAM_LINK),
            selector_team_name: compile(SELECTOR_TRANSACTION_TEAM_NAME),
        }
    }
}
impl Parser for TransactionParser {
type Output = Vec<Transaction>;
fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(document);
document
.select(&self.selector_row)
.filter(|row| row.select(&self.selector_player).next().is_some())
.map(|row| {
let player_link = row.select(&self.selector_player).next().ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TRANSACTION_PLAYER_LINK,
role: "player link",
},
)?;
let name = player_link.first_text().ok_or(ParseError::EmptyText {
selector: SELECTOR_TRANSACTION_PLAYER_LINK,
role: "player name",
})?;
let steam_id = steam_id_from_link(player_link.attr("href").unwrap_or_default())?;
let action = select_text(row, &self.selector_action)
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_TRANSACTION_ACTION,
role: "transaction action",
})?
.parse()?;
let team_link = row.select(&self.selector_team_link).next().ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_TRANSACTION_TEAM_LINK,
role: "team link",
},
)?;
let team_id = team_id_from_link(team_link.attr("href").unwrap_or_default())?;
let team_name = select_last_text(row, &self.selector_team_name).ok_or(
ParseError::EmptyText {
selector: SELECTOR_TRANSACTION_TEAM_LINK,
role: "team link",
},
)?;
Ok(Transaction {
name: name.to_string(),
steam_id,
action,
team: TeamRef {
id: team_id,
name: team_name.to_string(),
},
})
})
.collect()
}
}

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,7 @@ use insta::assert_json_snapshot;
use std::fs::read_to_string;
use ugc_scraper::parser::{
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
TeamMatchesParser, TeamParser, TeamRosterHistoryParser, TransactionParser,
};
#[test]
@ -100,3 +100,11 @@ fn test_parse_match_html() {
let parsed = parser.parse(&body).unwrap();
assert_json_snapshot!(parsed);
}
#[test]
fn test_parse_transaction_html() {
    let html = read_to_string("tests/data/transactions_4v4.html").unwrap();
    // The binding stays named `parsed`: the stored snapshot records the asserted
    // expression under that name.
    let parsed = TransactionParser::new().parse(&html).unwrap();
    assert_json_snapshot!(parsed);
}

View file

@ -0,0 +1,69 @@
---
source: tests/snapshot.rs
expression: parsed
---
[
{
"name": "L9 ivan",
"steam_id": 76561199017754044,
"action": "Joined",
"team": {
"name": "1. e4 e5 2. Ke2 Ke7",
"id": 33233
}
},
{
"name": "Rhythm",
"steam_id": 76561198062801366,
"action": "Joined",
"team": {
"name": "#FreeTheBlackHeavy",
"id": 33011
}
},
{
"name": "G Punish",
"steam_id": 76561198207987376,
"action": "Left",
"team": {
"name": "1. e4 e5 2. Ke2 Ke7",
"id": 33233
}
},
{
"name": "hellboy",
"steam_id": 76561199089196165,
"action": "Joined",
"team": {
"name": "1. e4 e5 2. Ke2 Ke7",
"id": 33233
}
},
{
"name": "realhedgehog2082",
"steam_id": 76561198168931330,
"action": "Left",
"team": {
"name": "1. e4 e5 2. Ke2 Ke7",
"id": 33233
}
},
{
"name": "Rhythm",
"steam_id": 76561198062801366,
"action": "Left",
"team": {
"name": "el gato",
"id": 33014
}
},
{
"name": "Pope Sonder",
"steam_id": 76561198214202059,
"action": "Left",
"team": {
"name": "Gaelic Gladiators",
"id": 29916
}
}
]