mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
add transactions
This commit is contained in:
parent
0cada6847a
commit
bd2cd8afe6
9 changed files with 2907 additions and 32 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -1540,7 +1540,7 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ugc-scraper"
|
name = "ugc-scraper"
|
||||||
version = "0.1.1"
|
version = "0.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
"main_error",
|
"main_error",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "ugc-scraper"
|
name = "ugc-scraper"
|
||||||
version = "0.1.1"
|
version = "0.2.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
rust-version = "1.67.0"
|
rust-version = "1.67.0"
|
||||||
description = "Scraper for ugcleague.com"
|
description = "Scraper for ugcleague.com"
|
||||||
|
|
|
||||||
65
src/data.rs
65
src/data.rs
|
|
@ -1,3 +1,5 @@
|
||||||
|
use crate::ParseError;
|
||||||
|
use std::str::FromStr;
|
||||||
pub use steamid_ng::SteamID;
|
pub use steamid_ng::SteamID;
|
||||||
use time::{Date, OffsetDateTime};
|
use time::{Date, OffsetDateTime};
|
||||||
|
|
||||||
|
|
@ -200,3 +202,66 @@ pub struct MatchInfo {
|
||||||
pub score_home: u8,
|
pub score_home: u8,
|
||||||
pub score_away: u8,
|
pub score_away: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Competitive format that a league page is requested for.
///
/// The variants correspond to the labels accepted by the `FromStr`
/// implementation: `"9v9"`, `"6v6"`, `"4v4"` and `"2v2"`.
///
/// The enum carries no data, so it derives `Copy` and the comparison/hash
/// traits; callers can pass it by value repeatedly, compare it and use it as
/// a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GameMode {
    /// 9v9
    Highlander,
    /// 6v6
    Sixes,
    /// 4v4
    Fours,
    /// 2v2
    Ultiduo,
}
|
||||||
|
|
||||||
|
impl FromStr for GameMode {
|
||||||
|
type Err = ();
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
"9v9" => Ok(GameMode::Highlander),
|
||||||
|
"6v6" => Ok(GameMode::Sixes),
|
||||||
|
"4v4" => Ok(GameMode::Fours),
|
||||||
|
"2v2" => Ok(GameMode::Ultiduo),
|
||||||
|
_ => Err(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GameMode {
|
||||||
|
pub fn letter(&self) -> char {
|
||||||
|
match self {
|
||||||
|
GameMode::Highlander => 'h',
|
||||||
|
GameMode::Sixes => '6',
|
||||||
|
GameMode::Fours => '4',
|
||||||
|
GameMode::Ultiduo => '2',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||||
|
pub struct Transaction {
|
||||||
|
pub name: String,
|
||||||
|
pub steam_id: SteamID,
|
||||||
|
pub action: TranactionAction,
|
||||||
|
pub team: TeamRef,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Direction of a roster change.
///
/// NOTE: the type name is misspelled ("Tranaction"). It is kept unchanged so
/// existing callers keep compiling; new code should use the correctly
/// spelled alias [`TransactionAction`].
///
/// The enum carries no data, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum TranactionAction {
    /// The player was added to the team's roster.
    Joined,
    /// The player was removed from the team's roster.
    Left,
}

/// Correctly spelled name for [`TranactionAction`].
///
/// Variants are reachable through the alias as well, e.g.
/// `TransactionAction::Joined`.
pub type TransactionAction = TranactionAction;
|
||||||
|
|
||||||
|
impl FromStr for TranactionAction {
|
||||||
|
type Err = ParseError;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
"Joined" => Ok(TranactionAction::Joined),
|
||||||
|
"Left" => Ok(TranactionAction::Left),
|
||||||
|
_ => Err(ParseError::InvalidText {
|
||||||
|
role: "transaction action",
|
||||||
|
text: s.to_string(),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
46
src/lib.rs
46
src/lib.rs
|
|
@ -4,11 +4,12 @@ mod error;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
|
|
||||||
use crate::data::{
|
use crate::data::{
|
||||||
MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason,
|
GameMode, MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef,
|
||||||
|
TeamSeason, Transaction,
|
||||||
};
|
};
|
||||||
use crate::parser::{
|
use crate::parser::{
|
||||||
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
TeamMatchesParser, TeamParser, TeamRosterHistoryParser, TransactionParser,
|
||||||
};
|
};
|
||||||
pub use error::*;
|
pub use error::*;
|
||||||
use reqwest::redirect::Policy;
|
use reqwest::redirect::Policy;
|
||||||
|
|
@ -28,6 +29,7 @@ pub struct UgcClient {
|
||||||
seasons_parser: SeasonsParser,
|
seasons_parser: SeasonsParser,
|
||||||
team_lookup_parser: TeamLookupParser,
|
team_lookup_parser: TeamLookupParser,
|
||||||
match_page_parser: MatchPageParser,
|
match_page_parser: MatchPageParser,
|
||||||
|
transaction_parser: TransactionParser,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// "API client" for ugc by scraping the website
|
/// "API client" for ugc by scraping the website
|
||||||
|
|
@ -43,6 +45,7 @@ impl UgcClient {
|
||||||
seasons_parser: SeasonsParser::new(),
|
seasons_parser: SeasonsParser::new(),
|
||||||
team_lookup_parser: TeamLookupParser::new(),
|
team_lookup_parser: TeamLookupParser::new(),
|
||||||
match_page_parser: MatchPageParser::new(),
|
match_page_parser: MatchPageParser::new(),
|
||||||
|
transaction_parser: TransactionParser::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -137,35 +140,15 @@ impl UgcClient {
|
||||||
self.seasons_parser.parse(&body)
|
self.seasons_parser.parse(&body)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn teams(&self, link: &str) -> Result<Vec<TeamRef>> {
|
pub async fn teams(&self, format: GameMode) -> Result<Vec<TeamRef>> {
|
||||||
|
let link = format!(
|
||||||
|
"https://www.ugcleague.com/team_lookup_tf2{}.cfm",
|
||||||
|
format.letter()
|
||||||
|
);
|
||||||
let body = self.client.get(link).send().await?.text().await?;
|
let body = self.client.get(link).send().await?.text().await?;
|
||||||
self.team_lookup_parser.parse(&body)
|
self.team_lookup_parser.parse(&body)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a list of all 9v9 teams
|
|
||||||
pub async fn teams_9v9(&self) -> Result<Vec<TeamRef>> {
|
|
||||||
self.teams("https://www.ugcleague.com/team_lookup_tf2h.cfm")
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get a list of all 6v6 teams
|
|
||||||
pub async fn teams_6v6(&self) -> Result<Vec<TeamRef>> {
|
|
||||||
self.teams("https://www.ugcleague.com/team_lookup_tf26.cfm")
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get a list of all 4v4 teams
|
|
||||||
pub async fn teams_4v4(&self) -> Result<Vec<TeamRef>> {
|
|
||||||
self.teams("https://www.ugcleague.com/team_lookup_tf24.cfm")
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get a list of all 2v2 teams
|
|
||||||
pub async fn teams_2v2(&self) -> Result<Vec<TeamRef>> {
|
|
||||||
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get match page info
|
/// Get match page info
|
||||||
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
|
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
|
||||||
let body = self
|
let body = self
|
||||||
|
|
@ -180,4 +163,13 @@ impl UgcClient {
|
||||||
.await?;
|
.await?;
|
||||||
self.match_page_parser.parse(&body)
|
self.match_page_parser.parse(&body)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the roster transactions listed for a format
pub async fn transactions(&self, format: GameMode) -> Result<Vec<Transaction>> {
    let url = format!(
        "https://www.ugcleague.com/rostertransactions_tf2{}_all.cfm",
        format.letter()
    );
    // Fetch the page first, then hand the raw HTML to the parser.
    let response = self.client.get(url).send().await?;
    let html = response.text().await?;
    self.transaction_parser.parse(&html)
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ mod team;
|
||||||
mod team_lookup;
|
mod team_lookup;
|
||||||
mod team_matches;
|
mod team_matches;
|
||||||
mod team_roster_history;
|
mod team_roster_history;
|
||||||
|
mod transactions;
|
||||||
|
|
||||||
pub use match_page::*;
|
pub use match_page::*;
|
||||||
pub use player::*;
|
pub use player::*;
|
||||||
|
|
@ -21,6 +22,7 @@ pub use team::*;
|
||||||
pub use team_lookup::*;
|
pub use team_lookup::*;
|
||||||
pub use team_matches::*;
|
pub use team_matches::*;
|
||||||
pub use team_roster_history::*;
|
pub use team_roster_history::*;
|
||||||
|
pub use transactions::*;
|
||||||
|
|
||||||
pub trait Parser {
|
pub trait Parser {
|
||||||
type Output;
|
type Output;
|
||||||
|
|
@ -28,8 +30,9 @@ pub trait Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
trait ElementExt<'a> {
|
trait ElementExt<'a> {
|
||||||
fn first_text(&'a self) -> Option<&'a str>;
|
fn first_text(&self) -> Option<&'a str>;
|
||||||
fn nth_text(&'a self, n: usize) -> Option<&'a str>;
|
fn nth_text(&self, n: usize) -> Option<&'a str>;
|
||||||
|
fn last_text(&self) -> Option<&'a str>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ElementExt<'a> for ElementRef<'a> {
|
impl<'a> ElementExt<'a> for ElementRef<'a> {
|
||||||
|
|
@ -42,6 +45,9 @@ impl<'a> ElementExt<'a> for ElementRef<'a> {
|
||||||
.nth(n - 1)
|
.nth(n - 1)
|
||||||
.map(str::trim)
|
.map(str::trim)
|
||||||
}
|
}
|
||||||
|
/// Last text node of the element that is non-empty after trimming, if any.
fn last_text(&self) -> Option<&'a str> {
    self.text()
        .filter_map(|chunk| {
            let trimmed = chunk.trim();
            (!trimmed.is_empty()).then_some(trimmed)
        })
        .last()
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
|
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
|
||||||
|
|
|
||||||
96
src/parser/transactions.rs
Normal file
96
src/parser/transactions.rs
Normal file
|
|
@ -0,0 +1,96 @@
|
||||||
|
use super::Parser;
|
||||||
|
use crate::data::{TeamRef, Transaction};
|
||||||
|
use crate::parser::{
|
||||||
|
select_last_text, select_text, steam_id_from_link, team_id_from_link, ElementExt,
|
||||||
|
};
|
||||||
|
use crate::{ParseError, Result};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
// CSS selectors used to pick a transaction apart. The row selector is applied
// to the whole document; the others are applied relative to a single row.

/// Every `tr` inside a table carrying the `table`, `table-condensed` and
/// `table-striped` classes.
const SELECTOR_TRANSACTION_ROW: &str = "table.table.table-condensed.table-striped tr";

/// Link whose target starts with `players_page` and whose title starts with
/// `Roster`.
const SELECTOR_TRANSACTION_PLAYER_LINK: &str = "a[href^=\"players_page\"][title^=\"Roster\"]";

/// Bold text inside a `span` in the fourth cell of the row.
const SELECTOR_TRANSACTION_ACTION: &str = "td:nth-child(4) span b";

/// Link whose target starts with `team_page`.
const SELECTOR_TRANSACTION_TEAM_LINK: &str = "a[href^=\"team_page\"]";

/// Fifth cell of the row, from which the team name is read.
const SELECTOR_TRANSACTION_TEAM_NAME: &str = "td:nth-child(5)";
|
||||||
|
|
||||||
|
pub struct TransactionParser {
|
||||||
|
selector_row: Selector,
|
||||||
|
selector_player: Selector,
|
||||||
|
selector_action: Selector,
|
||||||
|
selector_team_link: Selector,
|
||||||
|
selector_team_name: Selector,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for TransactionParser {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TransactionParser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
TransactionParser {
|
||||||
|
selector_row: Selector::parse(SELECTOR_TRANSACTION_ROW).unwrap(),
|
||||||
|
selector_player: Selector::parse(SELECTOR_TRANSACTION_PLAYER_LINK).unwrap(),
|
||||||
|
selector_action: Selector::parse(SELECTOR_TRANSACTION_ACTION).unwrap(),
|
||||||
|
selector_team_link: Selector::parse(SELECTOR_TRANSACTION_TEAM_LINK).unwrap(),
|
||||||
|
selector_team_name: Selector::parse(SELECTOR_TRANSACTION_TEAM_NAME).unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Parser for TransactionParser {
|
||||||
|
type Output = Vec<Transaction>;
|
||||||
|
|
||||||
|
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||||
|
let document = Html::parse_document(document);
|
||||||
|
|
||||||
|
document
|
||||||
|
.select(&self.selector_row)
|
||||||
|
.filter(|row| row.select(&self.selector_player).next().is_some())
|
||||||
|
.map(|row| {
|
||||||
|
let player_link = row.select(&self.selector_player).next().ok_or(
|
||||||
|
ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_TRANSACTION_PLAYER_LINK,
|
||||||
|
role: "player link",
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
let name = player_link.first_text().ok_or(ParseError::EmptyText {
|
||||||
|
selector: SELECTOR_TRANSACTION_PLAYER_LINK,
|
||||||
|
role: "player name",
|
||||||
|
})?;
|
||||||
|
let steam_id = steam_id_from_link(player_link.attr("href").unwrap_or_default())?;
|
||||||
|
|
||||||
|
let action = select_text(row, &self.selector_action)
|
||||||
|
.ok_or(ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_TRANSACTION_ACTION,
|
||||||
|
role: "transaction action",
|
||||||
|
})?
|
||||||
|
.parse()?;
|
||||||
|
|
||||||
|
let team_link = row.select(&self.selector_team_link).next().ok_or(
|
||||||
|
ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_TRANSACTION_TEAM_LINK,
|
||||||
|
role: "team link",
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
let team_id = team_id_from_link(team_link.attr("href").unwrap_or_default())?;
|
||||||
|
let team_name = select_last_text(row, &self.selector_team_name).ok_or(
|
||||||
|
ParseError::EmptyText {
|
||||||
|
selector: SELECTOR_TRANSACTION_TEAM_LINK,
|
||||||
|
role: "team link",
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(Transaction {
|
||||||
|
name: name.to_string(),
|
||||||
|
steam_id,
|
||||||
|
action,
|
||||||
|
team: TeamRef {
|
||||||
|
id: team_id,
|
||||||
|
name: team_name.to_string(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
2639
tests/data/transactions_4v4.html
Normal file
2639
tests/data/transactions_4v4.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -2,7 +2,7 @@ use insta::assert_json_snapshot;
|
||||||
use std::fs::read_to_string;
|
use std::fs::read_to_string;
|
||||||
use ugc_scraper::parser::{
|
use ugc_scraper::parser::{
|
||||||
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
TeamMatchesParser, TeamParser, TeamRosterHistoryParser, TransactionParser,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -100,3 +100,11 @@ fn test_parse_match_html() {
|
||||||
let parsed = parser.parse(&body).unwrap();
|
let parsed = parser.parse(&body).unwrap();
|
||||||
assert_json_snapshot!(parsed);
|
assert_json_snapshot!(parsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses the stored 4v4 transactions page and compares the result with the
// recorded JSON snapshot. The function name and the `parsed` binding are kept
// because the snapshot file name and its `expression` metadata refer to them.
#[test]
fn test_parse_transaction_html() {
    let body = read_to_string("tests/data/transactions_4v4.html").unwrap();
    let parsed = TransactionParser::new().parse(&body).unwrap();
    assert_json_snapshot!(parsed);
}
|
||||||
|
|
|
||||||
69
tests/snapshots/snapshot__parse_transaction_html.snap
Normal file
69
tests/snapshots/snapshot__parse_transaction_html.snap
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
---
|
||||||
|
source: tests/snapshot.rs
|
||||||
|
expression: parsed
|
||||||
|
---
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "L9 ivan",
|
||||||
|
"steam_id": 76561199017754044,
|
||||||
|
"action": "Joined",
|
||||||
|
"team": {
|
||||||
|
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||||
|
"id": 33233
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Rhythm",
|
||||||
|
"steam_id": 76561198062801366,
|
||||||
|
"action": "Joined",
|
||||||
|
"team": {
|
||||||
|
"name": "#FreeTheBlackHeavy",
|
||||||
|
"id": 33011
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "G Punish",
|
||||||
|
"steam_id": 76561198207987376,
|
||||||
|
"action": "Left",
|
||||||
|
"team": {
|
||||||
|
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||||
|
"id": 33233
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "hellboy",
|
||||||
|
"steam_id": 76561199089196165,
|
||||||
|
"action": "Joined",
|
||||||
|
"team": {
|
||||||
|
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||||
|
"id": 33233
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "realhedgehog2082",
|
||||||
|
"steam_id": 76561198168931330,
|
||||||
|
"action": "Left",
|
||||||
|
"team": {
|
||||||
|
"name": "1. e4 e5 2. Ke2 Ke7",
|
||||||
|
"id": 33233
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Rhythm",
|
||||||
|
"steam_id": 76561198062801366,
|
||||||
|
"action": "Left",
|
||||||
|
"team": {
|
||||||
|
"name": "el gato",
|
||||||
|
"id": 33014
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Pope Sonder",
|
||||||
|
"steam_id": 76561198214202059,
|
||||||
|
"action": "Left",
|
||||||
|
"team": {
|
||||||
|
"name": "Gaelic Gladiators",
|
||||||
|
"id": 29916
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
Loading…
Add table
Add a link
Reference in a new issue