This commit is contained in:
Robin Appelman 2026-03-26 23:44:23 +01:00
commit fb383652c8
22 changed files with 3161 additions and 3071 deletions

1897
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,20 +1,20 @@
[package]
name = "ugc-scraper"
version = "0.5.0"
edition = "2021"
rust-version = "1.71.1"
version = "0.6.0"
edition = "2024"
rust-version = "1.88.0"
description = "Scraper for ugcleague.com"
license = "MIT OR Apache-2.0"
homepage = "https://codeberg.org/icewind/ugc-scaper"
[dependencies]
tokio = "1.44.2"
reqwest = "0.12.15"
scraper = "0.23.1"
reqwest = "0.13.2"
scraper = "0.26.0"
thiserror = "2.0.3"
time = { version = "0.3.41", features = ["parsing", "macros"] }
steamid-ng = "1.0.0"
ugc-scraper-types = { version = "0.2.0", path = "./types" }
steamid-ng = "3.0.0"
ugc-scraper-types = { version = "0.3.0", path = "./types" }
regex = "1.11.1"
tracing = "0.1.41"

View file

@ -10,7 +10,7 @@ use ugc_scraper::{Result, SteamID, UgcClient};
#[tokio::main]
async fn main() -> Result<()> {
let client = UgcClient::new();
let id = SteamID::from(76561198024494988);
let id = SteamID::from_steam64(76561198024494988).unwrap();
let player = client.player(id).await?;
println!("{}", player.name);
for team in player.teams {

1692
api-server/Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,15 +1,16 @@
[package]
name = "ugc-api-server"
version = "0.1.0"
edition = "2021"
edition = "2024"
rust-version = "1.88.0"
[dependencies]
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread", "rt", "signal"] }
main_error = "0.1.2"
ugc-scraper = { version = "*", path = ".." }
#ugc-scraper = "0.5.0"
#ugc-scraper = "0.6.0"
axum = "0.8.3"
steamid-ng = "1.0.0"
steamid-ng = "3.0.0"
thiserror = "2.0.12"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }

View file

@ -7,7 +7,7 @@ use std::env::var;
use std::net::Ipv4Addr;
use std::str::FromStr;
use std::sync::Arc;
use steamid_ng::{SteamID, SteamIDError};
use steamid_ng::{SteamID, SteamIDParseError};
use thiserror::Error;
use tokio::net::TcpListener;
use tokio::signal;
@ -24,7 +24,7 @@ struct AppState {
#[derive(Debug, Error)]
enum ApiError {
#[error(transparent)]
SteamId(#[from] SteamIDError),
SteamId(#[from] SteamIDParseError),
#[error(transparent)]
Scrape(#[from] ScrapeError),
#[error("malformed request")]
@ -92,7 +92,7 @@ async fn player(
Path(id): Path<String>,
State(state): State<AppState>,
) -> Result<impl IntoResponse, ApiError> {
let steam_id = SteamID::try_from(id.as_str())?;
let steam_id = SteamID::from_str(id.as_str())?;
debug!(player = steam_id.steam3(), "requesting player");
let response = state.client.player(steam_id).await?;
Ok(Json(response))
@ -103,7 +103,7 @@ async fn player_history(
Path(id): Path<String>,
State(state): State<AppState>,
) -> Result<impl IntoResponse, ApiError> {
let steam_id = SteamID::try_from(id.as_str())?;
let steam_id = SteamID::from_str(id.as_str())?;
debug!(player = steam_id.steam3(), "requesting player history");
let response = state.client.player_team_history(steam_id).await?;
Ok(Json(response))

1761
archiver/Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,18 +1,18 @@
[package]
name = "archiver"
version = "0.1.0"
edition = "2021"
edition = "2024"
rust-version = "1.88.0"
[dependencies]
# ugc-scraper-types = "0.1.2"
ugc-scraper-types = { version = "0.2.0", path = "../types", features = ["sqlx"] }
reqwest = { version = "0.12.15", features = ["json"] }
ugc-scraper-types = { version = "0.3.0", path = "../types", features = ["sqlx"] }
reqwest = { version = "0.13.2", features = ["json"] }
clap = { version = "4.5.35", features = ["derive"] }
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
toml = "0.8.20"
toml = "1.1.0"
secretfile = "0.1.0"
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread"] }
sqlx = { version = "0.8.3", features = ["postgres", "runtime-tokio", "time"] }

View file

@ -1,19 +1,20 @@
use crate::steam_id_from_u64;
use futures_util::stream::TryStreamExt;
use serde::{Serialize, Serializer};
use sqlx::postgres::PgConnectOptions;
use sqlx::{query, query_as, Error, Executor, PgPool, Postgres};
use sqlx::{Error, Executor, PgPool, Postgres, query, query_as};
use std::ops::Range;
use std::str::FromStr;
use thiserror::Error;
use time::Date;
use time::format_description::FormatItem;
use time::macros::format_description;
use time::parsing::Parsed;
use time::Date;
use tokio_stream::Stream;
use tracing::{debug, error, warn};
use ugc_scraper_types::{
serde_steam_id_as_string, Class, GameMode, MapHistory, MatchInfo, Membership, MembershipRole,
NameChange, Player, Record, Region, RosterHistory, SteamID, Team, TeamRef, TeamSeason,
Class, GameMode, MapHistory, MatchInfo, Membership, MembershipRole, NameChange, Player, Record,
Region, RosterHistory, SteamID, Team, TeamRef, TeamSeason, serde_steam_id_as_string,
};
const MATCH_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
@ -251,7 +252,7 @@ impl Archive {
description: "getting player steam ids",
error,
})
.map_ok(|map| (map.steam_id as u64).into())
.map_ok(|map| steam_id_from_u64(map.steam_id as u64))
}
pub async fn get_max_player(&self) -> Result<SteamID, ArchiveError> {
@ -264,9 +265,9 @@ impl Archive {
error,
})?
{
Ok((row.max as u64).into())
Ok(steam_id_from_u64(row.max as u64))
} else {
Ok(0.into())
Ok(steam_id_from_u64(0))
}
}
@ -850,7 +851,7 @@ pub struct PlayerData {
impl From<PlayerDataRaw> for PlayerData {
fn from(player: PlayerDataRaw) -> Self {
PlayerData {
steam_id: (player.steam_id as u64).into(),
steam_id: steam_id_from_u64(player.steam_id as u64),
name: player.name,
avatar: player.avatar,
country: player.country,
@ -897,7 +898,7 @@ impl From<MembershipDataRaw> for MembershipData {
fn from(membership: MembershipDataRaw) -> Self {
MembershipData {
team_id: membership.team_id,
steam_id: (membership.steam_id as u64).into(),
steam_id: steam_id_from_u64(membership.steam_id as u64),
role: membership.role,
joined: membership.joined,
left: membership.left,
@ -930,7 +931,9 @@ fn parse_match_date(date: &str, year: i32) -> Date {
}
fn parse_old_match_date(date: &str) -> Result<Date, time::Error> {
const MATCH_DATE_FORMAT_OLD: &[FormatItem<'static>] = format_description!("[weekday case_sensitive:false repr:short], [month padding:none]/[day padding:none]/[year repr:last_two]");
const MATCH_DATE_FORMAT_OLD: &[FormatItem<'static>] = format_description!(
"[weekday case_sensitive:false repr:short], [month padding:none]/[day padding:none]/[year repr:last_two]"
);
let mut parsed = Parsed::new();
parsed.parse_items(date.as_bytes(), MATCH_DATE_FORMAT_OLD)?;

View file

@ -17,8 +17,8 @@ use std::str::FromStr;
use std::time::Duration;
use tokio::time::sleep;
use tokio_stream::StreamExt;
use tracing::{error, info, instrument, span, warn, Level};
use ugc_scraper_types::GameMode;
use tracing::{Level, error, info, instrument, span, warn};
use ugc_scraper_types::{GameMode, SteamID};
#[derive(Debug, Parser)]
struct Args {
@ -258,18 +258,18 @@ async fn fixup_matches(client: &UgcClient, archive: &Archive) -> MainResult {
for season in matches.seasons.iter() {
for season_match in season.matches.iter() {
if let Some(match_id) = season_match.result.match_id() {
if !archive.has_match(match_id).await? {
warn!(match_id, "match not archived yet");
sleep(Duration::from_millis(500)).await;
if archive_match(client, archive, match_id).await.is_err() {
let match_info = season_match
.match_info(&matches.team, season.format)
.expect("failed to build match info");
assert_eq!(format, match_info.format);
info!("reconstructed match");
archive.store_match(match_id as i32, match_info).await?;
}
if let Some(match_id) = season_match.result.match_id()
&& !archive.has_match(match_id).await?
{
warn!(match_id, "match not archived yet");
sleep(Duration::from_millis(500)).await;
if archive_match(client, archive, match_id).await.is_err() {
let match_info = season_match
.match_info(&matches.team, season.format)
.expect("failed to build match info");
assert_eq!(format, match_info.format);
info!("reconstructed match");
archive.store_match(match_id as i32, match_info).await?;
}
}
}
@ -388,3 +388,7 @@ async fn dump_data(archive: &Archive, data: Data, output: &str) -> MainResult {
writeln!(&mut output, "\n]")?;
Ok(())
}
/// Builds a [`SteamID`] from a raw 64-bit value.
///
/// When `SteamID::from_steam64` rejects `id`, the id built from `0` is
/// returned instead, so callers always receive a value rather than an error.
///
/// # Panics
///
/// Panics if `SteamID::from_steam64(0)` itself returns an error.
pub fn steam_id_from_u64(id: u64) -> SteamID {
    match SteamID::from_steam64(id) {
        Ok(steam_id) => steam_id,
        // Rejected input is replaced by the zero id instead of being propagated.
        Err(_) => SteamID::from_steam64(0).unwrap(),
    }
}

View file

@ -1,5 +1,6 @@
use main_error::MainResult;
use std::env::args;
use std::str::FromStr;
use steamid_ng::SteamID;
use ugc_scraper::UgcClient;
@ -7,7 +8,7 @@ use ugc_scraper::UgcClient;
async fn main() -> MainResult {
let client = UgcClient::new();
let id = args().nth(1).expect("no steam id provided");
let id = SteamID::try_from(id.as_str()).expect("invalid steam id provided");
let id = SteamID::from_str(id.as_str()).expect("invalid steam id provided");
let player = client.player(id).await?;
println!("{}", player.name);
for team in player.teams {

View file

@ -3,7 +3,7 @@ use ugc_scraper::{Result, SteamID, UgcClient};
#[tokio::main]
async fn main() -> Result<()> {
let client = UgcClient::new();
let id = SteamID::from(76561198024494988);
let id = SteamID::from_steam64(76561198024494988).unwrap();
let player = client.player(id).await?;
println!("{}", player.name);
for team in player.teams {

View file

@ -6,8 +6,8 @@
}: let
inherit (lib.sources) sourceByRegex;
inherit (builtins) fromTOML readFile;
src = sourceByRegex ./. ["Cargo.*" "((types|archiver|)/?(src|.sqlx)?)(/.*)?" "README.md"];
version = (fromTOML (readFile archiver/Cargo.toml)).package.version;
src = sourceByRegex ../. ["Cargo.*" "((types|archiver|)/?(src|.sqlx)?)(/.*)?" "README.md"];
version = (fromTOML (readFile ../archiver/Cargo.toml)).package.version;
in
rustPlatform.buildRustPackage rec {
pname = "ugc-api-archiver";
@ -28,6 +28,6 @@ in
doCheck = false;
cargoLock = {
lockFile = ./archiver/Cargo.lock;
lockFile = ../archiver/Cargo.lock;
};
}

View file

@ -2,7 +2,7 @@ use super::Parser;
use crate::data::{
CurrentSeasonMap, CurrentSeasonMapList, MapHistory, PreviousSeasonMap, PreviousSeasonMapList,
};
use crate::parser::{select_text, ElementExt};
use crate::parser::{ElementExt, select_text};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
use time::{Date, Month};
@ -141,31 +141,30 @@ impl Parser for MapHistoryParser {
.filter(|child| child.value().is_element())
.count()
== 3
&& let Some(season) = prev_season.as_mut()
{
if let Some(season) = prev_season.as_mut() {
let week = select_text(row, &self.selector_previous_week).ok_or(
let week = select_text(row, &self.selector_previous_week).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PREVIOUS_WEEK,
role: "previous season week number",
},
)?;
if week != "Week" {
let week = week.parse().map_err(|_| ParseError::InvalidText {
role: "previous season week number",
text: week.to_string(),
})?;
let date = select_text(row, &self.selector_previous_date).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PREVIOUS_WEEK,
selector: SELECTOR_PREVIOUS_DATE,
role: "previous season week number",
},
)?;
if week != "Week" {
let week = week.parse().map_err(|_| ParseError::InvalidText {
role: "previous season week number",
text: week.to_string(),
})?;
let date = select_text(row, &self.selector_previous_date).ok_or(
ParseError::ElementNotFound {
selector: SELECTOR_PREVIOUS_DATE,
role: "previous season week number",
},
)?;
let date = parse_date(date)?;
let map = select_text(row, &self.selector_previous_map)
.unwrap_or_default()
.to_string();
season.maps.push(PreviousSeasonMap { week, date, map })
}
let date = parse_date(date)?;
let map = select_text(row, &self.selector_previous_map)
.unwrap_or_default()
.to_string();
season.maps.push(PreviousSeasonMap { week, date, map })
}
}
}

View file

@ -35,12 +35,17 @@ pub trait Parser {
/// Text-extraction helpers for an element borrowed for `'a`.
///
/// The descriptions below reflect the `ElementRef<'a>` implementation in this file.
trait ElementExt<'a> {
/// Returns the first text piece that is non-empty after trimming, or `None`.
fn first_text(&self) -> Option<&'a str>;
/// Returns the last text piece that is non-empty after trimming, or `None`.
fn last_text(&self) -> Option<&'a str>;
}
impl<'a> ElementExt<'a> for ElementRef<'a> {
    /// Yields the earliest text piece of the element that still has content
    /// once surrounding whitespace is stripped.
    fn first_text(&self) -> Option<&'a str> {
        self.text().find_map(|piece| {
            let trimmed = piece.trim();
            (!trimmed.is_empty()).then_some(trimmed)
        })
    }

    /// Yields the final text piece of the element that still has content
    /// once surrounding whitespace is stripped.
    fn last_text(&self) -> Option<&'a str> {
        self.text()
            .filter_map(|piece| {
                let trimmed = piece.trim();
                (!trimmed.is_empty()).then_some(trimmed)
            })
            .last()
    }
}
fn select_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a str> {
@ -103,7 +108,12 @@ fn steam_id_from_link(link: &str) -> Result<SteamID, ParseError> {
link: link.to_string(),
role: "user id",
})
.map(SteamID::from)
.and_then(|id| {
SteamID::from_steam64(id).map_err(|_| ParseError::InvalidLink {
link: link.to_string(),
role: "user id",
})
})
}
static WHITESPACE_REGEX: OnceLock<Regex> = OnceLock::new();

View file

@ -255,7 +255,8 @@ impl Parser for PlayerParser {
Ok(Player {
name,
avatar,
steam_id: SteamID::try_from(id.as_str()).unwrap_or_default(),
steam_id: SteamID::from_str(id.as_str())
.unwrap_or_else(|_| SteamID::from_steam64(0).unwrap()),
honors,
teams,
favorite_classes,

View file

@ -1,7 +1,7 @@
use super::{select_text_empty, whitespace_regex, ElementExt, Parser};
use super::{ElementExt, Parser, select_text_empty, whitespace_regex};
use crate::data::{Membership, NameChange, Record, Team};
use crate::parser::{
select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT,
DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT, select_text, steam_id_from_link,
};
use crate::{ParseError, Result, ScrapeError};
use scraper::{Html, Selector};
@ -183,10 +183,10 @@ impl Parser for TeamParser {
let timezone = select_text(root, &self.selector_team_timezone).map(String::from);
if let Some(timezone) = timezone.as_deref() {
if region.is_none() {
region = Region::from_str(timezone).ok();
}
if let Some(timezone) = timezone.as_deref()
&& region.is_none()
{
region = Region::from_str(timezone).ok();
}
if region.is_none() {

View file

@ -48,7 +48,7 @@ impl Parser for TeamLookupParser {
role: "team link",
selector: SELECTOR_OPTION,
})?;
let text = option.first_text().ok_or(ParseError::EmptyText {
let text = option.last_text().ok_or(ParseError::EmptyText {
role: "team name",
selector: SELECTOR_OPTION,
})?;

View file

@ -3272,7 +3272,7 @@ expression: parsed
"id": 12121
},
{
"name": "- - Combat Wombats",
"name": "- Combat Wombats",
"id": 22319
},
{
@ -6692,7 +6692,7 @@ expression: parsed
"id": 9512
},
{
"name": "",
"name": " ",
"id": 9965
},
{

753
types/Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,14 +1,14 @@
[package]
name = "ugc-scraper-types"
version = "0.2.0"
edition = "2021"
rust-version = "1.71.1"
version = "0.3.0"
edition = "2024"
rust-version = "1.88.0"
description = "Scraper for ugcleague.com - data types"
license = "MIT OR Apache-2.0"
homepage = "https://codeberg.org/icewind/ugc-scaper"
[dependencies]
steamid-ng = "1.0.0"
steamid-ng = "3.0.0"
serde = { version = "1.0.215", features = ["derive"], optional = true }
time = { version = "0.3.36", features = ["parsing", "macros"] }
thiserror = "2.0.12"

View file

@ -1,7 +1,7 @@
use serde::de::Error;
use std::fmt::Display;
use std::str::FromStr;
pub use steamid_ng::SteamID;
pub use steamid_ng::{SteamID, SteamIDParseError};
use thiserror::Error;
use time::error::Parse;
use time::format_description::FormatItem;
@ -680,8 +680,13 @@ pub struct Week<'a> {
}
impl MapHistory {
pub fn weeks(&self, current_season_year: u16) -> impl Iterator<Item = Result<Week, Parse>> {
const CURRENT_DATE_FORMAT: &[FormatItem<'static>] = format_description!("[weekday case_sensitive:false repr:short], [month repr:short] [day padding:none] [year]");
pub fn weeks<'a>(
&'a self,
current_season_year: u16,
) -> impl Iterator<Item = Result<Week<'a>, Parse>> {
const CURRENT_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
"[weekday case_sensitive:false repr:short], [month repr:short] [day padding:none] [year]"
);
let current_season = self.current.maps.iter().map(move |map| {
Ok(Week {
@ -743,7 +748,9 @@ pub struct PreviousSeasonMap {
#[cfg(feature = "serde")]
pub mod serde_steam_id_as_string {
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer};
use std::borrow::Cow;
use std::str::FromStr;
use steamid_ng::SteamID;
pub fn serialize<S: Serializer>(steam_id: &SteamID, serializer: S) -> Result<S::Ok, S::Error> {
@ -756,6 +763,7 @@ pub mod serde_steam_id_as_string {
where
D: Deserializer<'de>,
{
SteamID::deserialize(deserializer)
let s = <Cow<'de, str>>::deserialize(deserializer)?;
SteamID::from_str(&s).map_err(D::Error::custom)
}
}