archiver wip

This commit is contained in:
Robin Appelman 2025-04-11 20:20:51 +02:00
commit a660675932
13 changed files with 3466 additions and 0 deletions

3
archiver/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
password
config.toml
.env

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "SELECT id FROM matches ORDER BY id DESC LIMIT 1",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": []
},
"nullable": [false]
},
"hash": "68839b3409d22689fd472dc5eb33e4381e9a621956f1c4ce90cda5d8872a4d6d"
}

View file

@ -0,0 +1,12 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO matches (\n id, team_home, team_away, score_home, score_away, comment, comment_author\n ) VALUES ($1, $2, $3, $4, $5, $6, $7)",
"describe": {
"columns": [],
"parameters": {
"Left": ["Int4", "Int4", "Int4", "Int2", "Int2", "Varchar", "Varchar"]
},
"nullable": []
},
"hash": "9de8aebc65fc0ad8b1fe1580425020559a346d1d0bbb0f6683e38674f54593b2"
}

3020
archiver/Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

18
archiver/Cargo.toml Normal file
View file

@ -0,0 +1,18 @@
# Manifest for the `archiver` crate.
[package]
name = "archiver"
version = "0.1.0"
edition = "2021"
[dependencies]
# Response types that src/client.rs deserializes API replies into.
ugc-scraper-types = "0.1.2"
# HTTP client; src/client.rs decodes responses with `Response::json`.
reqwest = { version = "0.12.15", features = ["json"] }
# Command-line parsing through the `Parser` / `Subcommand` derives in src/main.rs.
clap = { version = "4.5.35", features = ["derive"] }
# Structured logging; the subscriber is installed at the top of `main`.
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
# Config file deserialization (src/config.rs).
serde = { version = "1.0.219", features = ["derive"] }
toml = "0.8.20"
# Loads the database password referenced by `password_file` in the config.
secretfile = "0.1.0"
# Async runtime; `main` is annotated with `#[tokio::main]`.
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread"] }
# PostgreSQL access through the `query!` macro (src/archive.rs).
sqlx = { version = "0.8.3", features = ["postgres", "runtime-tokio"] }
# Error enum derives and the `MainResult` return type of `main`.
thiserror = "2.0.12"
main_error = "0.1.2"

View file

@ -0,0 +1,19 @@
-- One row per archived match; rows are written by `Archive::store_match`.
CREATE TABLE matches
(
-- Match id supplied by the archiver (not generated by the database).
id INTEGER NOT NULL,
-- Ids of the two participating teams.
team_home INTEGER NOT NULL,
team_away INTEGER NOT NULL,
-- Final scores for each side.
score_home SMALLINT NOT NULL,
score_away SMALLINT NOT NULL,
-- Optional free-text comment and its author; both columns are nullable.
comment VARCHAR,
comment_author VARCHAR
);
-- Enforces one row per match id and serves the `ORDER BY id DESC LIMIT 1` lookup.
CREATE UNIQUE INDEX matches_id_idx
ON matches USING BTREE (id);
-- Per-team lookup indexes.
CREATE INDEX matches_home_idx
ON matches USING BTREE (team_home);
CREATE INDEX matches_away_idx
ON matches USING BTREE (team_away);

73
archiver/src/archive.rs Normal file
View file

@ -0,0 +1,73 @@
use sqlx::postgres::PgConnectOptions;
use sqlx::{query, Error, PgPool};
use std::str::FromStr;
use thiserror::Error;
use ugc_scraper_types::MatchInfo;
#[derive(Debug, Error)]
pub enum ArchiveError {
#[error("Invalid database {url}: {error:#}")]
InvalidDbUrl { url: String, error: Error },
#[error("Error while connecting to database {url}: {error:#}")]
Connect { url: String, error: sqlx::Error },
#[error("Error while running query for {description}: {error:#}")]
Query {
description: &'static str,
error: sqlx::Error,
},
}
/// Handle to the PostgreSQL database that archived matches are stored in.
pub struct Archive {
// Connection pool that every query in this type runs against.
pool: PgPool,
}
impl Archive {
/// Connects to the database at `url`, authenticating with `password`.
///
/// The URL is parsed into connect options first and the password is applied
/// on top of them, so it does not have to be embedded in the URL.
///
/// # Errors
///
/// Returns [`ArchiveError::InvalidDbUrl`] when `url` cannot be parsed and
/// [`ArchiveError::Connect`] when the pool cannot be opened.
pub async fn new(url: &str, password: &str) -> Result<Archive, ArchiveError> {
let opt = PgConnectOptions::from_str(url)
.map_err(|error| ArchiveError::InvalidDbUrl {
url: url.into(),
error,
})?
.password(password);
let pool = PgPool::connect_with(opt)
.await
.map_err(|error| ArchiveError::Connect {
url: url.into(),
error,
})?;
Ok(Archive { pool })
}
/// Inserts one row into `matches` for the match `id`.
///
/// Ids are cast to `i32` and scores to `i16` to match the column types.
/// NOTE(review): `as` casts wrap silently if a value does not fit the
/// target type — confirm the source ranges make that impossible.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the insert fails.
pub async fn store_match(&self, id: u32, match_info: MatchInfo) -> Result<(), ArchiveError> {
query!(
"INSERT INTO matches (
id, team_home, team_away, score_home, score_away, comment, comment_author
) VALUES ($1, $2, $3, $4, $5, $6, $7)",
id as i32,
match_info.team_home.id as i32,
match_info.team_away.id as i32,
match_info.score_home as i16,
match_info.score_away as i16,
match_info.comment,
match_info.comment_author
)
.execute(&self.pool)
.await
.map_err(|error| ArchiveError::Query {
description: "inserting match",
error,
})?;
Ok(())
}
/// Returns the highest match id currently stored, or `None` when the
/// `matches` table has no rows.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the lookup fails.
pub async fn get_last_match_id(&self) -> Result<Option<u32>, ArchiveError> {
Ok(query!("SELECT id FROM matches ORDER BY id DESC LIMIT 1")
.fetch_optional(&self.pool)
.await
.map_err(|error| ArchiveError::Query {
description: "getting latest match",
error,
})?
.map(|row| row.id as u32))
}
}

125
archiver/src/client.rs Normal file
View file

@ -0,0 +1,125 @@
use reqwest::{Client, ClientBuilder, Error, Response, StatusCode};
use serde::de::DeserializeOwned;
use thiserror::Error;
use ugc_scraper_types::{
GameMode, MapHistory, MatchInfo, MembershipHistory, Player, RosterHistory, Team,
TeamSeasonMatch, Transaction,
};
#[derive(Debug, Error)]
pub enum UgcClientError {
#[error("Error sending request to {endpoint:?}: {error:#}")]
Request { endpoint: Endpoint, error: Error },
#[error("Error parsing response from {endpoint:?}: {error:#}")]
Response { endpoint: Endpoint, error: Error },
#[error("{endpoint:?} not found")]
NotFound { endpoint: Endpoint },
}
#[derive(Debug)]
pub struct UgcClient {
client: Client,
api_url: String,
}
#[allow(dead_code)]
impl UgcClient {
pub fn new(api_url: String) -> Self {
let client = ClientBuilder::new()
.user_agent("UGC_ARCHIVER")
.build()
.expect("failed to build client");
Self { client, api_url }
}
async fn send_request<T: DeserializeOwned>(
&self,
endpoint: Endpoint,
) -> Result<T, UgcClientError> {
self.client
.get(endpoint.build_url(&self.api_url))
.send()
.await
.map_err(|error| UgcClientError::Request { endpoint, error })?
.check_not_found(endpoint)?
.json()
.await
.map_err(|error| UgcClientError::Response { endpoint, error })
}
pub async fn get_match(&self, id: u32) -> Result<MatchInfo, UgcClientError> {
self.send_request(Endpoint::Match { id }).await
}
pub async fn get_team(&self, id: u32) -> Result<Team, UgcClientError> {
self.send_request(Endpoint::Team { id }).await
}
pub async fn get_team_roster(&self, id: u32) -> Result<Vec<RosterHistory>, UgcClientError> {
self.send_request(Endpoint::TeamRoster { id }).await
}
pub async fn get_team_matches(&self, id: u32) -> Result<Vec<TeamSeasonMatch>, UgcClientError> {
self.send_request(Endpoint::TeamMatches { id }).await
}
pub async fn get_player(&self, id: u32) -> Result<Player, UgcClientError> {
self.send_request(Endpoint::Player { id }).await
}
pub async fn get_player_history(&self, id: u32) -> Result<MembershipHistory, UgcClientError> {
self.send_request(Endpoint::PlayerHistory { id }).await
}
pub async fn get_maps(&self, format: GameMode) -> Result<MapHistory, UgcClientError> {
self.send_request(Endpoint::Maps { format }).await
}
pub async fn get_transactions(
&self,
format: GameMode,
) -> Result<Vec<Transaction>, UgcClientError> {
self.send_request(Endpoint::Transactions { format }).await
}
}
/// An API route together with the parameter that selects the resource.
#[derive(Debug, Copy, Clone)]
pub enum Endpoint {
    Match { id: u32 },
    Player { id: u32 },
    PlayerHistory { id: u32 },
    Transactions { format: GameMode },
    Team { id: u32 },
    TeamRoster { id: u32 },
    TeamMatches { id: u32 },
    Maps { format: GameMode },
}

impl Endpoint {
    /// Builds the full request URL for this endpoint below `api_url`.
    ///
    /// Trailing slashes on `api_url` are ignored, so `http://host/api` and
    /// `http://host/api/` produce the same URL instead of one containing `//`.
    pub fn build_url(&self, api_url: &str) -> String {
        let base = api_url.trim_end_matches('/');
        match self {
            Endpoint::Match { id } => format!("{base}/match/{id}"),
            Endpoint::Player { id } => format!("{base}/player/{id}"),
            Endpoint::PlayerHistory { id } => format!("{base}/player/{id}/history"),
            Endpoint::Transactions { format } => format!("{base}/transactions/{format}"),
            Endpoint::Team { id } => format!("{base}/team/{id}"),
            Endpoint::TeamRoster { id } => format!("{base}/team/{id}/roster"),
            Endpoint::TeamMatches { id } => format!("{base}/team/{id}/matches"),
            Endpoint::Maps { format } => format!("{base}/maps/{format}"),
        }
    }
}
/// Extension that turns a `404` response into a typed error.
trait ResponseExt: Sized {
    /// Returns `self` unchanged unless the status is `404 Not Found`, in
    /// which case [`UgcClientError::NotFound`] for `endpoint` is returned.
    fn check_not_found(self, endpoint: Endpoint) -> Result<Self, UgcClientError>;
}

impl ResponseExt for Response {
    fn check_not_found(self, endpoint: Endpoint) -> Result<Self, UgcClientError> {
        // Anything other than a 404 is passed through untouched.
        if self.status() != StatusCode::NOT_FOUND {
            return Ok(self);
        }
        Err(UgcClientError::NotFound { endpoint })
    }
}

55
archiver/src/config.rs Normal file
View file

@ -0,0 +1,55 @@
use secretfile::SecretError;
use serde::Deserialize;
use std::fs::read_to_string;
use std::path::Path;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum ConfigError {
#[error("Error reading config from {path}: {error:#}")]
Read { path: String, error: std::io::Error },
#[error("Error parsing config from {path}: {error:#}")]
Parse {
path: String,
error: toml::de::Error,
},
#[error("Error reading password from file: {0:#}")]
PasswordSecret(SecretError),
}
#[derive(Deserialize)]
pub struct Config {
pub db: DBConfig,
pub api: ApiConfig,
}
impl Config {
pub fn read(path: impl AsRef<Path>) -> Result<Self, ConfigError> {
let path = path.as_ref();
let raw = read_to_string(path).map_err(|error| ConfigError::Read {
path: path.display().to_string(),
error,
})?;
toml::from_str(&raw).map_err(|error| ConfigError::Parse {
path: path.display().to_string(),
error,
})
}
}
/// Settings for the API that data is fetched from.
#[derive(Deserialize)]
pub struct ApiConfig {
    /// Base URL handed to `UgcClient::new`.
    pub url: String,
}
#[derive(Deserialize)]
pub struct DBConfig {
pub url: String,
password_file: String,
}
impl DBConfig {
pub fn password(&self) -> Result<String, ConfigError> {
secretfile::load(&self.password_file).map_err(ConfigError::PasswordSecret)
}
}

84
archiver/src/main.rs Normal file
View file

@ -0,0 +1,84 @@
mod archive;
mod client;
mod config;
use crate::archive::Archive;
use crate::client::{UgcClient, UgcClientError};
use crate::config::Config;
use clap::{Parser, Subcommand};
use main_error::MainResult;
use std::path::PathBuf;
use std::time::Duration;
use tokio::time::sleep;
use tracing::{error, info, span, warn, Level};
#[derive(Debug, Parser)]
struct Args {
#[clap(long, short)]
config: PathBuf,
#[command(subcommand)]
command: Command,
}
// Subcommands accepted on the command line.
#[derive(Subcommand, Debug)]
enum Command {
    // Runs `archive_matches`.
    Matches,
}
/// Inclusive upper bound of the match ids that `archive_matches` requests.
const LAST_MATCH: u32 = 117_047;
/// Id to start from when the database holds no matches yet.
/// NOTE(review): the name suggests this is a best guess rather than a verified lower bound.
const MAYBE_FIRST_MATCH: u32 = 14_486;
#[tokio::main]
async fn main() -> MainResult {
tracing_subscriber::fmt::init();
let args = Args::parse();
let config = Config::read(&args.config)?;
let client = UgcClient::new(config.api.url);
let archive = Archive::new(&config.db.url, &config.db.password()?).await?;
match args.command {
Command::Matches => {
archive_matches(&client, &archive).await?;
}
}
Ok(())
}
async fn archive_matches(client: &UgcClient, archive: &Archive) -> MainResult {
let next_match = archive
.get_last_match_id()
.await?
.unwrap_or(MAYBE_FIRST_MATCH - 1)
+ 1;
for id in next_match..=LAST_MATCH {
let _span = span!(Level::INFO, "archive_match", id = id).entered();
match client.get_match(id).await.check_not_found() {
Ok(Some(match_data)) => {
info!("storing match");
archive.store_match(id, match_data).await?;
}
Ok(None) => {
warn!("match not found");
}
Err(e) => {
error!("error fetching match: {}", e);
}
}
sleep(Duration::from_millis(500)).await;
}
Ok(())
}
/// Extension that separates "not found" from real failures.
trait NotFoundResultExt<T>: Sized {
    /// Maps `Ok(x)` to `Ok(Some(x))` and [`UgcClientError::NotFound`] to
    /// `Ok(None)`; every other error is passed through unchanged.
    fn check_not_found(self) -> Result<Option<T>, UgcClientError>;
}

impl<T> NotFoundResultExt<T> for Result<T, UgcClientError> {
    fn check_not_found(self) -> Result<Option<T>, UgcClientError> {
        match self {
            Err(UgcClientError::NotFound { .. }) => Ok(None),
            other => other.map(Some),
        }
    }
}