mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
archiver wip
This commit is contained in:
parent
8e1ea846da
commit
a660675932
13 changed files with 3466 additions and 0 deletions
33
archiver.nix
Normal file
33
archiver.nix
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Nix package for the UGC API archiver, built from the crate in ./archiver.
{ rustPlatform
, openssl
, pkg-config
, lib
,
}:
let
  inherit (lib.sources) sourceByRegex;
  inherit (builtins) fromTOML readFile;
  # Limit the build source to the manifest and lock file, the Rust sources, the
  # README and the checked-in sqlx query metadata.
  src = sourceByRegex ./archiver [ "Cargo.*" "(src)(/.*)?" "README.md" "(\\.sqlx)(/.*)?" ];
  # Take the version from the crate manifest so it is declared in one place only.
  version = (fromTOML (readFile ./archiver/Cargo.toml)).package.version;
in
rustPlatform.buildRustPackage {
  pname = "ugc-api-archiver";

  inherit src version;

  buildInputs = [
    openssl
  ];

  nativeBuildInputs = [
    pkg-config
  ];

  # Link against the openssl from buildInputs rather than a vendored copy.
  OPENSSL_NO_VENDOR = 1;

  doCheck = false;

  cargoLock = {
    lockFile = ./archiver/Cargo.lock;
  };
}
|
||||||
3
archiver/.gitignore
vendored
Normal file
3
archiver/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
password
|
||||||
|
config.toml
|
||||||
|
.env
|
||||||
18
archiver/.sqlx/query-68839b3409d22689fd472dc5eb33e4381e9a621956f1c4ce90cda5d8872a4d6d.json
generated
Normal file
18
archiver/.sqlx/query-68839b3409d22689fd472dc5eb33e4381e9a621956f1c4ce90cda5d8872a4d6d.json
generated
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
"db_name": "PostgreSQL",
|
||||||
|
"query": "SELECT id FROM matches ORDER BY id DESC LIMIT 1",
|
||||||
|
"describe": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"ordinal": 0,
|
||||||
|
"name": "id",
|
||||||
|
"type_info": "Int4"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parameters": {
|
||||||
|
"Left": []
|
||||||
|
},
|
||||||
|
"nullable": [false]
|
||||||
|
},
|
||||||
|
"hash": "68839b3409d22689fd472dc5eb33e4381e9a621956f1c4ce90cda5d8872a4d6d"
|
||||||
|
}
|
||||||
12
archiver/.sqlx/query-9de8aebc65fc0ad8b1fe1580425020559a346d1d0bbb0f6683e38674f54593b2.json
generated
Normal file
12
archiver/.sqlx/query-9de8aebc65fc0ad8b1fe1580425020559a346d1d0bbb0f6683e38674f54593b2.json
generated
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
"db_name": "PostgreSQL",
|
||||||
|
"query": "INSERT INTO matches (\n id, team_home, team_away, score_home, score_away, comment, comment_author\n ) VALUES ($1, $2, $3, $4, $5, $6, $7)",
|
||||||
|
"describe": {
|
||||||
|
"columns": [],
|
||||||
|
"parameters": {
|
||||||
|
"Left": ["Int4", "Int4", "Int4", "Int2", "Int2", "Varchar", "Varchar"]
|
||||||
|
},
|
||||||
|
"nullable": []
|
||||||
|
},
|
||||||
|
"hash": "9de8aebc65fc0ad8b1fe1580425020559a346d1d0bbb0f6683e38674f54593b2"
|
||||||
|
}
|
||||||
3020
archiver/Cargo.lock
generated
Normal file
3020
archiver/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
18
archiver/Cargo.toml
Normal file
18
archiver/Cargo.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
[package]
|
||||||
|
name = "archiver"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
ugc-scraper-types = "0.1.2"
|
||||||
|
reqwest = { version = "0.12.15", features = ["json"] }
|
||||||
|
clap = { version = "4.5.35", features = ["derive"] }
|
||||||
|
tracing = "0.1.41"
|
||||||
|
tracing-subscriber = "0.3.19"
|
||||||
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
|
toml = "0.8.20"
|
||||||
|
secretfile = "0.1.0"
|
||||||
|
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread"] }
|
||||||
|
sqlx = { version = "0.8.3", features = ["postgres", "runtime-tokio"] }
|
||||||
|
thiserror = "2.0.12"
|
||||||
|
main_error = "0.1.2"
|
||||||
19
archiver/migrations/20250410224414_matches.sql
Normal file
19
archiver/migrations/20250410224414_matches.sql
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
CREATE TABLE matches
|
||||||
|
(
|
||||||
|
id INTEGER NOT NULL,
|
||||||
|
team_home INTEGER NOT NULL,
|
||||||
|
team_away INTEGER NOT NULL,
|
||||||
|
score_home SMALLINT NOT NULL,
|
||||||
|
score_away SMALLINT NOT NULL,
|
||||||
|
comment VARCHAR,
|
||||||
|
comment_author VARCHAR
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE UNIQUE INDEX matches_id_idx
|
||||||
|
ON matches USING BTREE (id);
|
||||||
|
|
||||||
|
CREATE INDEX matches_home_idx
|
||||||
|
ON matches USING BTREE (team_home);
|
||||||
|
|
||||||
|
CREATE INDEX matches_away_idx
|
||||||
|
ON matches USING BTREE (team_away);
|
||||||
73
archiver/src/archive.rs
Normal file
73
archiver/src/archive.rs
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
use sqlx::postgres::PgConnectOptions;
|
||||||
|
use sqlx::{query, Error, PgPool};
|
||||||
|
use std::str::FromStr;
|
||||||
|
use thiserror::Error;
|
||||||
|
use ugc_scraper_types::MatchInfo;
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum ArchiveError {
|
||||||
|
#[error("Invalid database {url}: {error:#}")]
|
||||||
|
InvalidDbUrl { url: String, error: Error },
|
||||||
|
#[error("Error while connecting to database {url}: {error:#}")]
|
||||||
|
Connect { url: String, error: sqlx::Error },
|
||||||
|
#[error("Error while running query for {description}: {error:#}")]
|
||||||
|
Query {
|
||||||
|
description: &'static str,
|
||||||
|
error: sqlx::Error,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Archive {
|
||||||
|
pool: PgPool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Archive {
|
||||||
|
pub async fn new(url: &str, password: &str) -> Result<Archive, ArchiveError> {
|
||||||
|
let opt = PgConnectOptions::from_str(url)
|
||||||
|
.map_err(|error| ArchiveError::InvalidDbUrl {
|
||||||
|
url: url.into(),
|
||||||
|
error,
|
||||||
|
})?
|
||||||
|
.password(password);
|
||||||
|
let pool = PgPool::connect_with(opt)
|
||||||
|
.await
|
||||||
|
.map_err(|error| ArchiveError::Connect {
|
||||||
|
url: url.into(),
|
||||||
|
error,
|
||||||
|
})?;
|
||||||
|
Ok(Archive { pool })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn store_match(&self, id: u32, match_info: MatchInfo) -> Result<(), ArchiveError> {
|
||||||
|
query!(
|
||||||
|
"INSERT INTO matches (
|
||||||
|
id, team_home, team_away, score_home, score_away, comment, comment_author
|
||||||
|
) VALUES ($1, $2, $3, $4, $5, $6, $7)",
|
||||||
|
id as i32,
|
||||||
|
match_info.team_home.id as i32,
|
||||||
|
match_info.team_away.id as i32,
|
||||||
|
match_info.score_home as i16,
|
||||||
|
match_info.score_away as i16,
|
||||||
|
match_info.comment,
|
||||||
|
match_info.comment_author
|
||||||
|
)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await
|
||||||
|
.map_err(|error| ArchiveError::Query {
|
||||||
|
description: "inserting match",
|
||||||
|
error,
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_last_match_id(&self) -> Result<Option<u32>, ArchiveError> {
|
||||||
|
Ok(query!("SELECT id FROM matches ORDER BY id DESC LIMIT 1")
|
||||||
|
.fetch_optional(&self.pool)
|
||||||
|
.await
|
||||||
|
.map_err(|error| ArchiveError::Query {
|
||||||
|
description: "getting latest match",
|
||||||
|
error,
|
||||||
|
})?
|
||||||
|
.map(|row| row.id as u32))
|
||||||
|
}
|
||||||
|
}
|
||||||
125
archiver/src/client.rs
Normal file
125
archiver/src/client.rs
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
use reqwest::{Client, ClientBuilder, Error, Response, StatusCode};
|
||||||
|
use serde::de::DeserializeOwned;
|
||||||
|
use thiserror::Error;
|
||||||
|
use ugc_scraper_types::{
|
||||||
|
GameMode, MapHistory, MatchInfo, MembershipHistory, Player, RosterHistory, Team,
|
||||||
|
TeamSeasonMatch, Transaction,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum UgcClientError {
|
||||||
|
#[error("Error sending request to {endpoint:?}: {error:#}")]
|
||||||
|
Request { endpoint: Endpoint, error: Error },
|
||||||
|
#[error("Error parsing response from {endpoint:?}: {error:#}")]
|
||||||
|
Response { endpoint: Endpoint, error: Error },
|
||||||
|
#[error("{endpoint:?} not found")]
|
||||||
|
NotFound { endpoint: Endpoint },
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct UgcClient {
|
||||||
|
client: Client,
|
||||||
|
api_url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
impl UgcClient {
|
||||||
|
pub fn new(api_url: String) -> Self {
|
||||||
|
let client = ClientBuilder::new()
|
||||||
|
.user_agent("UGC_ARCHIVER")
|
||||||
|
.build()
|
||||||
|
.expect("failed to build client");
|
||||||
|
Self { client, api_url }
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn send_request<T: DeserializeOwned>(
|
||||||
|
&self,
|
||||||
|
endpoint: Endpoint,
|
||||||
|
) -> Result<T, UgcClientError> {
|
||||||
|
self.client
|
||||||
|
.get(endpoint.build_url(&self.api_url))
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|error| UgcClientError::Request { endpoint, error })?
|
||||||
|
.check_not_found(endpoint)?
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|error| UgcClientError::Response { endpoint, error })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_match(&self, id: u32) -> Result<MatchInfo, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::Match { id }).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_team(&self, id: u32) -> Result<Team, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::Team { id }).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_team_roster(&self, id: u32) -> Result<Vec<RosterHistory>, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::TeamRoster { id }).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_team_matches(&self, id: u32) -> Result<Vec<TeamSeasonMatch>, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::TeamMatches { id }).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_player(&self, id: u32) -> Result<Player, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::Player { id }).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_player_history(&self, id: u32) -> Result<MembershipHistory, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::PlayerHistory { id }).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_maps(&self, format: GameMode) -> Result<MapHistory, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::Maps { format }).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_transactions(
|
||||||
|
&self,
|
||||||
|
format: GameMode,
|
||||||
|
) -> Result<Vec<Transaction>, UgcClientError> {
|
||||||
|
self.send_request(Endpoint::Transactions { format }).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
pub enum Endpoint {
|
||||||
|
Match { id: u32 },
|
||||||
|
Player { id: u32 },
|
||||||
|
PlayerHistory { id: u32 },
|
||||||
|
Transactions { format: GameMode },
|
||||||
|
Team { id: u32 },
|
||||||
|
TeamRoster { id: u32 },
|
||||||
|
TeamMatches { id: u32 },
|
||||||
|
Maps { format: GameMode },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Endpoint {
|
||||||
|
pub fn build_url(&self, api_url: &str) -> String {
|
||||||
|
match self {
|
||||||
|
Endpoint::Match { id } => format!("{}/match/{id}", api_url),
|
||||||
|
Endpoint::Player { id } => format!("{}/player/{id}", api_url),
|
||||||
|
Endpoint::PlayerHistory { id } => format!("{}/player/{id}/history", api_url),
|
||||||
|
Endpoint::Transactions { format } => format!("{}/transactions/{format}", api_url),
|
||||||
|
Endpoint::Team { id } => format!("{}/team/{id}", api_url),
|
||||||
|
Endpoint::TeamRoster { id } => format!("{}/team/{id}/roster", api_url),
|
||||||
|
Endpoint::TeamMatches { id } => format!("{}/team/{id}/matches", api_url),
|
||||||
|
Endpoint::Maps { format } => format!("{}/maps/{format}", api_url),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trait ResponseExt: Sized {
|
||||||
|
fn check_not_found(self, endpoint: Endpoint) -> Result<Self, UgcClientError>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ResponseExt for Response {
|
||||||
|
fn check_not_found(self, endpoint: Endpoint) -> Result<Self, UgcClientError> {
|
||||||
|
if self.status() == StatusCode::NOT_FOUND {
|
||||||
|
Err(UgcClientError::NotFound { endpoint })
|
||||||
|
} else {
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
55
archiver/src/config.rs
Normal file
55
archiver/src/config.rs
Normal file
|
|
@ -0,0 +1,55 @@
|
||||||
|
use secretfile::SecretError;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use std::fs::read_to_string;
|
||||||
|
use std::path::Path;
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum ConfigError {
|
||||||
|
#[error("Error reading config from {path}: {error:#}")]
|
||||||
|
Read { path: String, error: std::io::Error },
|
||||||
|
#[error("Error parsing config from {path}: {error:#}")]
|
||||||
|
Parse {
|
||||||
|
path: String,
|
||||||
|
error: toml::de::Error,
|
||||||
|
},
|
||||||
|
#[error("Error reading password from file: {0:#}")]
|
||||||
|
PasswordSecret(SecretError),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct Config {
|
||||||
|
pub db: DBConfig,
|
||||||
|
pub api: ApiConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Config {
|
||||||
|
pub fn read(path: impl AsRef<Path>) -> Result<Self, ConfigError> {
|
||||||
|
let path = path.as_ref();
|
||||||
|
let raw = read_to_string(path).map_err(|error| ConfigError::Read {
|
||||||
|
path: path.display().to_string(),
|
||||||
|
error,
|
||||||
|
})?;
|
||||||
|
toml::from_str(&raw).map_err(|error| ConfigError::Parse {
|
||||||
|
path: path.display().to_string(),
|
||||||
|
error,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct ApiConfig {
|
||||||
|
pub url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct DBConfig {
|
||||||
|
pub url: String,
|
||||||
|
password_file: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DBConfig {
|
||||||
|
pub fn password(&self) -> Result<String, ConfigError> {
|
||||||
|
secretfile::load(&self.password_file).map_err(ConfigError::PasswordSecret)
|
||||||
|
}
|
||||||
|
}
|
||||||
84
archiver/src/main.rs
Normal file
84
archiver/src/main.rs
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
mod archive;
|
||||||
|
mod client;
|
||||||
|
mod config;
|
||||||
|
|
||||||
|
use crate::archive::Archive;
|
||||||
|
use crate::client::{UgcClient, UgcClientError};
|
||||||
|
use crate::config::Config;
|
||||||
|
use clap::{Parser, Subcommand};
|
||||||
|
use main_error::MainResult;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::time::Duration;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
use tracing::{error, info, span, warn, Level};
|
||||||
|
|
||||||
|
#[derive(Debug, Parser)]
|
||||||
|
struct Args {
|
||||||
|
#[clap(long, short)]
|
||||||
|
config: PathBuf,
|
||||||
|
#[command(subcommand)]
|
||||||
|
command: Command,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Subcommand)]
|
||||||
|
enum Command {
|
||||||
|
Matches,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Highest match id that is requested; inclusive upper bound of the archive loop.
const LAST_MATCH: u32 = 117_047;
/// Match id archiving starts from when the database holds no matches yet.
// NOTE(review): the name suggests this is a best guess at the earliest existing match.
const MAYBE_FIRST_MATCH: u32 = 14_486;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> MainResult {
|
||||||
|
tracing_subscriber::fmt::init();
|
||||||
|
let args = Args::parse();
|
||||||
|
let config = Config::read(&args.config)?;
|
||||||
|
let client = UgcClient::new(config.api.url);
|
||||||
|
let archive = Archive::new(&config.db.url, &config.db.password()?).await?;
|
||||||
|
|
||||||
|
match args.command {
|
||||||
|
Command::Matches => {
|
||||||
|
archive_matches(&client, &archive).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn archive_matches(client: &UgcClient, archive: &Archive) -> MainResult {
|
||||||
|
let next_match = archive
|
||||||
|
.get_last_match_id()
|
||||||
|
.await?
|
||||||
|
.unwrap_or(MAYBE_FIRST_MATCH - 1)
|
||||||
|
+ 1;
|
||||||
|
for id in next_match..=LAST_MATCH {
|
||||||
|
let _span = span!(Level::INFO, "archive_match", id = id).entered();
|
||||||
|
match client.get_match(id).await.check_not_found() {
|
||||||
|
Ok(Some(match_data)) => {
|
||||||
|
info!("storing match");
|
||||||
|
archive.store_match(id, match_data).await?;
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
warn!("match not found");
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("error fetching match: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(500)).await;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
trait NotFoundResultExt<T>: Sized {
|
||||||
|
fn check_not_found(self) -> Result<Option<T>, UgcClientError>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> NotFoundResultExt<T> for Result<T, UgcClientError> {
|
||||||
|
fn check_not_found(self) -> Result<Option<T>, UgcClientError> {
|
||||||
|
match self {
|
||||||
|
Ok(x) => Ok(Some(x)),
|
||||||
|
Err(UgcClientError::NotFound { .. }) => Ok(None),
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -12,6 +12,11 @@
|
||||||
};
|
};
|
||||||
outputs = { mill-scale, ... }: mill-scale ./. {
|
outputs = { mill-scale, ... }: mill-scale ./. {
|
||||||
extraFilesRegex = [ ".*\.html" ];
|
extraFilesRegex = [ ".*\.html" ];
|
||||||
|
withOverlays = [(import ./overlay.nix)];
|
||||||
|
packages = {
|
||||||
|
ugc-api-server = pkgs: pkgs.ugc-api-server;
|
||||||
|
ugc-api-archiver = pkgs: pkgs.ugc-api-archiver;
|
||||||
|
};
|
||||||
tools = pkgs: with pkgs; [
|
tools = pkgs: with pkgs; [
|
||||||
bacon
|
bacon
|
||||||
cargo-insta
|
cargo-insta
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
final: prev: {
|
final: prev: {
|
||||||
ugc-api-server = final.callPackage ./package.nix { };
|
ugc-api-server = final.callPackage ./package.nix { };
|
||||||
|
ugc-api-archiver = final.callPackage ./archiver.nix { };
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue