mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
archiver wip
This commit is contained in:
parent
8e1ea846da
commit
a660675932
13 changed files with 3466 additions and 0 deletions
73
archiver/src/archive.rs
Normal file
73
archiver/src/archive.rs
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
use sqlx::postgres::PgConnectOptions;
|
||||
use sqlx::{query, Error, PgPool};
|
||||
use std::str::FromStr;
|
||||
use thiserror::Error;
|
||||
use ugc_scraper_types::MatchInfo;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ArchiveError {
|
||||
#[error("Invalid database {url}: {error:#}")]
|
||||
InvalidDbUrl { url: String, error: Error },
|
||||
#[error("Error while connecting to database {url}: {error:#}")]
|
||||
Connect { url: String, error: sqlx::Error },
|
||||
#[error("Error while running query for {description}: {error:#}")]
|
||||
Query {
|
||||
description: &'static str,
|
||||
error: sqlx::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub struct Archive {
|
||||
pool: PgPool,
|
||||
}
|
||||
|
||||
impl Archive {
|
||||
pub async fn new(url: &str, password: &str) -> Result<Archive, ArchiveError> {
|
||||
let opt = PgConnectOptions::from_str(url)
|
||||
.map_err(|error| ArchiveError::InvalidDbUrl {
|
||||
url: url.into(),
|
||||
error,
|
||||
})?
|
||||
.password(password);
|
||||
let pool = PgPool::connect_with(opt)
|
||||
.await
|
||||
.map_err(|error| ArchiveError::Connect {
|
||||
url: url.into(),
|
||||
error,
|
||||
})?;
|
||||
Ok(Archive { pool })
|
||||
}
|
||||
|
||||
pub async fn store_match(&self, id: u32, match_info: MatchInfo) -> Result<(), ArchiveError> {
|
||||
query!(
|
||||
"INSERT INTO matches (
|
||||
id, team_home, team_away, score_home, score_away, comment, comment_author
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7)",
|
||||
id as i32,
|
||||
match_info.team_home.id as i32,
|
||||
match_info.team_away.id as i32,
|
||||
match_info.score_home as i16,
|
||||
match_info.score_away as i16,
|
||||
match_info.comment,
|
||||
match_info.comment_author
|
||||
)
|
||||
.execute(&self.pool)
|
||||
.await
|
||||
.map_err(|error| ArchiveError::Query {
|
||||
description: "inserting match",
|
||||
error,
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_last_match_id(&self) -> Result<Option<u32>, ArchiveError> {
|
||||
Ok(query!("SELECT id FROM matches ORDER BY id DESC LIMIT 1")
|
||||
.fetch_optional(&self.pool)
|
||||
.await
|
||||
.map_err(|error| ArchiveError::Query {
|
||||
description: "getting latest match",
|
||||
error,
|
||||
})?
|
||||
.map(|row| row.id as u32))
|
||||
}
|
||||
}
|
||||
125
archiver/src/client.rs
Normal file
125
archiver/src/client.rs
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
use reqwest::{Client, ClientBuilder, Error, Response, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
use thiserror::Error;
|
||||
use ugc_scraper_types::{
|
||||
GameMode, MapHistory, MatchInfo, MembershipHistory, Player, RosterHistory, Team,
|
||||
TeamSeasonMatch, Transaction,
|
||||
};
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum UgcClientError {
|
||||
#[error("Error sending request to {endpoint:?}: {error:#}")]
|
||||
Request { endpoint: Endpoint, error: Error },
|
||||
#[error("Error parsing response from {endpoint:?}: {error:#}")]
|
||||
Response { endpoint: Endpoint, error: Error },
|
||||
#[error("{endpoint:?} not found")]
|
||||
NotFound { endpoint: Endpoint },
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct UgcClient {
|
||||
client: Client,
|
||||
api_url: String,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl UgcClient {
|
||||
pub fn new(api_url: String) -> Self {
|
||||
let client = ClientBuilder::new()
|
||||
.user_agent("UGC_ARCHIVER")
|
||||
.build()
|
||||
.expect("failed to build client");
|
||||
Self { client, api_url }
|
||||
}
|
||||
|
||||
async fn send_request<T: DeserializeOwned>(
|
||||
&self,
|
||||
endpoint: Endpoint,
|
||||
) -> Result<T, UgcClientError> {
|
||||
self.client
|
||||
.get(endpoint.build_url(&self.api_url))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|error| UgcClientError::Request { endpoint, error })?
|
||||
.check_not_found(endpoint)?
|
||||
.json()
|
||||
.await
|
||||
.map_err(|error| UgcClientError::Response { endpoint, error })
|
||||
}
|
||||
|
||||
pub async fn get_match(&self, id: u32) -> Result<MatchInfo, UgcClientError> {
|
||||
self.send_request(Endpoint::Match { id }).await
|
||||
}
|
||||
|
||||
pub async fn get_team(&self, id: u32) -> Result<Team, UgcClientError> {
|
||||
self.send_request(Endpoint::Team { id }).await
|
||||
}
|
||||
|
||||
pub async fn get_team_roster(&self, id: u32) -> Result<Vec<RosterHistory>, UgcClientError> {
|
||||
self.send_request(Endpoint::TeamRoster { id }).await
|
||||
}
|
||||
|
||||
pub async fn get_team_matches(&self, id: u32) -> Result<Vec<TeamSeasonMatch>, UgcClientError> {
|
||||
self.send_request(Endpoint::TeamMatches { id }).await
|
||||
}
|
||||
|
||||
pub async fn get_player(&self, id: u32) -> Result<Player, UgcClientError> {
|
||||
self.send_request(Endpoint::Player { id }).await
|
||||
}
|
||||
|
||||
pub async fn get_player_history(&self, id: u32) -> Result<MembershipHistory, UgcClientError> {
|
||||
self.send_request(Endpoint::PlayerHistory { id }).await
|
||||
}
|
||||
|
||||
pub async fn get_maps(&self, format: GameMode) -> Result<MapHistory, UgcClientError> {
|
||||
self.send_request(Endpoint::Maps { format }).await
|
||||
}
|
||||
|
||||
pub async fn get_transactions(
|
||||
&self,
|
||||
format: GameMode,
|
||||
) -> Result<Vec<Transaction>, UgcClientError> {
|
||||
self.send_request(Endpoint::Transactions { format }).await
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub enum Endpoint {
|
||||
Match { id: u32 },
|
||||
Player { id: u32 },
|
||||
PlayerHistory { id: u32 },
|
||||
Transactions { format: GameMode },
|
||||
Team { id: u32 },
|
||||
TeamRoster { id: u32 },
|
||||
TeamMatches { id: u32 },
|
||||
Maps { format: GameMode },
|
||||
}
|
||||
|
||||
impl Endpoint {
|
||||
pub fn build_url(&self, api_url: &str) -> String {
|
||||
match self {
|
||||
Endpoint::Match { id } => format!("{}/match/{id}", api_url),
|
||||
Endpoint::Player { id } => format!("{}/player/{id}", api_url),
|
||||
Endpoint::PlayerHistory { id } => format!("{}/player/{id}/history", api_url),
|
||||
Endpoint::Transactions { format } => format!("{}/transactions/{format}", api_url),
|
||||
Endpoint::Team { id } => format!("{}/team/{id}", api_url),
|
||||
Endpoint::TeamRoster { id } => format!("{}/team/{id}/roster", api_url),
|
||||
Endpoint::TeamMatches { id } => format!("{}/team/{id}/matches", api_url),
|
||||
Endpoint::Maps { format } => format!("{}/maps/{format}", api_url),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trait ResponseExt: Sized {
|
||||
fn check_not_found(self, endpoint: Endpoint) -> Result<Self, UgcClientError>;
|
||||
}
|
||||
|
||||
impl ResponseExt for Response {
|
||||
fn check_not_found(self, endpoint: Endpoint) -> Result<Self, UgcClientError> {
|
||||
if self.status() == StatusCode::NOT_FOUND {
|
||||
Err(UgcClientError::NotFound { endpoint })
|
||||
} else {
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
}
|
||||
55
archiver/src/config.rs
Normal file
55
archiver/src/config.rs
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
use secretfile::SecretError;
|
||||
use serde::Deserialize;
|
||||
use std::fs::read_to_string;
|
||||
use std::path::Path;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ConfigError {
|
||||
#[error("Error reading config from {path}: {error:#}")]
|
||||
Read { path: String, error: std::io::Error },
|
||||
#[error("Error parsing config from {path}: {error:#}")]
|
||||
Parse {
|
||||
path: String,
|
||||
error: toml::de::Error,
|
||||
},
|
||||
#[error("Error reading password from file: {0:#}")]
|
||||
PasswordSecret(SecretError),
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct Config {
|
||||
pub db: DBConfig,
|
||||
pub api: ApiConfig,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
pub fn read(path: impl AsRef<Path>) -> Result<Self, ConfigError> {
|
||||
let path = path.as_ref();
|
||||
let raw = read_to_string(path).map_err(|error| ConfigError::Read {
|
||||
path: path.display().to_string(),
|
||||
error,
|
||||
})?;
|
||||
toml::from_str(&raw).map_err(|error| ConfigError::Parse {
|
||||
path: path.display().to_string(),
|
||||
error,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ApiConfig {
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct DBConfig {
|
||||
pub url: String,
|
||||
password_file: String,
|
||||
}
|
||||
|
||||
impl DBConfig {
|
||||
pub fn password(&self) -> Result<String, ConfigError> {
|
||||
secretfile::load(&self.password_file).map_err(ConfigError::PasswordSecret)
|
||||
}
|
||||
}
|
||||
84
archiver/src/main.rs
Normal file
84
archiver/src/main.rs
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
mod archive;
|
||||
mod client;
|
||||
mod config;
|
||||
|
||||
use crate::archive::Archive;
|
||||
use crate::client::{UgcClient, UgcClientError};
|
||||
use crate::config::Config;
|
||||
use clap::{Parser, Subcommand};
|
||||
use main_error::MainResult;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{error, info, span, warn, Level};
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct Args {
|
||||
#[clap(long, short)]
|
||||
config: PathBuf,
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
}
|
||||
|
||||
#[derive(Debug, Subcommand)]
|
||||
enum Command {
|
||||
Matches,
|
||||
}
|
||||
|
||||
/// Inclusive upper bound of the match ids that `archive_matches` walks.
// NOTE(review): presumably the newest match id on the site when this was written — confirm.
const LAST_MATCH: u32 = 117_047;

/// Match id that archiving starts from when no match has been stored yet.
// NOTE(review): the name suggests this is a best guess at the first valid id — confirm.
const MAYBE_FIRST_MATCH: u32 = 14_486;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> MainResult {
|
||||
tracing_subscriber::fmt::init();
|
||||
let args = Args::parse();
|
||||
let config = Config::read(&args.config)?;
|
||||
let client = UgcClient::new(config.api.url);
|
||||
let archive = Archive::new(&config.db.url, &config.db.password()?).await?;
|
||||
|
||||
match args.command {
|
||||
Command::Matches => {
|
||||
archive_matches(&client, &archive).await?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn archive_matches(client: &UgcClient, archive: &Archive) -> MainResult {
|
||||
let next_match = archive
|
||||
.get_last_match_id()
|
||||
.await?
|
||||
.unwrap_or(MAYBE_FIRST_MATCH - 1)
|
||||
+ 1;
|
||||
for id in next_match..=LAST_MATCH {
|
||||
let _span = span!(Level::INFO, "archive_match", id = id).entered();
|
||||
match client.get_match(id).await.check_not_found() {
|
||||
Ok(Some(match_data)) => {
|
||||
info!("storing match");
|
||||
archive.store_match(id, match_data).await?;
|
||||
}
|
||||
Ok(None) => {
|
||||
warn!("match not found");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("error fetching match: {}", e);
|
||||
}
|
||||
}
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
trait NotFoundResultExt<T>: Sized {
|
||||
fn check_not_found(self) -> Result<Option<T>, UgcClientError>;
|
||||
}
|
||||
|
||||
impl<T> NotFoundResultExt<T> for Result<T, UgcClientError> {
|
||||
fn check_not_found(self) -> Result<Option<T>, UgcClientError> {
|
||||
match self {
|
||||
Ok(x) => Ok(Some(x)),
|
||||
Err(UgcClientError::NotFound { .. }) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue