This commit is contained in:
Robin Appelman 2025-04-21 14:16:01 +02:00
commit e71ba3b490
15 changed files with 416 additions and 92 deletions

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "SELECT id FROM matches WHERE week = $1 AND team_home IN ($2, $3) AND team_away IN ($2, $3) AND map = $4 AND id > 0 ORDER BY id DESC LIMIT 1",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": ["Int4", "Int4", "Int4", "Text"]
},
"nullable": [false]
},
"hash": "3ec8ec092f25c86ae4160829cd419efaf0340afb5e7ad65492a1b2eae988d7c3"
}

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "SELECT MIN(id) as id FROM matches WHERE id < 0",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": []
},
"nullable": [null]
},
"hash": "626c573368c57c8bf544ba40e68ca80e034e2c918e0fe7d0e2319354f80b7de2"
}

View file

@ -0,0 +1,25 @@
{
"db_name": "PostgreSQL",
"query": "UPDATE matches SET map = $2, week = $3, format = $4, season = $5 WHERE id = $1",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int4",
"Varchar",
"Int4",
{
"Custom": {
"name": "game_mode",
"kind": {
"Enum": ["highlander", "eights", "sixes", "fours", "ultiduo"]
}
}
},
"Int4"
]
},
"nullable": []
},
"hash": "82af3af8aaa4974e507fc6d4ac68654541c1c459d0850f4412ca1cd124fe1d60"
}

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "select LEAST(MIN(team_home), MIN(team_away)) as team_id from matches INNER JOIN teams ON (team_home = teams.id OR team_away = teams.id) WHERE season IS NULL",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "team_id",
"type_info": "Int4"
}
],
"parameters": {
"Left": []
},
"nullable": [null]
},
"hash": "98d74e80ad5bfcde41e6ba24592dff950957b0d0a1ee0df8c5ebc487f262556f"
}

View file

@ -1,12 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO matches (\n id, team_home, team_away, score_home, score_away, comment, comment_author\n ) VALUES ($1, $2, $3, $4, $5, $6, $7)",
"describe": {
"columns": [],
"parameters": {
"Left": ["Int4", "Int4", "Int4", "Int2", "Int2", "Varchar", "Varchar"]
},
"nullable": []
},
"hash": "9de8aebc65fc0ad8b1fe1580425020559a346d1d0bbb0f6683e38674f54593b2"
}

View file

@ -0,0 +1,25 @@
{
"db_name": "PostgreSQL",
"query": "SELECT format as \"format: GameMode\" FROM teams WHERE id = $1",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "format: GameMode",
"type_info": {
"Custom": {
"name": "game_mode",
"kind": {
"Enum": ["highlander", "eights", "sixes", "fours", "ultiduo"]
}
}
}
}
],
"parameters": {
"Left": ["Int4"]
},
"nullable": [false]
},
"hash": "b55c859eae400f5b4231f8b9c8417371dce54bde4d7743af588dba96c4b0fcee"
}

View file

@ -0,0 +1,30 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO matches (\n id, team_home, team_away, score_home, score_away, comment, comment_author, map, format, week\n ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int4",
"Int4",
"Int4",
"Int2",
"Int2",
"Varchar",
"Varchar",
"Varchar",
{
"Custom": {
"name": "game_mode",
"kind": {
"Enum": ["highlander", "eights", "sixes", "fours", "ultiduo"]
}
}
},
"Int4"
]
},
"nullable": []
},
"hash": "b7620a0c45d6964f79142d6487b10256cc3a010b027dd63b9a3b9882d908201b"
}

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "select id from matches WHERE id = $1",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": ["Int4"]
},
"nullable": [false]
},
"hash": "db05230b038541378850c1294c260c97a209d6220583ad85c64a2f30319a6132"
}

1
archiver/Cargo.lock generated
View file

@ -92,6 +92,7 @@ dependencies = [
"reqwest",
"secretfile",
"serde",
"serde_json",
"sqlx",
"thiserror 2.0.12",
"time",

View file

@ -11,6 +11,7 @@ clap = { version = "4.5.35", features = ["derive"] }
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
toml = "0.8.20"
secretfile = "0.1.0"
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread"] }

View file

@ -9,10 +9,10 @@ use time::macros::format_description;
use time::parsing::Parsed;
use time::{Date, Duration};
use tokio_stream::Stream;
use tracing::debug;
use tracing::{debug, info, warn};
use ugc_scraper_types::{
Class, GameMode, MapHistory, MatchInfo, Membership, MembershipRole, NameChange, Player, Record,
Region, RosterHistory, SteamID, Team, TeamSeason, TeamSeasonMatch,
Region, RosterHistory, SteamID, Team, TeamRef, TeamSeason,
};
const MATCH_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
@ -59,18 +59,21 @@ impl Archive {
Ok(Archive { pool })
}
pub async fn store_match(&self, id: u32, match_info: MatchInfo) -> Result<(), ArchiveError> {
pub async fn store_match(&self, id: i32, match_info: MatchInfo) -> Result<(), ArchiveError> {
query!(
"INSERT INTO matches (
id, team_home, team_away, score_home, score_away, comment, comment_author
) VALUES ($1, $2, $3, $4, $5, $6, $7)",
id as i32,
id, team_home, team_away, score_home, score_away, comment, comment_author, map, format, week
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)",
id,
match_info.team_home.id as i32,
match_info.team_away.id as i32,
match_info.score_home as i16,
match_info.score_away as i16,
match_info.comment,
match_info.comment_author
match_info.comment_author,
match_info.map,
match_info.format as GameMode,
match_info.week as i32,
)
.execute(&self.pool)
.await
@ -498,6 +501,7 @@ impl Archive {
Ok(())
}
#[allow(dead_code)]
pub fn get_match_ids_without_map(
&self,
) -> impl Stream<Item = Result<u32, ArchiveError>> + use<'_> {
@ -510,6 +514,28 @@ impl Archive {
.map_ok(|map| map.id as u32)
}
/// Returns the smallest team id (home or away) among matches whose `season`
/// is still `NULL`.
///
/// The aggregate yields `NULL` when no such match exists; in that case this
/// returns `0`, so callers must not blindly subtract from the result.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails.
pub async fn get_min_team_id_without_match_seasons(&self) -> Result<u32, ArchiveError> {
    let row = query!("select LEAST(MIN(team_home), MIN(team_away)) as team_id from matches INNER JOIN teams ON (team_home = teams.id OR team_away = teams.id) WHERE season IS NULL")
        .fetch_one(&self.pool)
        .await
        .map_err(|error| ArchiveError::Query {
            description: "getting team ids",
            error,
        })?;

    // A NULL aggregate (nothing left to fix up) collapses to 0.
    let team_id = row.team_id.unwrap_or_default();
    Ok(team_id as u32)
}
/// Reports whether a row with the given `id` exists in `matches`.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the lookup fails.
pub async fn has_match(&self, id: u32) -> Result<bool, ArchiveError> {
    let existing = query!("select id from matches WHERE id = $1", id as i32)
        .fetch_optional(&self.pool)
        .await
        .map_err(|error| ArchiveError::Query {
            description: "checking match existence",
            error,
        })?;

    Ok(existing.is_some())
}
#[allow(dead_code)]
pub async fn get_match_date(
&self,
match_info: &MatchInfo,
@ -551,6 +577,21 @@ impl Archive {
Ok(None)
}
/// Looks up the stored `format` of the team with the given `id`.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails. The query uses
/// `fetch_one`, so a team id with no row is reported as an error rather
/// than as an absent value.
pub async fn get_team_format(&self, id: u32) -> Result<GameMode, ArchiveError> {
    let row = query!(
        r#"SELECT format as "format: GameMode" FROM teams WHERE id = $1"#,
        id as i32
    )
    .fetch_one(&self.pool)
    .await
    .map_err(|error| ArchiveError::Query {
        description: "getting team format",
        error,
    })?;

    Ok(row.format)
}
#[allow(dead_code)]
pub async fn update_match_details(
&self,
id: u32,
@ -593,7 +634,9 @@ impl Archive {
pub async fn update_match_details_from_team_matches(
&self,
season: TeamSeason,
team: &TeamRef,
format: GameMode,
season: &TeamSeason,
) -> Result<(), ArchiveError> {
let mut transaction = self
.pool
@ -604,23 +647,55 @@ impl Archive {
error,
})?;
for match_info in season.matches {
if let Some(id) = match_info.result.match_id() {
query!(
"UPDATE matches SET map = $2, week = $3, format = $4, season = $5 WHERE id = $1",
id as i32,
match_info.map,
match_info.week as i32,
format as Option<GameMode>,
date
for match_info in season.matches.iter() {
let id = if let Some(id) = match_info.result.match_id() {
id as i32
} else if let Some(opponent) = match_info.result.opponents() {
let options = Self::find_match_id(
&mut *transaction,
match_info.week,
team.id,
opponent.id,
&match_info.map,
)
.execute(&self.pool)
.await
.map_err(|error| ArchiveError::Query {
description: "updating match",
error,
})?;
}
.await?;
if options.len() == 1 {
options[0] as i32
} else if options.is_empty() {
let fake_id = Self::find_next_negative_match_id(&mut *transaction).await?;
assert!(fake_id < 0);
info!(id = fake_id, "inserting synthetic match");
panic!("?");
let fake_match = match_info
.match_info(team, format)
.expect("no match info but we do have opponent");
self.store_match(fake_id, fake_match).await?;
fake_id
} else {
warn!(
possible_options = options.len(),
season.season, match_info.week, "Failed to find match, multiple options"
);
panic!();
}
} else {
continue;
};
query!(
"UPDATE matches SET map = $2, week = $3, format = $4, season = $5 WHERE id = $1",
id as i32,
match_info.map,
match_info.week as i32,
format as GameMode,
season.season as i32
)
.execute(&self.pool)
.await
.map_err(|error| ArchiveError::Query {
description: "updating match with team match data",
error,
})?;
}
transaction
@ -633,6 +708,48 @@ impl Archive {
Ok(())
}
/// Searches `matches` for a real (positive-id) match played in `week` on
/// `map` where both the home and the away team are one of `team_a` /
/// `team_b`.
///
/// Candidates are ordered by descending id and the query carries
/// `LIMIT 1`, so the returned vector holds at most one id (the highest
/// matching one) and is empty when nothing matches.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails.
async fn find_match_id(
    db: impl Executor<'_, Database = Postgres>,
    week: u8,
    team_a: u32,
    team_b: u32,
    map: &str,
) -> Result<Vec<u32>, ArchiveError> {
    let rows = query!(
        "SELECT id FROM matches WHERE week = $1 AND team_home IN ($2, $3) AND team_away IN ($2, $3) AND map = $4 AND id > 0 ORDER BY id DESC LIMIT 1",
        week as i32,
        team_a as i32,
        team_b as i32,
        map
    )
    .fetch_all(db)
    .await
    .map_err(|error| ArchiveError::Query {
        description: "searching match",
        error,
    })?;

    // The query filters on `id > 0`, so the stored i32 ids are positive here.
    let mut ids = Vec::with_capacity(rows.len());
    for row in rows {
        ids.push(row.id as u32);
    }
    Ok(ids)
}
/// Computes the next unused negative match id: one below the smallest
/// negative id currently in `matches`, or `-1` when there is none.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails.
async fn find_next_negative_match_id(
    db: impl Executor<'_, Database = Postgres>,
) -> Result<i32, ArchiveError> {
    let row = query!("SELECT MIN(id) as id FROM matches WHERE id < 0")
        .fetch_optional(db)
        .await
        .map_err(|error| ArchiveError::Query {
            description: "getting next negative match",
            error,
        })?;

    // Both "no row" and a NULL aggregate mean no negative ids exist yet,
    // which is treated as a current minimum of 0.
    let lowest = row.and_then(|row| row.id).unwrap_or_default();
    Ok(lowest - 1)
}
}
fn parse_old_match_date(date: &str) -> Result<Date, time::Error> {

View file

@ -3,15 +3,21 @@ use serde::de::DeserializeOwned;
use thiserror::Error;
use ugc_scraper_types::{
GameMode, MapHistory, MatchInfo, MembershipHistory, Player, RosterHistory, SteamID, Team,
TeamRosterData, TeamSeason, TeamSeasonMatch, Transaction,
TeamMatches, TeamRosterData, Transaction,
};
#[derive(Debug, Error)]
pub enum UgcClientError {
#[error("Error sending request to {endpoint:?}: {error:#}")]
Request { endpoint: Endpoint, error: Error },
#[error("Error parsing response from {endpoint:?}: {error:#}")]
#[error("Error receiving response from {endpoint:?}: {error:#}")]
Response { endpoint: Endpoint, error: Error },
#[error("Error parsing response from {endpoint:?}: {error:#}, from: {text}")]
Decode {
endpoint: Endpoint,
error: serde_json::Error,
text: String,
},
#[error("{endpoint:?} not found")]
NotFound { endpoint: Endpoint },
}
@ -36,15 +42,23 @@ impl UgcClient {
&self,
endpoint: Endpoint,
) -> Result<T, UgcClientError> {
self.client
let text = self
.client
.get(endpoint.build_url(&self.api_url))
.send()
.await
.map_err(|error| UgcClientError::Request { endpoint, error })?
.check_not_found(endpoint)?
.json()
.error_for_status()
.map_err(|error| UgcClientError::Response { endpoint, error })?
.text()
.await
.map_err(|error| UgcClientError::Response { endpoint, error })
.map_err(|error| UgcClientError::Response { endpoint, error })?;
serde_json::from_str(&text).map_err(|error| UgcClientError::Decode {
endpoint,
error,
text,
})
}
pub async fn get_match(&self, id: u32) -> Result<MatchInfo, UgcClientError> {
@ -61,7 +75,7 @@ impl UgcClient {
.map(|data| data.history)
}
pub async fn get_team_matches(&self, id: u32) -> Result<TeamSeason, UgcClientError> {
pub async fn get_team_matches(&self, id: u32) -> Result<TeamMatches, UgcClientError> {
self.send_request(Endpoint::TeamMatches { id }).await
}

View file

@ -13,7 +13,7 @@ use std::str::FromStr;
use std::time::Duration;
use tokio::time::sleep;
use tokio_stream::StreamExt;
use tracing::{error, info, span, warn, Level};
use tracing::{error, info, instrument, span, warn, Level};
use ugc_scraper_types::GameMode;
#[derive(Debug, Parser)]
@ -79,49 +79,61 @@ async fn archive_matches(client: &UgcClient, archive: &Archive) -> MainResult {
.await?
.unwrap_or(MAYBE_FIRST_MATCH - 1)
+ 1;
for id in next_match..=LAST_MATCH {
let _span = span!(Level::INFO, "archive_match", id = id).entered();
match client.get_match(id).await.check_not_found() {
Ok(Some(match_data)) => {
info!("storing match");
archive.store_match(id, match_data).await?;
}
Ok(None) => {
warn!("match not found");
}
Err(e) => {
error!("error fetching match: {}", e);
}
}
for id in 200..=MAYBE_FIRST_MATCH {
archive_match(client, archive, id).await.ok();
sleep(Duration::from_millis(500)).await;
}
Ok(())
}
/// Fetches match `id` from the UGC client and stores it in the archive.
///
/// A match the client reports as not found is logged and treated as
/// success; any other fetch error is logged and returned to the caller, as
/// is a failure to store the match.
#[instrument(skip(client, archive))]
async fn archive_match(client: &UgcClient, archive: &Archive, id: u32) -> MainResult {
    let match_data = match client.get_match(id).await.check_not_found() {
        Ok(Some(found)) => found,
        Ok(None) => {
            warn!("match not found");
            return Ok(());
        }
        Err(e) => {
            error!("error fetching match: {}", e);
            return Err(e.into());
        }
    };

    info!("storing match");
    archive.store_match(id as i32, match_data).await?;
    Ok(())
}
async fn archive_teams(client: &UgcClient, archive: &Archive) -> MainResult {
let range = archive.get_team_range().await?;
let next_team = archive.get_last_team_id().await?.unwrap_or(range.start - 1) + 1;
for id in next_team..=range.end {
let _span = span!(Level::INFO, "archive_team", id = id).entered();
match client.get_team(id).await.check_not_found() {
Ok(Some(team_data)) => {
if team_data.format.is_tf2() {
info!("storing team");
archive.store_team(id, &team_data).await?;
} else {
info!("skipping non-tf2 team");
}
}
Ok(None) => {
warn!("team not found");
}
Err(e) => {
error!("error fetching team: {:?}", e);
panic!();
archive_team(client, archive, id).await?;
sleep(Duration::from_millis(500)).await;
}
Ok(())
}
#[instrument(skip(client, archive))]
async fn archive_team(client: &UgcClient, archive: &Archive, id: u32) -> MainResult {
match client.get_team(id).await.check_not_found() {
Ok(Some(team_data)) => {
if team_data.format.is_tf2() {
info!("storing team");
archive.store_team(id, &team_data).await?;
} else {
info!("skipping non-tf2 team");
}
}
sleep(Duration::from_millis(500)).await;
Ok(None) => {
warn!("team not found");
}
Err(e) => {
error!("error fetching team: {:?}", e);
panic!();
}
}
Ok(())
}
@ -215,24 +227,42 @@ async fn archive_map_history(client: &UgcClient, archive: &Archive, mode: GameMo
}
async fn fixup_matches(client: &UgcClient, archive: &Archive) -> MainResult {
let mut match_ids = pin!(archive.get_match_ids_without_map());
let min_team = archive.get_min_team_id_without_match_seasons().await?;
let mut team_ids = pin!(archive.get_team_ids(min_team - 1));
while let Some(Ok(id)) = match_ids.next().await {
let _span = span!(Level::INFO, "fixup_match", id = id).entered();
let match_info = client.get_match(id).await?;
let date = None; // archive.get_match_date(&match_info).await?;
if false && date.is_none()
&& (match_info.format == GameMode::Highlander
|| match_info.format == GameMode::Sixes
|| match_info.format == GameMode::Fours
|| match_info.format == GameMode::Ultiduo)
{
dbg!(match_info.default_date);
error!("failed to parse match date");
panic!();
while let Some(Ok(team_id)) = team_ids.next().await {
let _span = span!(Level::INFO, "fixup_matches", team_id).entered();
let format = archive.get_team_format(team_id).await?;
let matches = client.get_team_matches(team_id).await?;
info!(
seasons = matches.seasons.len(),
?format,
"updating matches for team"
);
for season in matches.seasons.iter() {
for season_match in season.matches.iter() {
if let Some(match_id) = season_match.result.match_id() {
if !archive.has_match(match_id).await? {
warn!(match_id, "match not archived yet");
sleep(Duration::from_millis(500)).await;
if let Err(_) = archive_match(client, archive, match_id).await {
let match_info = season_match
.match_info(&matches.team, season.format)
.expect("failed to build match info");
assert_eq!(format, match_info.format);
info!("reconstructed match");
archive.store_match(match_id as i32, match_info).await?;
}
}
}
}
archive
.update_match_details_from_team_matches(&matches.team, format, season)
.await?;
}
info!(date = ?date, format = %match_info.format, "updating match");
archive.update_match_details(id, &match_info, date).await?;
sleep(Duration::from_millis(500)).await;
}
Ok(())