This commit is contained in:
Robin Appelman 2025-04-21 14:16:01 +02:00
commit e71ba3b490
15 changed files with 416 additions and 92 deletions

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "SELECT id FROM matches WHERE week = $1 AND team_home IN ($2, $3) AND team_away IN ($2, $3) AND map = $4 AND id > 0 ORDER BY id DESC LIMIT 1",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": ["Int4", "Int4", "Int4", "Text"]
},
"nullable": [false]
},
"hash": "3ec8ec092f25c86ae4160829cd419efaf0340afb5e7ad65492a1b2eae988d7c3"
}

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "SELECT MIN(id) as id FROM matches WHERE id < 0",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": []
},
"nullable": [null]
},
"hash": "626c573368c57c8bf544ba40e68ca80e034e2c918e0fe7d0e2319354f80b7de2"
}

View file

@ -0,0 +1,25 @@
{
"db_name": "PostgreSQL",
"query": "UPDATE matches SET map = $2, week = $3, format = $4, season = $5 WHERE id = $1",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int4",
"Varchar",
"Int4",
{
"Custom": {
"name": "game_mode",
"kind": {
"Enum": ["highlander", "eights", "sixes", "fours", "ultiduo"]
}
}
},
"Int4"
]
},
"nullable": []
},
"hash": "82af3af8aaa4974e507fc6d4ac68654541c1c459d0850f4412ca1cd124fe1d60"
}

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "select LEAST(MIN(team_home), MIN(team_away)) as team_id from matches INNER JOIN teams ON (team_home = teams.id OR team_away = teams.id) WHERE season IS NULL",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "team_id",
"type_info": "Int4"
}
],
"parameters": {
"Left": []
},
"nullable": [null]
},
"hash": "98d74e80ad5bfcde41e6ba24592dff950957b0d0a1ee0df8c5ebc487f262556f"
}

View file

@ -1,12 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO matches (\n id, team_home, team_away, score_home, score_away, comment, comment_author\n ) VALUES ($1, $2, $3, $4, $5, $6, $7)",
"describe": {
"columns": [],
"parameters": {
"Left": ["Int4", "Int4", "Int4", "Int2", "Int2", "Varchar", "Varchar"]
},
"nullable": []
},
"hash": "9de8aebc65fc0ad8b1fe1580425020559a346d1d0bbb0f6683e38674f54593b2"
}

View file

@ -0,0 +1,25 @@
{
"db_name": "PostgreSQL",
"query": "SELECT format as \"format: GameMode\" FROM teams WHERE id = $1",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "format: GameMode",
"type_info": {
"Custom": {
"name": "game_mode",
"kind": {
"Enum": ["highlander", "eights", "sixes", "fours", "ultiduo"]
}
}
}
}
],
"parameters": {
"Left": ["Int4"]
},
"nullable": [false]
},
"hash": "b55c859eae400f5b4231f8b9c8417371dce54bde4d7743af588dba96c4b0fcee"
}

View file

@ -0,0 +1,30 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO matches (\n id, team_home, team_away, score_home, score_away, comment, comment_author, map, format, week\n ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int4",
"Int4",
"Int4",
"Int2",
"Int2",
"Varchar",
"Varchar",
"Varchar",
{
"Custom": {
"name": "game_mode",
"kind": {
"Enum": ["highlander", "eights", "sixes", "fours", "ultiduo"]
}
}
},
"Int4"
]
},
"nullable": []
},
"hash": "b7620a0c45d6964f79142d6487b10256cc3a010b027dd63b9a3b9882d908201b"
}

View file

@ -0,0 +1,18 @@
{
"db_name": "PostgreSQL",
"query": "select id from matches WHERE id = $1",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int4"
}
],
"parameters": {
"Left": ["Int4"]
},
"nullable": [false]
},
"hash": "db05230b038541378850c1294c260c97a209d6220583ad85c64a2f30319a6132"
}

1
archiver/Cargo.lock generated
View file

@ -92,6 +92,7 @@ dependencies = [
"reqwest", "reqwest",
"secretfile", "secretfile",
"serde", "serde",
"serde_json",
"sqlx", "sqlx",
"thiserror 2.0.12", "thiserror 2.0.12",
"time", "time",

View file

@ -11,6 +11,7 @@ clap = { version = "4.5.35", features = ["derive"] }
tracing = "0.1.41" tracing = "0.1.41"
tracing-subscriber = "0.3.19" tracing-subscriber = "0.3.19"
serde = { version = "1.0.219", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
toml = "0.8.20" toml = "0.8.20"
secretfile = "0.1.0" secretfile = "0.1.0"
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread"] } tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread"] }

View file

@ -9,10 +9,10 @@ use time::macros::format_description;
use time::parsing::Parsed; use time::parsing::Parsed;
use time::{Date, Duration}; use time::{Date, Duration};
use tokio_stream::Stream; use tokio_stream::Stream;
use tracing::debug; use tracing::{debug, info, warn};
use ugc_scraper_types::{ use ugc_scraper_types::{
Class, GameMode, MapHistory, MatchInfo, Membership, MembershipRole, NameChange, Player, Record, Class, GameMode, MapHistory, MatchInfo, Membership, MembershipRole, NameChange, Player, Record,
Region, RosterHistory, SteamID, Team, TeamSeason, TeamSeasonMatch, Region, RosterHistory, SteamID, Team, TeamRef, TeamSeason,
}; };
const MATCH_DATE_FORMAT: &[FormatItem<'static>] = format_description!( const MATCH_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
@ -59,18 +59,21 @@ impl Archive {
Ok(Archive { pool }) Ok(Archive { pool })
} }
pub async fn store_match(&self, id: u32, match_info: MatchInfo) -> Result<(), ArchiveError> { pub async fn store_match(&self, id: i32, match_info: MatchInfo) -> Result<(), ArchiveError> {
query!( query!(
"INSERT INTO matches ( "INSERT INTO matches (
id, team_home, team_away, score_home, score_away, comment, comment_author id, team_home, team_away, score_home, score_away, comment, comment_author, map, format, week
) VALUES ($1, $2, $3, $4, $5, $6, $7)", ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)",
id as i32, id,
match_info.team_home.id as i32, match_info.team_home.id as i32,
match_info.team_away.id as i32, match_info.team_away.id as i32,
match_info.score_home as i16, match_info.score_home as i16,
match_info.score_away as i16, match_info.score_away as i16,
match_info.comment, match_info.comment,
match_info.comment_author match_info.comment_author,
match_info.map,
match_info.format as GameMode,
match_info.week as i32,
) )
.execute(&self.pool) .execute(&self.pool)
.await .await
@ -498,6 +501,7 @@ impl Archive {
Ok(()) Ok(())
} }
#[allow(dead_code)]
pub fn get_match_ids_without_map( pub fn get_match_ids_without_map(
&self, &self,
) -> impl Stream<Item = Result<u32, ArchiveError>> + use<'_> { ) -> impl Stream<Item = Result<u32, ArchiveError>> + use<'_> {
@ -510,6 +514,28 @@ impl Archive {
.map_ok(|map| map.id as u32) .map_ok(|map| map.id as u32)
} }
/// Finds the lowest team id that still takes part in a match without a season.
///
/// The query joins `matches` against `teams` on either side of the match,
/// keeps the rows where `season` is NULL, and takes the smaller of the
/// minimum `team_home` and the minimum `team_away`.
///
/// # Returns
///
/// The lowest id found, or `0` when the aggregate comes back as NULL.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails.
pub async fn get_min_team_id_without_match_seasons(&self) -> Result<u32, ArchiveError> {
    let row = query!("select LEAST(MIN(team_home), MIN(team_away)) as team_id from matches INNER JOIN teams ON (team_home = teams.id OR team_away = teams.id) WHERE season IS NULL")
        .fetch_one(&self.pool)
        .await
        .map_err(|source| ArchiveError::Query {
            description: "getting team ids",
            error: source,
        })?;

    // `team_id` is nullable; fall back to the integer default in that case.
    let lowest = row.team_id.unwrap_or_default();
    Ok(lowest as u32)
}
/// Reports whether a row with the given id exists in `matches`.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the lookup fails.
pub async fn has_match(&self, id: u32) -> Result<bool, ArchiveError> {
    let found = query!("select id from matches WHERE id = $1", id as i32)
        .fetch_optional(&self.pool)
        .await
        .map_err(|source| ArchiveError::Query {
            description: "checking match existence",
            error: source,
        })?;

    Ok(found.is_some())
}
#[allow(dead_code)]
pub async fn get_match_date( pub async fn get_match_date(
&self, &self,
match_info: &MatchInfo, match_info: &MatchInfo,
@ -551,6 +577,21 @@ impl Archive {
Ok(None) Ok(None)
} }
/// Loads the `format` column of the team with the given id.
///
/// Uses `fetch_one`, so a missing team surfaces as a query error rather
/// than as an empty result.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails.
pub async fn get_team_format(&self, id: u32) -> Result<GameMode, ArchiveError> {
    let row = query!(
        r#"SELECT format as "format: GameMode" FROM teams WHERE id = $1"#,
        id as i32
    )
    .fetch_one(&self.pool)
    .await
    .map_err(|source| ArchiveError::Query {
        description: "getting team format",
        error: source,
    })?;

    Ok(row.format)
}
#[allow(dead_code)]
pub async fn update_match_details( pub async fn update_match_details(
&self, &self,
id: u32, id: u32,
@ -593,7 +634,9 @@ impl Archive {
pub async fn update_match_details_from_team_matches( pub async fn update_match_details_from_team_matches(
&self, &self,
season: TeamSeason, team: &TeamRef,
format: GameMode,
season: &TeamSeason,
) -> Result<(), ArchiveError> { ) -> Result<(), ArchiveError> {
let mut transaction = self let mut transaction = self
.pool .pool
@ -604,23 +647,55 @@ impl Archive {
error, error,
})?; })?;
for match_info in season.matches { for match_info in season.matches.iter() {
if let Some(id) = match_info.result.match_id() { let id = if let Some(id) = match_info.result.match_id() {
query!( id as i32
"UPDATE matches SET map = $2, week = $3, format = $4, season = $5 WHERE id = $1", } else if let Some(opponent) = match_info.result.opponents() {
id as i32, let options = Self::find_match_id(
match_info.map, &mut *transaction,
match_info.week as i32, match_info.week,
format as Option<GameMode>, team.id,
date opponent.id,
&match_info.map,
) )
.execute(&self.pool) .await?;
.await if options.len() == 1 {
.map_err(|error| ArchiveError::Query { options[0] as i32
description: "updating match", } else if options.is_empty() {
error, let fake_id = Self::find_next_negative_match_id(&mut *transaction).await?;
})?; assert!(fake_id < 0);
} info!(id = fake_id, "inserting synthetic match");
panic!("?");
let fake_match = match_info
.match_info(team, format)
.expect("no match info but we do have opponent");
self.store_match(fake_id, fake_match).await?;
fake_id
} else {
warn!(
possible_options = options.len(),
season.season, match_info.week, "Failed to find match, multiple options"
);
panic!();
}
} else {
continue;
};
query!(
"UPDATE matches SET map = $2, week = $3, format = $4, season = $5 WHERE id = $1",
id as i32,
match_info.map,
match_info.week as i32,
format as GameMode,
season.season as i32
)
.execute(&self.pool)
.await
.map_err(|error| ArchiveError::Query {
description: "updating match with team match data",
error,
})?;
} }
transaction transaction
@ -633,6 +708,48 @@ impl Archive {
Ok(()) Ok(())
} }
/// Searches for a stored match between two teams in a given week on a given map.
///
/// Only rows with a positive id are considered, and both `team_home` and
/// `team_away` must be one of the two supplied team ids.
///
/// # Returns
///
/// The matching ids, highest first. The query carries `LIMIT 1`, so the
/// vector holds at most one element.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails.
async fn find_match_id(
    db: impl Executor<'_, Database = Postgres>,
    week: u8,
    team_a: u32,
    team_b: u32,
    map: &str,
) -> Result<Vec<u32>, ArchiveError> {
    let rows = query!(
        "SELECT id FROM matches WHERE week = $1 AND team_home IN ($2, $3) AND team_away IN ($2, $3) AND map = $4 AND id > 0 ORDER BY id DESC LIMIT 1",
        week as i32,
        team_a as i32,
        team_b as i32,
        map
    )
    .fetch_all(db)
    .await
    .map_err(|source| ArchiveError::Query {
        description: "searching match",
        error: source,
    })?;

    let ids = rows.into_iter().map(|row| row.id as u32).collect();
    Ok(ids)
}
/// Picks the next unused negative match id.
///
/// # Returns
///
/// One less than the smallest negative id currently in `matches`, or `-1`
/// when no negative id exists yet.
///
/// # Errors
///
/// Returns [`ArchiveError::Query`] when the query fails.
async fn find_next_negative_match_id(
    db: impl Executor<'_, Database = Postgres>,
) -> Result<i32, ArchiveError> {
    let row = query!("SELECT MIN(id) as id FROM matches WHERE id < 0")
        .fetch_optional(db)
        .await
        .map_err(|source| ArchiveError::Query {
            description: "getting next negative match",
            error: source,
        })?;

    // Both "no row" and "NULL aggregate" collapse to 0, which yields -1 below.
    let lowest = row.and_then(|row| row.id).unwrap_or_default();
    Ok(lowest - 1)
}
} }
fn parse_old_match_date(date: &str) -> Result<Date, time::Error> { fn parse_old_match_date(date: &str) -> Result<Date, time::Error> {

View file

@ -3,15 +3,21 @@ use serde::de::DeserializeOwned;
use thiserror::Error; use thiserror::Error;
use ugc_scraper_types::{ use ugc_scraper_types::{
GameMode, MapHistory, MatchInfo, MembershipHistory, Player, RosterHistory, SteamID, Team, GameMode, MapHistory, MatchInfo, MembershipHistory, Player, RosterHistory, SteamID, Team,
TeamRosterData, TeamSeason, TeamSeasonMatch, Transaction, TeamMatches, TeamRosterData, Transaction,
}; };
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum UgcClientError { pub enum UgcClientError {
#[error("Error sending request to {endpoint:?}: {error:#}")] #[error("Error sending request to {endpoint:?}: {error:#}")]
Request { endpoint: Endpoint, error: Error }, Request { endpoint: Endpoint, error: Error },
#[error("Error parsing response from {endpoint:?}: {error:#}")] #[error("Error receiving response from {endpoint:?}: {error:#}")]
Response { endpoint: Endpoint, error: Error }, Response { endpoint: Endpoint, error: Error },
#[error("Error parsing response from {endpoint:?}: {error:#}, from: {text}")]
Decode {
endpoint: Endpoint,
error: serde_json::Error,
text: String,
},
#[error("{endpoint:?} not found")] #[error("{endpoint:?} not found")]
NotFound { endpoint: Endpoint }, NotFound { endpoint: Endpoint },
} }
@ -36,15 +42,23 @@ impl UgcClient {
&self, &self,
endpoint: Endpoint, endpoint: Endpoint,
) -> Result<T, UgcClientError> { ) -> Result<T, UgcClientError> {
self.client let text = self
.client
.get(endpoint.build_url(&self.api_url)) .get(endpoint.build_url(&self.api_url))
.send() .send()
.await .await
.map_err(|error| UgcClientError::Request { endpoint, error })? .map_err(|error| UgcClientError::Request { endpoint, error })?
.check_not_found(endpoint)? .check_not_found(endpoint)?
.json() .error_for_status()
.map_err(|error| UgcClientError::Response { endpoint, error })?
.text()
.await .await
.map_err(|error| UgcClientError::Response { endpoint, error }) .map_err(|error| UgcClientError::Response { endpoint, error })?;
serde_json::from_str(&text).map_err(|error| UgcClientError::Decode {
endpoint,
error,
text,
})
} }
pub async fn get_match(&self, id: u32) -> Result<MatchInfo, UgcClientError> { pub async fn get_match(&self, id: u32) -> Result<MatchInfo, UgcClientError> {
@ -61,7 +75,7 @@ impl UgcClient {
.map(|data| data.history) .map(|data| data.history)
} }
pub async fn get_team_matches(&self, id: u32) -> Result<TeamSeason, UgcClientError> { pub async fn get_team_matches(&self, id: u32) -> Result<TeamMatches, UgcClientError> {
self.send_request(Endpoint::TeamMatches { id }).await self.send_request(Endpoint::TeamMatches { id }).await
} }

View file

@ -13,7 +13,7 @@ use std::str::FromStr;
use std::time::Duration; use std::time::Duration;
use tokio::time::sleep; use tokio::time::sleep;
use tokio_stream::StreamExt; use tokio_stream::StreamExt;
use tracing::{error, info, span, warn, Level}; use tracing::{error, info, instrument, span, warn, Level};
use ugc_scraper_types::GameMode; use ugc_scraper_types::GameMode;
#[derive(Debug, Parser)] #[derive(Debug, Parser)]
@ -79,49 +79,61 @@ async fn archive_matches(client: &UgcClient, archive: &Archive) -> MainResult {
.await? .await?
.unwrap_or(MAYBE_FIRST_MATCH - 1) .unwrap_or(MAYBE_FIRST_MATCH - 1)
+ 1; + 1;
for id in next_match..=LAST_MATCH { for id in 200..=MAYBE_FIRST_MATCH {
let _span = span!(Level::INFO, "archive_match", id = id).entered(); archive_match(client, archive, id).await.ok();
match client.get_match(id).await.check_not_found() {
Ok(Some(match_data)) => {
info!("storing match");
archive.store_match(id, match_data).await?;
}
Ok(None) => {
warn!("match not found");
}
Err(e) => {
error!("error fetching match: {}", e);
}
}
sleep(Duration::from_millis(500)).await; sleep(Duration::from_millis(500)).await;
} }
Ok(()) Ok(())
} }
/// Fetches a single match from the client and stores it in the archive.
///
/// When `check_not_found` yields `Ok(None)` a warning is logged and the call
/// still succeeds. A fetch error is logged and returned to the caller, and a
/// failure to store the match is propagated as well.
#[instrument(skip(client, archive))]
async fn archive_match(client: &UgcClient, archive: &Archive, id: u32) -> MainResult {
    let fetched = client.get_match(id).await.check_not_found();

    let match_data = match fetched {
        Err(e) => {
            error!("error fetching match: {}", e);
            return Err(e.into());
        }
        Ok(None) => {
            warn!("match not found");
            return Ok(());
        }
        Ok(Some(match_data)) => match_data,
    };

    info!("storing match");
    archive.store_match(id as i32, match_data).await?;
    Ok(())
}
async fn archive_teams(client: &UgcClient, archive: &Archive) -> MainResult { async fn archive_teams(client: &UgcClient, archive: &Archive) -> MainResult {
let range = archive.get_team_range().await?; let range = archive.get_team_range().await?;
let next_team = archive.get_last_team_id().await?.unwrap_or(range.start - 1) + 1; let next_team = archive.get_last_team_id().await?.unwrap_or(range.start - 1) + 1;
for id in next_team..=range.end { for id in next_team..=range.end {
let _span = span!(Level::INFO, "archive_team", id = id).entered(); archive_team(client, archive, id).await?;
match client.get_team(id).await.check_not_found() { sleep(Duration::from_millis(500)).await;
Ok(Some(team_data)) => { }
if team_data.format.is_tf2() { Ok(())
info!("storing team"); }
archive.store_team(id, &team_data).await?;
} else { #[instrument(skip(client, archive))]
info!("skipping non-tf2 team"); async fn archive_team(client: &UgcClient, archive: &Archive, id: u32) -> MainResult {
} match client.get_team(id).await.check_not_found() {
} Ok(Some(team_data)) => {
Ok(None) => { if team_data.format.is_tf2() {
warn!("team not found"); info!("storing team");
} archive.store_team(id, &team_data).await?;
Err(e) => { } else {
error!("error fetching team: {:?}", e); info!("skipping non-tf2 team");
panic!();
} }
} }
sleep(Duration::from_millis(500)).await; Ok(None) => {
warn!("team not found");
}
Err(e) => {
error!("error fetching team: {:?}", e);
panic!();
}
} }
Ok(()) Ok(())
} }
@ -215,24 +227,42 @@ async fn archive_map_history(client: &UgcClient, archive: &Archive, mode: GameMo
} }
async fn fixup_matches(client: &UgcClient, archive: &Archive) -> MainResult { async fn fixup_matches(client: &UgcClient, archive: &Archive) -> MainResult {
let mut match_ids = pin!(archive.get_match_ids_without_map()); let min_team = archive.get_min_team_id_without_match_seasons().await?;
let mut team_ids = pin!(archive.get_team_ids(min_team - 1));
while let Some(Ok(id)) = match_ids.next().await { while let Some(Ok(team_id)) = team_ids.next().await {
let _span = span!(Level::INFO, "fixup_match", id = id).entered(); let _span = span!(Level::INFO, "fixup_matches", team_id).entered();
let match_info = client.get_match(id).await?; let format = archive.get_team_format(team_id).await?;
let date = None; // archive.get_match_date(&match_info).await?; let matches = client.get_team_matches(team_id).await?;
if false && date.is_none() info!(
&& (match_info.format == GameMode::Highlander seasons = matches.seasons.len(),
|| match_info.format == GameMode::Sixes ?format,
|| match_info.format == GameMode::Fours "updating matches for team"
|| match_info.format == GameMode::Ultiduo) );
{
dbg!(match_info.default_date); for season in matches.seasons.iter() {
error!("failed to parse match date"); for season_match in season.matches.iter() {
panic!(); if let Some(match_id) = season_match.result.match_id() {
if !archive.has_match(match_id).await? {
warn!(match_id, "match not archived yet");
sleep(Duration::from_millis(500)).await;
if let Err(_) = archive_match(client, archive, match_id).await {
let match_info = season_match
.match_info(&matches.team, season.format)
.expect("failed to build match info");
assert_eq!(format, match_info.format);
info!("reconstructed match");
archive.store_match(match_id as i32, match_info).await?;
}
}
}
}
archive
.update_match_details_from_team_matches(&matches.team, format, season)
.await?;
} }
info!(date = ?date, format = %match_info.format, "updating match");
archive.update_match_details(id, &match_info, date).await?;
sleep(Duration::from_millis(500)).await; sleep(Duration::from_millis(500)).await;
} }
Ok(()) Ok(())

View file

@ -227,7 +227,11 @@ impl Parser for TeamMatchesParser {
score, score,
score_opponent, score_opponent,
}, },
(Some(opponent), _, _, None) => MatchResult::Unknown { opponent }, (Some(opponent), _, _, None) => MatchResult::Unknown {
opponent,
score,
score_opponent,
},
_ => MatchResult::ByeWeek, _ => MatchResult::ByeWeek,
}; };
Ok(TeamSeasonMatch { Ok(TeamSeasonMatch {

View file

@ -327,6 +327,12 @@ impl TeamSeasonMatch {
score, score,
score_opponent, score_opponent,
.. ..
}
| MatchResult::Unknown {
opponent,
score,
score_opponent,
..
} => { } => {
let (team_home, team_away, score_home, score_away) = if self.side == Side::Home { let (team_home, team_away, score_home, score_away) = if self.side == Side::Home {
(team.clone(), opponent.clone(), *score, *score_opponent) (team.clone(), opponent.clone(), *score, *score_opponent)
@ -372,6 +378,8 @@ pub enum MatchResult {
ByeWeek, ByeWeek,
Unknown { Unknown {
opponent: TeamRef, opponent: TeamRef,
score: u8,
score_opponent: u8,
}, },
} }
@ -382,6 +390,15 @@ impl MatchResult {
_ => None, _ => None,
} }
} }
/// Returns the opposing team for results that carry one.
///
/// `Played`, `Pending` and `Unknown` results yield their `opponent`;
/// every other variant yields `None`.
pub fn opponents(&self) -> Option<&TeamRef> {
    let (MatchResult::Played { opponent, .. }
    | MatchResult::Pending { opponent, .. }
    | MatchResult::Unknown { opponent, .. }) = self
    else {
        return None;
    };
    Some(opponent)
}
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]