add some retry logic

This commit is contained in:
Robin Appelman 2025-04-19 15:46:46 +02:00
commit 01ee397e59
8 changed files with 64 additions and 48 deletions

13
Cargo.lock generated
View file

@ -1860,9 +1860,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [ dependencies = [
"pin-project-lite", "pin-project-lite",
"tracing-attributes",
"tracing-core", "tracing-core",
] ]
[[package]]
name = "tracing-attributes"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "tracing-core" name = "tracing-core"
version = "0.1.33" version = "0.1.33"
@ -1892,6 +1904,7 @@ dependencies = [
"thiserror 2.0.12", "thiserror 2.0.12",
"time", "time",
"tokio", "tokio",
"tracing",
"ugc-scraper-types", "ugc-scraper-types",
] ]

View file

@ -16,6 +16,7 @@ time = { version = "0.3.41", features = ["parsing", "macros"] }
steamid-ng = "1.0.0" steamid-ng = "1.0.0"
ugc-scraper-types = { version = "0.2.0", path = "./types" } ugc-scraper-types = { version = "0.2.0", path = "./types" }
regex = "1.11.1" regex = "1.11.1"
tracing = "0.1.41"
[dev-dependencies] [dev-dependencies]
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread", "rt"] } tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread", "rt"] }

1
api-server/Cargo.lock generated
View file

@ -1972,6 +1972,7 @@ dependencies = [
"thiserror 2.0.12", "thiserror 2.0.12",
"time", "time",
"tokio", "tokio",
"tracing",
"ugc-scraper-types", "ugc-scraper-types",
] ]

View file

@ -13,8 +13,11 @@ use crate::parser::{
}; };
pub use error::*; pub use error::*;
use reqwest::redirect::Policy; use reqwest::redirect::Policy;
use reqwest::{Client, Response, StatusCode}; use reqwest::{Client, IntoUrl, Response, StatusCode};
use std::time::Duration;
pub use steamid_ng::SteamID; pub use steamid_ng::SteamID;
use tokio::time::sleep;
use tracing::warn;
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>; pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
@ -63,19 +66,38 @@ impl UgcClient {
map_history_parser: MapHistoryParser::new(), map_history_parser: MapHistoryParser::new(),
} }
} }
async fn request<U: IntoUrl>(&self, url: U) -> Result<String> {
let url = url.into_url()?;
match self.try_request(url.clone()).await {
Ok(res) => Ok(res),
Err(ScrapeError::Request(e)) => {
warn!(url = url.as_str(), error = ?e, "failed to send request, retrying");
sleep(Duration::from_secs_f32(0.5)).await;
self.try_request(url).await
}
Err(e) => Err(e),
}
}
/// Perform a single GET request for `url` and return the body as text.
///
/// The response is passed through `check_not_found` (defined elsewhere in
/// this crate) and then `error_for_status`, so an error from either step
/// is returned before the body is read. No retrying happens here.
async fn try_request<U: IntoUrl>(&self, url: U) -> Result<String> {
    let response = self.client.get(url).send().await?;

    // Same order as before: the not-found check runs ahead of the generic
    // status check.
    let response = response.check_not_found()?;
    let response = response.error_for_status()?;

    let body = response.text().await?;
    Ok(body)
}
/// Retrieve player information /// Retrieve player information
pub async fn player(&self, steam_id: SteamID) -> Result<Player> { pub async fn player(&self, steam_id: SteamID) -> Result<Player> {
let body = self let body = self
.client .request(format!(
.get(format!(
"https://www.ugcleague.com/players_page.cfm?player_id={}", "https://www.ugcleague.com/players_page.cfm?player_id={}",
u64::from(steam_id) u64::from(steam_id)
)) ))
.send()
.await?
.check_not_found()?
.text()
.await?; .await?;
self.player_parser.parse(&body) self.player_parser.parse(&body)
} }
@ -83,15 +105,10 @@ impl UgcClient {
/// Retrieve team membership history for a player /// Retrieve team membership history for a player
pub async fn player_team_history(&self, steam_id: SteamID) -> Result<Vec<MembershipHistory>> { pub async fn player_team_history(&self, steam_id: SteamID) -> Result<Vec<MembershipHistory>> {
let body = self let body = self
.client .request(format!(
.get(format!(
"https://www.ugcleague.com/players_page_details.cfm?player_id={}", "https://www.ugcleague.com/players_page_details.cfm?player_id={}",
u64::from(steam_id) u64::from(steam_id)
)) ))
.send()
.await?
.check_not_found()?
.text()
.await?; .await?;
self.player_detail_parser.parse(&body) self.player_detail_parser.parse(&body)
} }
@ -99,14 +116,10 @@ impl UgcClient {
/// Retrieve team information /// Retrieve team information
pub async fn team(&self, id: u32) -> Result<Team> { pub async fn team(&self, id: u32) -> Result<Team> {
let body = self let body = self
.client .request(format!(
.get(format!(
"https://www.ugcleague.com/team_page.cfm?clan_id={}", "https://www.ugcleague.com/team_page.cfm?clan_id={}",
id id
)) ))
.send()
.await?
.text()
.await?; .await?;
self.team_parser.parse(&body) self.team_parser.parse(&body)
} }
@ -114,14 +127,10 @@ impl UgcClient {
/// Retrieve team roster history /// Retrieve team roster history
pub async fn team_roster_history(&self, id: u32) -> Result<TeamRosterData> { pub async fn team_roster_history(&self, id: u32) -> Result<TeamRosterData> {
let body = self let body = self
.client .request(format!(
.get(format!(
"https://www.ugcleague.com/team_page_rosterhistory.cfm?clan_id={}", "https://www.ugcleague.com/team_page_rosterhistory.cfm?clan_id={}",
id id
)) ))
.send()
.await?
.text()
.await?; .await?;
self.team_roster_history_parser.parse(&body) self.team_roster_history_parser.parse(&body)
} }
@ -129,27 +138,17 @@ impl UgcClient {
/// Retrieve team match history /// Retrieve team match history
pub async fn team_matches(&self, id: u32) -> Result<Vec<TeamSeason>> { pub async fn team_matches(&self, id: u32) -> Result<Vec<TeamSeason>> {
let body = self let body = self
.client .request(format!(
.get(format!(
"https://www.ugcleague.com/team_page_matches.cfm?clan_id={}", "https://www.ugcleague.com/team_page_matches.cfm?clan_id={}",
id id
)) ))
.send()
.await?
.text()
.await?; .await?;
self.team_matches_parser.parse(&body) self.team_matches_parser.parse(&body)
} }
/// Get all historical seasons by game mode /// Get all historical seasons by game mode
pub async fn previous_seasons(&self) -> Result<Vec<Seasons>> { pub async fn previous_seasons(&self) -> Result<Vec<Seasons>> {
let body = self let body = self.request("https://www.ugcleague.com").await?;
.client
.get("https://www.ugcleague.com")
.send()
.await?
.text()
.await?;
self.seasons_parser.parse(&body) self.seasons_parser.parse(&body)
} }
@ -158,22 +157,17 @@ impl UgcClient {
"https://www.ugcleague.com/team_lookup_tf2{}.cfm", "https://www.ugcleague.com/team_lookup_tf2{}.cfm",
format.letter() format.letter()
); );
let body = self.client.get(link).send().await?.text().await?; let body = self.request(link).await?;
self.team_lookup_parser.parse(&body) self.team_lookup_parser.parse(&body)
} }
/// Get match page info /// Get match page info
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> { pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
let body = self let body = self
.client .request(format!(
.get(format!(
"https://www.ugcleague.com/matchpage_tf2h.cfm?mid={}", "https://www.ugcleague.com/matchpage_tf2h.cfm?mid={}",
id id
)) ))
.send()
.await?
.check_not_found()?
.text()
.await?; .await?;
self.match_page_parser.parse(&body) self.match_page_parser.parse(&body)
} }
@ -183,7 +177,7 @@ impl UgcClient {
"https://www.ugcleague.com/rostertransactions_tf2{}_all.cfm", "https://www.ugcleague.com/rostertransactions_tf2{}_all.cfm",
format.letter() format.letter()
); );
let body = self.client.get(link).send().await?.text().await?; let body = self.request(link).await?;
self.transaction_parser.parse(&body) self.transaction_parser.parse(&body)
} }
@ -192,7 +186,7 @@ impl UgcClient {
"https://www.ugcleague.com/maplist_tf2{}.cfm", "https://www.ugcleague.com/maplist_tf2{}.cfm",
format.letter() format.letter()
); );
let body = self.client.get(link).send().await?.text().await?; let body = self.request(link).await?;
self.map_history_parser.parse(&body) self.map_history_parser.parse(&body)
} }
} }

View file

@ -14,5 +14,9 @@ expression: parsed
"id": 7861 "id": 7861
}, },
"score_home": 4, "score_home": 4,
"score_away": 0 "score_away": 0,
"map": "koth_ashville_final",
"week": 1,
"format": "9v9",
"default_date": "Mon Oct 09"
} }

View file

@ -193,5 +193,6 @@ expression: parsed
"demoman", "demoman",
"soldier", "soldier",
"scout" "scout"
] ],
"country": null
} }

View file

@ -18,5 +18,6 @@ expression: parsed
} }
], ],
"teams": [], "teams": [],
"favorite_classes": [] "favorite_classes": [],
"country": null
} }

View file

@ -386,5 +386,6 @@ expression: parsed
"since": "2013-08-09" "since": "2013-08-09"
} }
], ],
"favorite_classes": [] "favorite_classes": [],
"country": null
} }