mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
add match page
This commit is contained in:
parent
caf376ac42
commit
668edc434a
9 changed files with 3614 additions and 7 deletions
2
api-server/Cargo.lock
generated
2
api-server/Cargo.lock
generated
|
|
@ -1743,8 +1743,6 @@ dependencies = [
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ugc-scraper"
|
name = "ugc-scraper"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3a65a605e079609b2f105c2f8930a0fb6786766fae5ef8776692cbb8a85b2b56"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"scraper",
|
"scraper",
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,7 @@ async fn main() -> MainResult {
|
||||||
.route("/team/:id", get(team))
|
.route("/team/:id", get(team))
|
||||||
.route("/team/:id/roster", get(team_roster))
|
.route("/team/:id/roster", get(team_roster))
|
||||||
.route("/team/:id/matches", get(team_matches))
|
.route("/team/:id/matches", get(team_matches))
|
||||||
|
.route("/match/:id", get(match_page))
|
||||||
.with_state(AppState::default());
|
.with_state(AppState::default());
|
||||||
|
|
||||||
// run it
|
// run it
|
||||||
|
|
@ -150,3 +151,13 @@ async fn team_matches(
|
||||||
let response = state.client.team_matches(id).await?;
|
let response = state.client.team_matches(id).await?;
|
||||||
Ok(Json(response))
|
Ok(Json(response))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[instrument(skip(state))]
|
||||||
|
async fn match_page(
|
||||||
|
Path(id): Path<u32>,
|
||||||
|
State(state): State<AppState>,
|
||||||
|
) -> Result<impl IntoResponse, ApiError> {
|
||||||
|
debug!(team = id, "requesting match");
|
||||||
|
let response = state.client.match_info(id).await?;
|
||||||
|
Ok(Json(response))
|
||||||
|
}
|
||||||
|
|
|
||||||
11
src/data.rs
11
src/data.rs
|
|
@ -189,3 +189,14 @@ pub struct Season {
|
||||||
pub id: String,
|
pub id: String,
|
||||||
pub name: String,
|
pub name: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||||
|
pub struct MatchInfo {
|
||||||
|
pub comment: Option<String>,
|
||||||
|
pub comment_author: Option<String>,
|
||||||
|
pub team_home: TeamRef,
|
||||||
|
pub team_away: TeamRef,
|
||||||
|
pub score_home: u8,
|
||||||
|
pub score_away: u8,
|
||||||
|
}
|
||||||
|
|
|
||||||
25
src/lib.rs
25
src/lib.rs
|
|
@ -3,10 +3,12 @@ mod error;
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
|
|
||||||
use crate::data::{MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason};
|
use crate::data::{
|
||||||
|
MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason,
|
||||||
|
};
|
||||||
use crate::parser::{
|
use crate::parser::{
|
||||||
Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser, TeamMatchesParser,
|
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||||
TeamParser, TeamRosterHistoryParser,
|
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
||||||
};
|
};
|
||||||
pub use error::*;
|
pub use error::*;
|
||||||
use reqwest::redirect::Policy;
|
use reqwest::redirect::Policy;
|
||||||
|
|
@ -25,6 +27,7 @@ pub struct UgcClient {
|
||||||
team_matches_parser: TeamMatchesParser,
|
team_matches_parser: TeamMatchesParser,
|
||||||
seasons_parser: SeasonsParser,
|
seasons_parser: SeasonsParser,
|
||||||
team_lookup_parser: TeamLookupParser,
|
team_lookup_parser: TeamLookupParser,
|
||||||
|
match_page_parser: MatchPageParser,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// "API client" for ugc by scraping the website
|
/// "API client" for ugc by scraping the website
|
||||||
|
|
@ -39,6 +42,7 @@ impl UgcClient {
|
||||||
team_matches_parser: TeamMatchesParser::new(),
|
team_matches_parser: TeamMatchesParser::new(),
|
||||||
seasons_parser: SeasonsParser::new(),
|
seasons_parser: SeasonsParser::new(),
|
||||||
team_lookup_parser: TeamLookupParser::new(),
|
team_lookup_parser: TeamLookupParser::new(),
|
||||||
|
match_page_parser: MatchPageParser::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -161,4 +165,19 @@ impl UgcClient {
|
||||||
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
|
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get match page info
|
||||||
|
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
|
||||||
|
let body = self
|
||||||
|
.client
|
||||||
|
.get(&format!(
|
||||||
|
"https://www.ugcleague.com/matchpage_tf2h.cfm?mid={}",
|
||||||
|
id
|
||||||
|
))
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.text()
|
||||||
|
.await?;
|
||||||
|
self.match_page_parser.parse(&body)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
138
src/parser/match_page.rs
Normal file
138
src/parser/match_page.rs
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
use super::Parser;
|
||||||
|
use crate::data::{MatchInfo, TeamRef};
|
||||||
|
use crate::parser::{select_last_text, select_text, team_id_from_link, ElementExt};
|
||||||
|
use crate::{ParseError, Result};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
/// `span.text-success` elements below `.row-fluid .col-md-12`; used to read the comment author.
const SELECTOR_MATCH_COMMENT_AUTHOR: &str = ".row-fluid .col-md-12 span.text-success";

/// `<p>` elements that are direct children of a `.white-row-light-small` element, which is
/// itself a direct child of `.col-md-12`; used to read the comment text.
const SELECTOR_MATCH_COMMENT: &str = ".row-fluid .col-md-12 > .white-row-light-small > p";

/// Anchors whose `href` starts with `team_page`, excluding anchors with the `btn-large` class.
const SELECTOR_MATCH_TEAM_LINK: &str = "a[href^=\"team_page\"]:not(.btn-large)";

/// First cell of every row that is the second child of its parent, inside an element carrying
/// the `table`, `table-condensed` and `table-bordered` classes; used to read team names.
const SELECTOR_MATCH_RESULT_TEAM: &str =
    ".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(1)";

/// Second cell of the same rows as [`SELECTOR_MATCH_RESULT_TEAM`]; used to read team scores.
const SELECTOR_MATCH_RESULT_SCORE: &str =
    ".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(2)";
|
||||||
|
|
||||||
|
pub struct MatchPageParser {
|
||||||
|
selector_author: Selector,
|
||||||
|
selector_comment: Selector,
|
||||||
|
selector_team_link: Selector,
|
||||||
|
selector_result_team: Selector,
|
||||||
|
selector_result_score: Selector,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for MatchPageParser {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MatchPageParser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
MatchPageParser {
|
||||||
|
selector_author: Selector::parse(SELECTOR_MATCH_COMMENT_AUTHOR).unwrap(),
|
||||||
|
selector_comment: Selector::parse(SELECTOR_MATCH_COMMENT).unwrap(),
|
||||||
|
selector_team_link: Selector::parse(SELECTOR_MATCH_TEAM_LINK).unwrap(),
|
||||||
|
selector_result_team: Selector::parse(SELECTOR_MATCH_RESULT_TEAM).unwrap(),
|
||||||
|
selector_result_score: Selector::parse(SELECTOR_MATCH_RESULT_SCORE).unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Parser for MatchPageParser {
|
||||||
|
type Output = MatchInfo;
|
||||||
|
|
||||||
|
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||||
|
let document = Html::parse_document(document);
|
||||||
|
|
||||||
|
let author = select_text(document.root_element(), &self.selector_author);
|
||||||
|
let comment = select_last_text(document.root_element(), &self.selector_comment);
|
||||||
|
|
||||||
|
let mut team_links = document.select(&self.selector_team_link);
|
||||||
|
let team_link_home = team_links.next().ok_or(ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_MATCH_TEAM_LINK,
|
||||||
|
role: "home team link",
|
||||||
|
})?;
|
||||||
|
let team_link_away = team_links.next().ok_or(ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_MATCH_TEAM_LINK,
|
||||||
|
role: "away team link",
|
||||||
|
})?;
|
||||||
|
let home_team_id = team_id_from_link(team_link_home.attr("href").unwrap_or_default())?;
|
||||||
|
let away_team_id = team_id_from_link(team_link_away.attr("href").unwrap_or_default())?;
|
||||||
|
|
||||||
|
let mut team_names = document.select(&self.selector_result_team);
|
||||||
|
let team_name_home = team_names
|
||||||
|
.next()
|
||||||
|
.ok_or(ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||||
|
role: "home team link",
|
||||||
|
})?
|
||||||
|
.first_text()
|
||||||
|
.ok_or(ParseError::EmptyText {
|
||||||
|
role: "home team name",
|
||||||
|
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||||
|
})?
|
||||||
|
.to_string();
|
||||||
|
let team_name_away = team_names
|
||||||
|
.next()
|
||||||
|
.ok_or(ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||||
|
role: "away team link",
|
||||||
|
})?
|
||||||
|
.first_text()
|
||||||
|
.ok_or(ParseError::EmptyText {
|
||||||
|
role: "away team name",
|
||||||
|
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||||
|
})?
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let mut team_scores = document.select(&self.selector_result_score);
|
||||||
|
|
||||||
|
let team_score_home = team_scores
|
||||||
|
.next()
|
||||||
|
.ok_or(ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||||
|
role: "home team score",
|
||||||
|
})?
|
||||||
|
.first_text()
|
||||||
|
.ok_or(ParseError::EmptyText {
|
||||||
|
role: "home team score",
|
||||||
|
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||||
|
})?
|
||||||
|
.parse()
|
||||||
|
.map_err(|_| ParseError::InvalidText {
|
||||||
|
role: "away team score",
|
||||||
|
text: "dont have this".to_string(),
|
||||||
|
})?;
|
||||||
|
let team_score_away = team_scores
|
||||||
|
.next()
|
||||||
|
.ok_or(ParseError::ElementNotFound {
|
||||||
|
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||||
|
role: "away team link",
|
||||||
|
})?
|
||||||
|
.first_text()
|
||||||
|
.ok_or(ParseError::EmptyText {
|
||||||
|
role: "away team name",
|
||||||
|
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||||
|
})?
|
||||||
|
.parse()
|
||||||
|
.map_err(|_| ParseError::InvalidText {
|
||||||
|
role: "home team score",
|
||||||
|
text: "dont have this".to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(MatchInfo {
|
||||||
|
comment_author: author.map(String::from),
|
||||||
|
comment: comment.map(String::from),
|
||||||
|
score_away: team_score_away,
|
||||||
|
score_home: team_score_home,
|
||||||
|
team_home: TeamRef {
|
||||||
|
name: team_name_home.to_string(),
|
||||||
|
id: home_team_id,
|
||||||
|
},
|
||||||
|
team_away: TeamRef {
|
||||||
|
name: team_name_away.to_string(),
|
||||||
|
id: away_team_id,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -4,6 +4,7 @@ use steamid_ng::SteamID;
|
||||||
use time::format_description::FormatItem;
|
use time::format_description::FormatItem;
|
||||||
use time::macros::format_description;
|
use time::macros::format_description;
|
||||||
|
|
||||||
|
mod match_page;
|
||||||
mod player;
|
mod player;
|
||||||
mod player_details;
|
mod player_details;
|
||||||
mod seasons;
|
mod seasons;
|
||||||
|
|
@ -12,6 +13,7 @@ mod team_lookup;
|
||||||
mod team_matches;
|
mod team_matches;
|
||||||
mod team_roster_history;
|
mod team_roster_history;
|
||||||
|
|
||||||
|
pub use match_page::*;
|
||||||
pub use player::*;
|
pub use player::*;
|
||||||
pub use player_details::*;
|
pub use player_details::*;
|
||||||
pub use seasons::*;
|
pub use seasons::*;
|
||||||
|
|
|
||||||
3402
tests/data/match_116246.html
Normal file
3402
tests/data/match_116246.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,8 @@
|
||||||
use insta::assert_json_snapshot;
|
use insta::assert_json_snapshot;
|
||||||
use std::fs::read_to_string;
|
use std::fs::read_to_string;
|
||||||
use ugc_scraper::parser::{
|
use ugc_scraper::parser::{
|
||||||
Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser, TeamMatchesParser,
|
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||||
TeamParser, TeamRosterHistoryParser,
|
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -92,3 +92,11 @@ fn test_parse_seasons_2_html() {
|
||||||
let parsed = parser.parse(&body).unwrap();
|
let parsed = parser.parse(&body).unwrap();
|
||||||
assert_json_snapshot!(parsed);
|
assert_json_snapshot!(parsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Snapshot test: parses the stored match page fixture and compares the result
/// against the recorded JSON snapshot.
#[test]
fn test_parse_match_html() {
    let html = read_to_string("tests/data/match_116246.html").unwrap();
    let parsed = MatchPageParser::new().parse(&html).unwrap();
    assert_json_snapshot!(parsed);
}
|
||||||
|
|
|
||||||
18
tests/snapshots/snapshot__parse_match_html.snap
Normal file
18
tests/snapshots/snapshot__parse_match_html.snap
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
---
|
||||||
|
source: tests/snapshot.rs
|
||||||
|
expression: parsed
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"comment": "https://logs.tf/3509421#76561198288857894\r\nhttps://logs.tf/3509435#76561198288857894",
|
||||||
|
"comment_author": "Vkid E-sports:",
|
||||||
|
"team_home": {
|
||||||
|
"name": "Vkid E-Sports",
|
||||||
|
"id": 32033
|
||||||
|
},
|
||||||
|
"team_away": {
|
||||||
|
"name": "Xenon",
|
||||||
|
"id": 7861
|
||||||
|
},
|
||||||
|
"score_home": 4,
|
||||||
|
"score_away": 0
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue