mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
add match page
This commit is contained in:
parent
caf376ac42
commit
668edc434a
9 changed files with 3614 additions and 7 deletions
2
api-server/Cargo.lock
generated
2
api-server/Cargo.lock
generated
|
|
@ -1743,8 +1743,6 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "ugc-scraper"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a65a605e079609b2f105c2f8930a0fb6786766fae5ef8776692cbb8a85b2b56"
|
||||
dependencies = [
|
||||
"reqwest",
|
||||
"scraper",
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ async fn main() -> MainResult {
|
|||
.route("/team/:id", get(team))
|
||||
.route("/team/:id/roster", get(team_roster))
|
||||
.route("/team/:id/matches", get(team_matches))
|
||||
.route("/match/:id", get(match_page))
|
||||
.with_state(AppState::default());
|
||||
|
||||
// run it
|
||||
|
|
@ -150,3 +151,13 @@ async fn team_matches(
|
|||
let response = state.client.team_matches(id).await?;
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
||||
#[instrument(skip(state))]
|
||||
async fn match_page(
|
||||
Path(id): Path<u32>,
|
||||
State(state): State<AppState>,
|
||||
) -> Result<impl IntoResponse, ApiError> {
|
||||
debug!(team = id, "requesting match");
|
||||
let response = state.client.match_info(id).await?;
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
|
|
|||
11
src/data.rs
11
src/data.rs
|
|
@ -189,3 +189,14 @@ pub struct Season {
|
|||
pub id: String,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct MatchInfo {
|
||||
pub comment: Option<String>,
|
||||
pub comment_author: Option<String>,
|
||||
pub team_home: TeamRef,
|
||||
pub team_away: TeamRef,
|
||||
pub score_home: u8,
|
||||
pub score_away: u8,
|
||||
}
|
||||
|
|
|
|||
25
src/lib.rs
25
src/lib.rs
|
|
@ -3,10 +3,12 @@ mod error;
|
|||
#[doc(hidden)]
|
||||
pub mod parser;
|
||||
|
||||
use crate::data::{MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason};
|
||||
use crate::data::{
|
||||
MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason,
|
||||
};
|
||||
use crate::parser::{
|
||||
Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser, TeamMatchesParser,
|
||||
TeamParser, TeamRosterHistoryParser,
|
||||
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
||||
};
|
||||
pub use error::*;
|
||||
use reqwest::redirect::Policy;
|
||||
|
|
@ -25,6 +27,7 @@ pub struct UgcClient {
|
|||
team_matches_parser: TeamMatchesParser,
|
||||
seasons_parser: SeasonsParser,
|
||||
team_lookup_parser: TeamLookupParser,
|
||||
match_page_parser: MatchPageParser,
|
||||
}
|
||||
|
||||
/// "API client" for ugc by scraping the website
|
||||
|
|
@ -39,6 +42,7 @@ impl UgcClient {
|
|||
team_matches_parser: TeamMatchesParser::new(),
|
||||
seasons_parser: SeasonsParser::new(),
|
||||
team_lookup_parser: TeamLookupParser::new(),
|
||||
match_page_parser: MatchPageParser::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -161,4 +165,19 @@ impl UgcClient {
|
|||
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
|
||||
.await
|
||||
}
|
||||
|
||||
/// Get match page info
|
||||
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
|
||||
let body = self
|
||||
.client
|
||||
.get(&format!(
|
||||
"https://www.ugcleague.com/matchpage_tf2h.cfm?mid={}",
|
||||
id
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
self.match_page_parser.parse(&body)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
138
src/parser/match_page.rs
Normal file
138
src/parser/match_page.rs
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
use super::Parser;
|
||||
use crate::data::{MatchInfo, TeamRef};
|
||||
use crate::parser::{select_last_text, select_text, team_id_from_link, ElementExt};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
/// CSS selector used to find the author label of the match comment.
const SELECTOR_MATCH_COMMENT_AUTHOR: &str = ".row-fluid .col-md-12 span.text-success";

/// CSS selector used to find the paragraph(s) holding the match comment.
const SELECTOR_MATCH_COMMENT: &str = ".row-fluid .col-md-12 > .white-row-light-small > p";

/// CSS selector for links whose `href` starts with `team_page`, excluding
/// links carrying the `btn-large` class.
const SELECTOR_MATCH_TEAM_LINK: &str = "a[href^=\"team_page\"]:not(.btn-large)";

/// CSS selector for the first cell of the second-child row of the results
/// table(s); the parser reads team names from these cells.
const SELECTOR_MATCH_RESULT_TEAM: &str =
    ".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(1)";

/// CSS selector for the second cell of the second-child row of the results
/// table(s); the parser reads team scores from these cells.
const SELECTOR_MATCH_RESULT_SCORE: &str =
    ".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(2)";
|
||||
|
||||
pub struct MatchPageParser {
|
||||
selector_author: Selector,
|
||||
selector_comment: Selector,
|
||||
selector_team_link: Selector,
|
||||
selector_result_team: Selector,
|
||||
selector_result_score: Selector,
|
||||
}
|
||||
|
||||
impl Default for MatchPageParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl MatchPageParser {
|
||||
pub fn new() -> Self {
|
||||
MatchPageParser {
|
||||
selector_author: Selector::parse(SELECTOR_MATCH_COMMENT_AUTHOR).unwrap(),
|
||||
selector_comment: Selector::parse(SELECTOR_MATCH_COMMENT).unwrap(),
|
||||
selector_team_link: Selector::parse(SELECTOR_MATCH_TEAM_LINK).unwrap(),
|
||||
selector_result_team: Selector::parse(SELECTOR_MATCH_RESULT_TEAM).unwrap(),
|
||||
selector_result_score: Selector::parse(SELECTOR_MATCH_RESULT_SCORE).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for MatchPageParser {
|
||||
type Output = MatchInfo;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(document);
|
||||
|
||||
let author = select_text(document.root_element(), &self.selector_author);
|
||||
let comment = select_last_text(document.root_element(), &self.selector_comment);
|
||||
|
||||
let mut team_links = document.select(&self.selector_team_link);
|
||||
let team_link_home = team_links.next().ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_TEAM_LINK,
|
||||
role: "home team link",
|
||||
})?;
|
||||
let team_link_away = team_links.next().ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_TEAM_LINK,
|
||||
role: "away team link",
|
||||
})?;
|
||||
let home_team_id = team_id_from_link(team_link_home.attr("href").unwrap_or_default())?;
|
||||
let away_team_id = team_id_from_link(team_link_away.attr("href").unwrap_or_default())?;
|
||||
|
||||
let mut team_names = document.select(&self.selector_result_team);
|
||||
let team_name_home = team_names
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
role: "home team link",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "home team name",
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
})?
|
||||
.to_string();
|
||||
let team_name_away = team_names
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
role: "away team link",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "away team name",
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
let mut team_scores = document.select(&self.selector_result_score);
|
||||
|
||||
let team_score_home = team_scores
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
role: "home team score",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "home team score",
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
})?
|
||||
.parse()
|
||||
.map_err(|_| ParseError::InvalidText {
|
||||
role: "away team score",
|
||||
text: "dont have this".to_string(),
|
||||
})?;
|
||||
let team_score_away = team_scores
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
role: "away team link",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "away team name",
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
})?
|
||||
.parse()
|
||||
.map_err(|_| ParseError::InvalidText {
|
||||
role: "home team score",
|
||||
text: "dont have this".to_string(),
|
||||
})?;
|
||||
|
||||
Ok(MatchInfo {
|
||||
comment_author: author.map(String::from),
|
||||
comment: comment.map(String::from),
|
||||
score_away: team_score_away,
|
||||
score_home: team_score_home,
|
||||
team_home: TeamRef {
|
||||
name: team_name_home.to_string(),
|
||||
id: home_team_id,
|
||||
},
|
||||
team_away: TeamRef {
|
||||
name: team_name_away.to_string(),
|
||||
id: away_team_id,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ use steamid_ng::SteamID;
|
|||
use time::format_description::FormatItem;
|
||||
use time::macros::format_description;
|
||||
|
||||
mod match_page;
|
||||
mod player;
|
||||
mod player_details;
|
||||
mod seasons;
|
||||
|
|
@ -12,6 +13,7 @@ mod team_lookup;
|
|||
mod team_matches;
|
||||
mod team_roster_history;
|
||||
|
||||
pub use match_page::*;
|
||||
pub use player::*;
|
||||
pub use player_details::*;
|
||||
pub use seasons::*;
|
||||
|
|
|
|||
3402
tests/data/match_116246.html
Normal file
3402
tests/data/match_116246.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,8 @@
|
|||
use insta::assert_json_snapshot;
|
||||
use std::fs::read_to_string;
|
||||
use ugc_scraper::parser::{
|
||||
Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser, TeamMatchesParser,
|
||||
TeamParser, TeamRosterHistoryParser,
|
||||
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
|
@ -92,3 +92,11 @@ fn test_parse_seasons_2_html() {
|
|||
let parsed = parser.parse(&body).unwrap();
|
||||
assert_json_snapshot!(parsed);
|
||||
}
|
||||
|
||||
/// Snapshot test: parse the stored match page fixture and compare the
/// resulting data against the recorded JSON snapshot.
#[test]
fn test_parse_match_html() {
    let html = read_to_string("tests/data/match_116246.html").unwrap();
    // The binding is named `parsed` on purpose: the snapshot metadata
    // records the asserted expression.
    let parsed = MatchPageParser::new().parse(&html).unwrap();
    assert_json_snapshot!(parsed);
}
|
||||
|
|
|
|||
18
tests/snapshots/snapshot__parse_match_html.snap
Normal file
18
tests/snapshots/snapshot__parse_match_html.snap
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
source: tests/snapshot.rs
|
||||
expression: parsed
|
||||
---
|
||||
{
|
||||
"comment": "https://logs.tf/3509421#76561198288857894\r\nhttps://logs.tf/3509435#76561198288857894",
|
||||
"comment_author": "Vkid E-sports:",
|
||||
"team_home": {
|
||||
"name": "Vkid E-Sports",
|
||||
"id": 32033
|
||||
},
|
||||
"team_away": {
|
||||
"name": "Xenon",
|
||||
"id": 7861
|
||||
},
|
||||
"score_home": 4,
|
||||
"score_away": 0
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue