Episode: Update Episode, Season, and Series Name Parsing
The episode page's JSON metadata is now extracted and parsed to obtain these names.
This commit is contained in:
parent
b070bde38d
commit
0e8ad8c44d
145
src/episode.cpp
145
src/episode.cpp
|
@ -166,51 +166,61 @@ namespace dropout_dl {
|
||||||
return size * nmemb;
|
return size * nmemb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the body of the `window.Page = { ... }` object embedded in the episode page.
 *
 * @param html_data - Episode page data
 * @return The text between the opening brace of `window.Page = {` and its matching
 *         closing brace (both braces excluded), or "ERROR" if no balanced object is found.
 *
 * Braces that appear inside double-quoted string literals are not counted, so a value
 * such as "Part 1 }" does not end the object early.
 * NOTE(review): assumes string values are double-quoted (JSON style) - confirm against a real page.
 */
std::string episode::get_meta_data_json(const std::string& html_data) {
    const std::string data_start("window.Page = {");

    // Try every occurrence of the marker; each attempt starts with fresh state.
    for (std::string::size_type marker = html_data.find(data_start);
         marker != std::string::npos;
         marker = html_data.find(data_start, marker + 1)) {

        const std::string::size_type body_start = marker + data_start.size();

        // The current grouping depth. 1 because the marker itself ends with the opening brace.
        int grouping_depth = 1;
        // True while scanning inside a double-quoted string literal.
        bool in_string = false;
        // True when the previous character inside a string was an unconsumed backslash.
        bool escaped = false;

        for (std::string::size_type pos = body_start; pos < html_data.size(); pos++) {
            const char current_char = html_data[pos];

            if (in_string) {
                if (escaped) {
                    escaped = false;
                }
                else if (current_char == '\\') {
                    escaped = true;
                }
                else if (current_char == '"') {
                    in_string = false;
                }
                continue;
            }

            if (current_char == '"') {
                in_string = true;
            }
            else if (current_char == '{') {
                grouping_depth++;
            }
            else if (current_char == '}') {
                grouping_depth--;
                if (grouping_depth == 0) {
                    // Everything after the opening brace up to, but excluding, the matching close.
                    return html_data.substr(body_start, pos - body_start);
                }
            }
        }
    }

    return "ERROR";
}
|
||||||
|
|
||||||
// episode statics
|
// episode statics
|
||||||
std::string episode::get_series_name(const std::string& html_data) {
|
std::string episode::get_series_name(const std::string& meta_data) {
|
||||||
std::string series_title("series-title");
|
|
||||||
std::string open_a_tag("<a");
|
|
||||||
std::string close_tag(">");
|
|
||||||
std::string close_a("</a>");
|
|
||||||
|
|
||||||
for (int i = 0; i < html_data.size(); i++) {
|
|
||||||
if (substr_is(html_data, i, series_title)) {
|
|
||||||
for (int j = i + series_title.size(); j < html_data.size(); j++) {
|
|
||||||
if (html_data[j] == '\n' || html_data[j] == ' ' || html_data[j] == '\t') continue;
|
|
||||||
if (substr_is(html_data, j, open_a_tag)) {
|
|
||||||
for (int k = j + open_a_tag.size(); k < html_data.size(); k++) {
|
|
||||||
if (substr_is(html_data, k, close_tag)) {
|
|
||||||
k++;
|
|
||||||
for (int l = 0; l < html_data.size() - k; l++) {
|
|
||||||
if (substr_is(html_data, k + l, close_a)) {
|
|
||||||
return format_name_string(html_data.substr(k, l));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return "ERROR";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string episode::get_episode_name(const std::string& html_data) {
|
|
||||||
int title_start = -1;
|
int title_start = -1;
|
||||||
std::string video_title("video-title");
|
std::string parent_title("\"parent\"");
|
||||||
std::string open_strong("<strong>");
|
std::string series_title_title("\"name\"");
|
||||||
std::string close_strong("</strong>");
|
for (int i = 0; i < meta_data.size(); i++) {
|
||||||
for (int i = 0; i < html_data.size(); i++) {
|
if (substr_is(meta_data, i, parent_title)) {
|
||||||
if (substr_is(html_data, i, video_title)) {
|
// Skip "parent" and the two characters that follow it.
|
||||||
for (int j = i; j < html_data.size(); j++) {
|
i += parent_title.size() + 2;
|
||||||
if (substr_is(html_data, j, open_strong)) {
|
|
||||||
title_start = j + open_strong.size();
|
|
||||||
break;
|
int j;
|
||||||
}
|
for (j = 0; meta_data[i + j] != '}' && i + j < meta_data.size(); j++);
|
||||||
}
|
|
||||||
for (int j = 0; j < html_data.size() - title_start; j++) {
|
std::string series_data = meta_data.substr(i, j);
|
||||||
if (substr_is(html_data, title_start + j, close_strong)) {
|
|
||||||
return format_name_string(html_data.substr(title_start, j));
|
std::cout << "series_data: " << series_data << '\n';
|
||||||
|
|
||||||
|
for (j = 0; j < series_data.size(); j++) {
|
||||||
|
if (substr_is(series_data, j, series_title_title)) {
|
||||||
|
// Skip "name", the following colon, and the opening quotation mark.
|
||||||
|
j += series_title_title.size() + 2;
|
||||||
|
|
||||||
|
int k;
|
||||||
|
for (k = 0; j + k < series_data.size() && series_data[j + k] != '"'; k++);
|
||||||
|
|
||||||
|
return series_data.substr(j, k);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -218,22 +228,38 @@ namespace dropout_dl {
|
||||||
return "ERROR";
|
return "ERROR";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string episode::get_episode_number(const std::string& html_data) {
|
std::string episode::get_season_name(const std::string& meta_data) {
|
||||||
std::string episode("Episode");
|
std::string season_title_title("\"COLLECTION_TITLE\"");
|
||||||
std::string close_a("</a>");
|
for (int i = 0; i < meta_data.size(); i++) {
|
||||||
std::string episode_num;
|
if (substr_is(meta_data, i, season_title_title)) {
|
||||||
for (int i = 0; i < html_data.size(); i++) {
|
// Skip "COLLECTION_TITLE", the following colon, and the opening quotation mark.
|
||||||
if (substr_is(html_data, i, episode)) {
|
i += season_title_title.size() + 2;
|
||||||
for (int j = i + 8; j < html_data.size(); j++) {
|
|
||||||
if (html_data[j] == '\n' || html_data[j] == ' ' || html_data[j] == '\t') continue;
|
|
||||||
if (substr_is(html_data, j, close_a)) {
|
int j;
|
||||||
return episode_num;
|
for (j = 0; meta_data[i + j] != '"' && i + j < meta_data.size(); j++);
|
||||||
}
|
|
||||||
episode_num += html_data[j];
|
return meta_data.substr(i, j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return "ERROR";
|
||||||
}
|
}
|
||||||
return "-1";
|
|
||||||
|
std::string episode::get_episode_name(const std::string& meta_data) {
|
||||||
|
std::string video_title_title("\"VIDEO_TITLE\"");
|
||||||
|
for (int i = 0; i < meta_data.size(); i++) {
|
||||||
|
if (substr_is(meta_data, i, video_title_title)) {
|
||||||
|
// Skip "VIDEO_TITLE", the following colon, and the opening quotation mark.
|
||||||
|
i += video_title_title.size() + 2;
|
||||||
|
|
||||||
|
|
||||||
|
int j;
|
||||||
|
for (j = 0; meta_data[i + j] != '"' && i + j < meta_data.size(); j++);
|
||||||
|
|
||||||
|
return meta_data.substr(i, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "ERROR";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string episode::get_embed_url(const std::string& html_data) {
|
std::string episode::get_embed_url(const std::string& html_data) {
|
||||||
|
@ -459,8 +485,6 @@ namespace dropout_dl {
|
||||||
if(curl) {
|
if(curl) {
|
||||||
std::string out;
|
std::string out;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, get_video_url(quality).c_str());
|
curl_easy_setopt(curl, CURLOPT_URL, get_video_url(quality).c_str());
|
||||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, dropout_dl::WriteCallback);
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, dropout_dl::WriteCallback);
|
||||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &out);
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &out);
|
||||||
|
@ -478,10 +502,7 @@ namespace dropout_dl {
|
||||||
|
|
||||||
void episode::download(const std::string& quality, const std::string& series_directory, std::string filename) {
|
void episode::download(const std::string& quality, const std::string& series_directory, std::string filename) {
|
||||||
if (filename.empty()) {
|
if (filename.empty()) {
|
||||||
filename = "E" + (this->episode_number.size() < 2 ? "0" + this->episode_number : this->episode_number) + this->name +
|
filename = this->series + " - " + this->season + " - " + this->name + ".mp4";
|
||||||
".mp4";
|
|
||||||
|
|
||||||
filename = format_filename(filename);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (quality == "all") {
|
if (quality == "all") {
|
||||||
|
|
|
@ -256,10 +256,12 @@ namespace dropout_dl {
|
||||||
std::string series;
|
std::string series;
|
||||||
/// The directory for the series
|
/// The directory for the series
|
||||||
std::string series_directory;
|
std::string series_directory;
|
||||||
|
/// The name of the season that the episode belongs to
|
||||||
|
std::string season;
|
||||||
|
/// The json metadata of the episode
|
||||||
|
std::string metadata;
|
||||||
/// The name of the episode
|
/// The name of the episode
|
||||||
std::string name;
|
std::string name;
|
||||||
/// The number of the episode in the season. This can be a number or a string
|
|
||||||
std::string episode_number;
|
|
||||||
/// The url for the main episode page
|
/// The url for the main episode page
|
||||||
std::string episode_url;
|
std::string episode_url;
|
||||||
/// The data of the main episode page
|
/// The data of the main episode page
|
||||||
|
@ -292,33 +294,43 @@ namespace dropout_dl {
|
||||||
*/
|
*/
|
||||||
static std::string get_episode_page(const std::string& url, const std::string& auth_cookie, const std::string& session_cookie, bool verbose = false);
|
static std::string get_episode_page(const std::string& url, const std::string& auth_cookie, const std::string& session_cookie, bool verbose = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param html_data - Episode page data
|
||||||
|
* @return The json data for the episode
|
||||||
|
*
|
||||||
|
* Gets the json metadata of the episode
|
||||||
|
*/
|
||||||
|
static std::string get_meta_data_json(const std::string& html_data);
|
||||||
|
|
||||||
// Parsing
|
// Parsing
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param html_data - Episode page data
|
* @param meta_data - Episode metadata in json format
|
||||||
* @return The name of the series
|
* @return The name of the series
|
||||||
*
|
*
|
||||||
* Get the name of the series from the episode page
|
* Get the name of the series from the metadata
|
||||||
*/
|
*/
|
||||||
static std::string get_series_name(const std::string& html_data);
|
static std::string get_series_name(const std::string& meta_data);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param html_data - Episode page data
|
* @param meta_data - Episode metadata in json format
|
||||||
|
* @return The name of the season
|
||||||
|
*
|
||||||
|
* Get the name of the season from the metadata
|
||||||
|
*/
|
||||||
|
static std::string get_season_name(const std::string& meta_data);
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param meta_data - Episode metadata in json format
|
||||||
* @return The name of the episode
|
* @return The name of the episode
|
||||||
*
|
*
|
||||||
* Get the name of the episode from the episode page
|
* Get the name of the episode from the metadata
|
||||||
*/
|
*/
|
||||||
static std::string get_episode_name(const std::string& html_data);
|
static std::string get_episode_name(const std::string& meta_data);
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param html_data - Episode page data
|
|
||||||
* @return The number of the episode
|
|
||||||
*
|
|
||||||
* Get the number of the episode from the episode page
|
|
||||||
*/
|
|
||||||
static std::string get_episode_number(const std::string& html_data);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -393,7 +405,17 @@ namespace dropout_dl {
|
||||||
|
|
||||||
episode_data = get_episode_page(episode_url, cookies[0].value, cookies[1].value);
|
episode_data = get_episode_page(episode_url, cookies[0].value, cookies[1].value);
|
||||||
|
|
||||||
name = get_episode_name(episode_data);
|
if (verbose) {
|
||||||
|
std::cout << "Got page data\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata = get_meta_data_json(episode_data);
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
|
std::cout << "Got episode metadata: " << metadata << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
name = get_episode_name(metadata);
|
||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
std::cout << "Got name: " << name << '\n';
|
std::cout << "Got name: " << name << '\n';
|
||||||
|
@ -404,18 +426,18 @@ namespace dropout_dl {
|
||||||
exit(6);
|
exit(6);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->episode_number = get_episode_number(episode_data);
|
this->series = get_series_name(metadata);
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
std::cout << "Got episode: " << this->episode_number << '\n';
|
|
||||||
}
|
|
||||||
|
|
||||||
this->series = get_series_name(episode_data);
|
|
||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
std::cout << "Got series: " << this->series << '\n';
|
std::cout << "Got series: " << this->series << '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this->season = get_season_name(metadata);
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
|
std::cout << "Got season: " << this->season << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
this->series_directory = format_filename(this->series);
|
this->series_directory = format_filename(this->series);
|
||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
|
|
|
@ -289,7 +289,7 @@ std::vector<dropout_dl::cookie> get_cookies_from_chrome(const std::filesystem::p
|
||||||
*/
|
*/
|
||||||
std::vector<dropout_dl::cookie> get_cookies(bool verbose = false) {
|
std::vector<dropout_dl::cookie> get_cookies(bool verbose = false) {
|
||||||
|
|
||||||
std::filesystem::path firefox_profile("_firefox_profile");
|
std::filesystem::path firefox_profile("firefox_profile");
|
||||||
std::filesystem::path chrome_profile("chrome_profile");
|
std::filesystem::path chrome_profile("chrome_profile");
|
||||||
|
|
||||||
if (std::filesystem::exists(firefox_profile)) {
|
if (std::filesystem::exists(firefox_profile)) {
|
||||||
|
@ -369,10 +369,6 @@ int main(int argc, char** argv) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options.filename.empty()) {
|
|
||||||
options.filename = dropout_dl::format_filename(ep.name + ".mp4");
|
|
||||||
}
|
|
||||||
|
|
||||||
ep.download(options.quality, options.output_directory, options.filename);
|
ep.download(options.quality, options.output_directory, options.filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ namespace dropout_dl {
|
||||||
if (e.episode_url.empty()) {
|
if (e.episode_url.empty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
std::cout << '\t' << e.episode_number << ": " << e.name << '\n';
|
std::cout << '\t' << e.name << '\n';
|
||||||
out.push_back(e);
|
out.push_back(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue