From fc5cb4367c2b9cdc8a31b521168d96401e76dd6b Mon Sep 17 00:00:00 2001 From: Moss Date: Fri, 30 Sep 2022 01:13:18 -0400 Subject: [PATCH] Episode: Added Episode Documentation --- src/episode.cpp | 104 ++++--------------- src/episode.h | 266 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 248 insertions(+), 122 deletions(-) diff --git a/src/episode.cpp b/src/episode.cpp index d495520..8a50378 100644 --- a/src/episode.cpp +++ b/src/episode.cpp @@ -46,9 +46,8 @@ namespace dropout_dl { long current_time; long last_progress_timestamp; - std::string curl_filename; - int curl_progress_func(void* ptr, double total_to_download, double downloaded, double total_to_upload, double uploaded) { + int curl_progress_func(void* filename, double total_to_download, double downloaded, double total_to_upload, double uploaded) { const double number_chars = 50; const char* full_character = "▓"; const char* empty_character = "░"; @@ -57,7 +56,7 @@ namespace dropout_dl { if (current_time - 50 > last_progress_timestamp) { double percent_done = (downloaded / total_to_download) * number_chars; double percent_done_clone = percent_done; - std::cout << curl_filename << " ["; + std::cout << *(std::string*)filename << " ["; while (percent_done_clone-- > 0) { std::cout << full_character; } @@ -290,70 +289,6 @@ namespace dropout_dl { return episode_data; } - std::string episode::get_embedded_page(const std::string& url, const std::string& cookie, bool verbose) { - CURLcode ret; - CURL *hnd; - struct curl_slist *slist1; - std::string embedded_page; - - slist1 = NULL; - slist1 = curl_slist_append(slist1, "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:101.0) Gecko/20100101 Firefox/101.0"); - slist1 = curl_slist_append(slist1, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"); - slist1 = curl_slist_append(slist1, "Accept-Language: en-US,en;q=0.5"); - slist1 = curl_slist_append(slist1, "Accept-Encoding: utf-8"); - slist1 = 
curl_slist_append(slist1, "DNT: 1"); - slist1 = curl_slist_append(slist1, "Connection: keep-alive"); - slist1 = curl_slist_append(slist1, "Referer: https://www.dropout.tv/"); - slist1 = curl_slist_append(slist1, "Cookie: __cf_bm=Ayc3uSgUEf9kJ20sfVBLgdo5fvloLmSLWBkJtzzhZR8-1662831290-0-ASVO2Fg9txI6nslt2tle7Y2MjRw4sI8/gFRbMDI8vHIP0nhb1SDk1I7lF5hWK9RMGP9wOFJwyqThLXQkuTj9m2c="); - slist1 = curl_slist_append(slist1, "Upgrade-Insecure-Requests: 1"); - slist1 = curl_slist_append(slist1, "Sec-Fetch-Dest: iframe"); - slist1 = curl_slist_append(slist1, "Sec-Fetch-Mode: navigate"); - slist1 = curl_slist_append(slist1, "Sec-Fetch-Site: cross-site"); - slist1 = curl_slist_append(slist1, "Sec-GPC: 1"); - - hnd = curl_easy_init(); - curl_easy_setopt(hnd, CURLOPT_BUFFERSIZE, 102400L); - curl_easy_setopt(hnd, CURLOPT_URL, url.c_str()); - curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L); - curl_easy_setopt(hnd, CURLOPT_HTTPHEADER, slist1); - curl_easy_setopt(hnd, CURLOPT_USERAGENT, "curl/7.84.0"); - curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L); - curl_easy_setopt(hnd, CURLOPT_HTTP_VERSION, (long)CURL_HTTP_VERSION_2TLS); - curl_easy_setopt(hnd, CURLOPT_FTP_SKIP_PASV_IP, 1L); - curl_easy_setopt(hnd, CURLOPT_TCP_KEEPALIVE, 1L); - curl_easy_setopt(hnd, CURLOPT_VERBOSE, verbose); - - - curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(hnd, CURLOPT_WRITEDATA, &embedded_page); - /* Here is a list of options the curl code used that cannot get generated - as source easily. You may choose to either not use them or implement - them yourself. 
- - CURLOPT_WRITEDATA set to a objectpointer - CURLOPT_INTERLEAVEDATA set to a objectpointer - CURLOPT_WRITEFUNCTION set to a functionpointer - CURLOPT_READDATA set to a objectpointer - CURLOPT_READFUNCTION set to a functionpointer - CURLOPT_SEEKDATA set to a objectpointer - CURLOPT_SEEKFUNCTION set to a functionpointer - CURLOPT_ERRORBUFFER set to a objectpointer - CURLOPT_STDERR set to a objectpointer - CURLOPT_HEADERFUNCTION set to a functionpointer - CURLOPT_HEADERDATA set to a objectpointer - - */ - - ret = curl_easy_perform(hnd); - - curl_easy_cleanup(hnd); - hnd = NULL; - curl_slist_free_all(slist1); - slist1 = NULL; - - return embedded_page; - } - std::string get_generic_page(const std::string& url, bool verbose) { CURL *hnd; struct curl_slist *slist1; @@ -445,11 +380,11 @@ namespace dropout_dl { std::cout << "Got " << this->name << " cookie\n"; } - this->str = tmp; + this->value = tmp; } void cookie::format_from_chrome() { - this->str = this->str.substr(3); + this->value = this->value.substr(3); this->len -= 3; } @@ -481,7 +416,7 @@ namespace dropout_dl { gcry_cipher_setiv(handle, (const void*)&iv, 16); - unsigned long err = gcry_cipher_decrypt(handle, output, this->len, this->str.c_str(), this->len); + unsigned long err = gcry_cipher_decrypt(handle, output, this->len, this->value.c_str(), this->len); if (err) { std::cout << gcry_strerror(err) << std::endl; @@ -489,25 +424,28 @@ namespace dropout_dl { } - this->str = output; - this->str = this->str.substr(0, this->len-7); + this->value = output; + + this->url_decode(); + + this->value = this->value.substr(0, this->len - 7); this->len -= 7; } void cookie::url_decode() { std::string out; - for (int i = 0; i < this->str.size() - 3; i++) { - if (substr_is(this->str, i, "%3D")) { + for (int i = 0; i < this->value.size() - 3; i++) { + if (substr_is(this->value, i, "%3D")) { out += "="; i += 2; } else { - out += this->str[i]; + out += this->value[i]; } } - this->str = out; + this->value = out; this->len = 
out.size(); } @@ -573,7 +511,7 @@ namespace dropout_dl { exit(6); } - std::string episode::get_video_data(const std::string &quality) { + std::string episode::get_video_data(const std::string &quality, const std::string& filename) { CURL* curl = curl_easy_init(); CURLcode res; if(curl) { @@ -586,6 +524,7 @@ namespace dropout_dl { curl_easy_setopt(curl, CURLOPT_WRITEDATA, &out); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, false); curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, dropout_dl::curl_progress_func); + curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, &filename); res = curl_easy_perform(curl); curl_easy_cleanup(curl); @@ -597,8 +536,7 @@ namespace dropout_dl { void episode::download(const std::string& quality, const std::string& series_directory, std::string filename) { if (filename.empty()) { - filename = "S" + (this->season_number.size() < 2 ? "0" + this->season_number : this->season_number) + "E" + - (this->episode_number.size() < 2 ? "0" + this->episode_number : this->episode_number) + this->name + + filename = "E" + (this->episode_number.size() < 2 ? 
"0" + this->episode_number : this->episode_number) + this->name + ".mp4"; std::replace(filename.begin(), filename.end(), ' ', '_'); @@ -618,9 +556,7 @@ namespace dropout_dl { std::fstream out(series_directory + "/" + possible_quality + "/" + filename, std::ios_base::in | std::ios_base::out | std::ios_base::trunc); - curl_filename = series_directory + "/" + possible_quality + "/" + filename; - - out << this->get_video_data(possible_quality) << std::endl; + out << this->get_video_data(possible_quality, series_directory + "/" + possible_quality + "/" + filename) << std::endl; } } else { if (!std::filesystem::is_directory(series_directory)) { @@ -633,9 +569,7 @@ namespace dropout_dl { std::fstream out(series_directory + "/" + filename, std::ios_base::in | std::ios_base::out | std::ios_base::trunc); - curl_filename = series_directory + "/" + filename; - - out << this->get_video_data(quality) << std::endl; + out << this->get_video_data(quality, series_directory + "/" + filename) << std::endl; } } } // dropout_dl \ No newline at end of file diff --git a/src/episode.h b/src/episode.h index a501cf4..8b9d805 100644 --- a/src/episode.h +++ b/src/episode.h @@ -18,10 +18,20 @@ namespace dropout_dl { class cookie { public: + /** + * + * @param data - A string to write to return value of the command to + * @param argc - The number of results from the command + * @param argv - The results of the command + * @param azColName - The column names from the command + * @return 0 on success or -1 on failure + * + * Used by sqlite. Write the first result of the sqlite command to the data string. If there are no results print an error and return -1. 
+ */ static int sqlite_write_callback(void* data, int argc, char** argv, char** azColName) { if (argc < 1) { - std::cerr << "ERROR: sqlite could not find dropout.tv cookie" << std::endl; + std::cerr << "SQLITE ERROR: sqlite could not find desired cookie" << std::endl; return -1; } else { @@ -30,55 +40,69 @@ namespace dropout_dl { } } - static int sqlite_write_callback_uchar(void* data, int argc, char** argv, char** azColName) - { - if (argc < 1) { - std::cerr << "ERROR: sqlite could not find dropout.tv cookie" << std::endl; - return -1; - } - else { - - auto* ck = (dropout_dl::cookie*)data; - - for (int i = 0; i < ck->len; i++) { - if (argv[0][i] > 32 && argv[0][i] < 126) { - std::cout << (unsigned char) argv[0][i] << ' '; - } - else { - std::cout << std::hex << ((int)argv[0][i] & 0xFF) << ' '; - } - ck->str[i] = (unsigned char)argv[0][i]; - } - return 0; - } - } - - + /** + * The name of the value from the sqlite database or "?" if not set. + */ std::string name; - std::string str; + /** + * The value of the cookie + */ + std::string value; + /** + * The length of the value of the cookie + */ int len; + /** + * + * @param name - Name of the value from the sqlite database + * + * Create a cookie with no value and length of 0 + */ explicit cookie(const std::string& name) { this->name = name; + this->value = ""; this->len = 0; } + /** + * + * @param name - Name of the value from the sqlite database + * @param cookie - Value of the cookie + * + * Sets the name and value using the parameters and gets the length from the value + */ cookie(const std::string& name, const std::string& cookie) { this->name = name; - this->str = cookie; + this->value = cookie; this->len = cookie.size(); } + /** + * + * @param cookie - Value of the cookie + * @param length - Length of the cookie + * + * Sets the value and length using the parameters and sets the name as "?" 
+ */ cookie(const std::string& cookie, int length) { - this->str = cookie; + this->value = cookie; this->name = "?"; this->len = length; } + /** + * + * @param name - Name of the value from the sqlite database + * @param cookie - Value of the cookie + * @param length - Length of the cookie + * + * Sets the name, value, and length using the parameters leaving nothing unset. + */ cookie(const std::string& name, const std::string& cookie, int length) { this->name = name; - this->str = cookie; + this->value = cookie; this->len = length; } @@ -88,27 +112,54 @@ namespace dropout_dl { * @param sql_query_base - A base without the name search e.g. "FROM cookies" this function would then append the text "SELECT " and "WHERE name=''" * @param value - The name of the value to fill the cookie with * + * Retrieve the value of a cookie from the provided sqlite database. * */ void get_value_from_db(sqlite3* db, const std::string& sql_query_base, const std::string& value, bool verbose = false, int (*callback)(void*,int,char**,char**) = sqlite_write_callback); - void format_from_chrome(); - /** * * @param password - Default is "peanuts". This works for linux. The password should be keychain password on MacOS * @param salt - Salt is "saltysalt" for both MacOS and Linux * @param length - Length of 16 is standard for both MacOS and Linux * @param iterations - 1 on linux and 1003 on MacOS + * + * Decrypt chrome cookies and format them to be usable as regular cookies. Currently this has only been tested for the _session and __cf_bm cookies from dropout.tv but I see no reason this would not work for anything else. */ void chrome_decrypt(const std::string& password = "peanuts", int iterations = 1, const std::string& salt = "saltysalt", int length = 16); + private: + /** + * Remove url encoded text from a cookie. Currently this only checks for %3D ('=') as that is the only thing I've come across in cookies during the entirety of this project. 
+ */ void url_decode(); + + /** + * Remove the leading version (e.g. "v10") from the cookie + */ + void format_from_chrome(); }; + /** + * + * @param string - The string which is being searched + * @param start - The starting index of the substring + * @param test_str - The string which is being tested + * @return whether or not the substring is at the start index + * + * Checks if test_str is a substring of string at the start index + */ bool substr_is(const std::string& string, int start, const std::string& test_str); + /** + * + * @param str - The base string which is being modified + * @param from - what is being replaced + * @param to - what to replace it with + * + * Replaces every instance of the from string with the to string. + */ void replace_all(std::string& str, const std::string& from, const std::string& to); #if defined(__WIN32__) @@ -116,67 +167,205 @@ namespace dropout_dl { msec_t time_ms(void); #else #include + /** + * + * @return The time in milliseconds + */ long time_ms(); #endif - static int curl_progress_func(void* ptr, double total_to_download, double downloaded, double total_to_upload, double uploaded); + /** + * + * @param filename - Name of the file that is being downloaded + * @param total_to_download - The total amount of bytes that are being downloaded + * @param downloaded - The current amount of bytes that have been downloaded + * @param total_to_upload - The total amount of bytes that are being uploaded (This project does not upload so this is not used) + * @param uploaded - The current amount of bytes that have been uploaded (This project does not upload so this is not used) + * @return 0 + * + * Used by curl. Displays the filename followed by a bar which shows the percent downloaded followed by the number of Mib downloaded out of the total. + * The function takes the upload amount because that is required by curl but they are just ignored for this since we never upload anything. 
+ */ static int curl_progress_func(void* filename, double total_to_download, double downloaded, double total_to_upload, double uploaded); + /** + * + * @param contents - What we're writing + * @param size - The size in bytes of each data element (curl always passes 1) + * @param nmemb - The number of data elements in contents + * @param userp - Where the information in contents is written to + * @return size * nmemb + * + * Used by curl. Writes the information gathered by curl into the userp string. This function was not written by me. + */ size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp); + /** + * + * @param url - Url which is being downloaded + * @param verbose - Whether or not to be verbose (not recommended) + * @return The page data as a string + * + * This function downloads the provided url and returns it as a string. Does not use cookies. This was ripped directly from a firefox network request for an episode page and modified minimally. + */ std::string get_generic_page(const std::string& url, bool verbose = false); + /** + * A class for handling all episode information. This class is wildly overkill if downloading an entire series as it gathers the series name and season for every episode. This is not an issue here because all the information it gathers is already available while gathering the video url and the majority of the time taken while parsing an episode is from downloading the three required webpages. + */ class episode { public: + /// The name of the series that the episode belongs to std::string series; + /// The name of the episode std::string name; + /// The number of the episode in the season. This can be a number or a string std::string episode_number; + /// The number of the season. This can be a number or a string std::string season_number; + /// The url for the main episode page std::string episode_url; + /// The data of the main episode page std::string episode_data; + /// The url for the main embedded page. 
This page contains the link to the config page std::string embedded_url; + /// The data of the main embedded page. This contains the link to the config page std::string embedded_page_data; + /// The url for the main config page. This page contains the link to the mp4 video of the episode std::string config_url; + /// The data of the main config page. This contains the link to the mp4 video of the episode std::string config_data; + /// The list of the qualities available for the episode. This is a parallel array with the quality_urls vector std::vector qualities; + /// The list of the urls correlating with the qualities array. std::vector quality_urls; + /// Whether or not to be verbose bool verbose = false; // Curl + /** + * + * @param url - The url of the episode page + * @param auth_cookie - The authentication cookie with name "__cf_bm" + * @param session_cookie - The session cookie with name "_session" + * @param verbose - Whether or not to be verbose (not recommended) + * @return The episode page data + */ static std::string get_episode_page(const std::string& url, const std::string& auth_cookie, const std::string& session_cookie, bool verbose = false); - static std::string get_embedded_page(const std::string& url, const std::string& cookie, bool verbose = false); - // Parsing + /** + * + * @param html_data - Episode page data + * @return The name of the series + * + * Get the name of the series from the episode page + */ static std::string get_series_name(const std::string& html_data); + /** + * + * @param html_data - Episode page data + * @return The name of the episode + * + * Get the name of the episode from the episode page + */ static std::string get_episode_name(const std::string& html_data); + /** + * + * @param html_data - Episode page data + * @return The number of the episode + * + * Get the number of the episode from the episode page + */ static std::string get_episode_number(const std::string& html_data); + /** + * + * @param html_data - Episode 
page data + * @return The season number + * + * Get the season number from the episode page + */ static std::string get_season_number(const std::string& html_data); + /** + * + * @param html_data - Episode page data + * @return The url of the embedded page + * + * Get the url of the embedded page from the episode page + */ static std::string get_embed_url(const std::string& html_data); + /** + * + * @param html_data - Embedded page data + * @return The url of the config page + * + * Get the url of the config page from the embedded page data + */ static std::string get_config_url(const std::string& html_data); + /** + * + * @return A vector of qualities + * + * Gets the available qualities for the episode and populates the qualities and quality_urls vectors. + * If this function has already been run it simply returns the already populated qualities vector unless said vector has been cleared. + */ std::vector get_qualities(); + /** + * + * @param quality - The quality of the video + * @return The url to the video + * + * Get a link to the video of the episode with the given quality. Quality must be contained in the qualities vector otherwise this function will give an error and exit the program after listing the available qualities. + */ std::string get_video_url(const std::string& quality); - std::string get_video_data(const std::string& quality); + /** + * + * @param quality - The quality of the video + * @param filename - The filename which will be displayed while downloading the video + * @return The video data + * + * Download the episode with the given quality and return the raw video data as a string. The filename parameter is only used for displaying while downloading the video so that the user knows what is being downloaded. The filename argument is entirely optional and this function will not place the video into a file whether the value is given or not. 
+ */ std::string get_video_data(const std::string& quality, const std::string& filename = ""); + /** + * + * @param quality - The quality of the video + * @param series_directory - The directory which the episode is downloaded into + * @param filename - The name of the file (Will default if empty) + * + * Downloads the episode using the get_video_data function and places it into the filename file in the series_directory directory. + * If the filename parameter is left empty it will default to the E\<episode_number\>\<name\>.mp4 format. + */ void download(const std::string& quality, const std::string& series_directory, std::string filename = ""); + /** + * + * @param episode_url - Link to the episode + * @param cookies - The current cookies from the browser + * @param verbose - Whether or not to be verbose + * + * Create an episode object from the link using the cookies to get all the necessary information. + * This constructor initializes all the object data. + */ episode(const std::string& episode_url, std::vector cookies, bool verbose = false) { this->episode_url = episode_url; this->verbose = verbose; - episode_data = get_episode_page(episode_url, cookies[0].str, cookies[1].str); + episode_data = get_episode_page(episode_url, cookies[0].value, cookies[1].value); name = get_episode_name(episode_data); @@ -219,7 +408,7 @@ namespace dropout_dl { std::cout << "Got embedded url: " << this->embedded_url << '\n'; } - this->embedded_page_data = get_embedded_page(this->embedded_url, cookies[0].str); + this->embedded_page_data = get_generic_page(this->embedded_url); if (this->embedded_page_data.find("you are not authorized") != std::string::npos) { std::cerr << "ERROR: Could not access video. Try refreshing cookies.\n"; @@ -239,6 +428,9 @@ namespace dropout_dl { this->get_qualities(); } + /** + * Creates an episode object with no data. This should only be used for invalid states. + */ episode() = default; };