/*
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Additional permission under GNU GPL version 3 section 7
 *
 * If you modify this Program, or any covered work, by linking or combining
 * it with OpenSSL (or a modified version of that library), containing parts
 * covered by the terms of OpenSSL License and SSLeay License, the licensors
 * of this Program grant you additional permission to convey the resulting work.
 */

#include <assert.h>
#include <cmath>
#include <cstring>
#include <chrono>
#include <thread>
#include "console.h"

#ifdef _WIN32
#include <windows.h>

void thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id)
{
	SetThreadAffinityMask(h, 1ULL << cpu_id);
}
#else
#include <pthread.h>

#if defined(__APPLE__)
#include <mach/thread_policy.h>
#include <mach/thread_act.h>
#define SYSCTL_CORE_COUNT "machdep.cpu.core_count"
#endif

void thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id)
{
#if defined(__APPLE__)
	thread_port_t mach_thread;
	thread_affinity_policy_data_t policy = { (integer_t)cpu_id };
	mach_thread = pthread_mach_thread_np(h);
	thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, 1);
#else
	cpu_set_t mn;
	CPU_ZERO(&mn);
	CPU_SET(cpu_id, &mn);
	pthread_setaffinity_np(h, sizeof(cpu_set_t), &mn);
#endif
}
#endif // _WIN32

#include "executor.h"
#include "minethd.h"
#include "jconf.h"

#include "crypto/cryptonight.h"

telemetry::telemetry(size_t iThd)
{
	ppHashCounts = new uint64_t*[iThd];
	ppTimestamps = new uint64_t*[iThd];
	iBucketTop = new uint32_t[iThd];

	for (size_t i = 0; i < iThd; i++)
	{
		ppHashCounts[i] = new uint64_t[iBucketSize];
		ppTimestamps[i] = new uint64_t[iBucketSize];
		iBucketTop[i] = 0;

		// Zero row i (not row 0) so every thread's buckets start out empty
		memset(ppHashCounts[i], 0, sizeof(uint64_t) * iBucketSize);
		memset(ppTimestamps[i], 0, sizeof(uint64_t) * iBucketSize);
	}
}

double telemetry::calc_telemetry_data(size_t iLastMillisec, size_t iThread)
{
	using namespace std::chrono;
	uint64_t iTimeNow = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();

	uint64_t iEarliestHashCnt = 0;
	uint64_t iEarliestStamp = 0;
	uint64_t iLatestStamp = 0;
	uint64_t iLatestHashCnt = 0;
	bool bHaveFullSet = false;

	//Start at 1, bucket top points to the next empty slot
	for (size_t i = 1; i < iBucketSize; i++)
	{
		size_t idx = (iBucketTop[iThread] - i) & iBucketMask; //overflow expected here

		if (ppTimestamps[iThread][idx] == 0)
			break; //That means we don't have the data yet

		if (iLatestStamp == 0)
		{
			iLatestStamp = ppTimestamps[iThread][idx];
			iLatestHashCnt = ppHashCounts[iThread][idx];
		}

		if (iTimeNow - ppTimestamps[iThread][idx] > iLastMillisec)
		{
			bHaveFullSet = true;
			break; //We are out of the requested time period
		}

		iEarliestStamp = ppTimestamps[iThread][idx];
		iEarliestHashCnt = ppHashCounts[iThread][idx];
	}

	if (!bHaveFullSet || iEarliestStamp == 0 || iLatestStamp == 0)
		return nan("");

	//Guard against division by zero if both samples carry the same timestamp
	if (iLatestStamp == iEarliestStamp)
		return nan("");

	double fHashes, fTime;
	fHashes = iLatestHashCnt - iEarliestHashCnt;
	fTime = iLatestStamp - iEarliestStamp;
	fTime /= 1000.0;

	return fHashes / fTime;
}

void telemetry::push_perf_value(size_t iThd, uint64_t iHashCount, uint64_t iTimestamp)
{
	size_t iTop = iBucketTop[iThd];
	ppHashCounts[iThd][iTop] = iHashCount;
	ppTimestamps[iThd][iTop] = iTimestamp;

	iBucketTop[iThd] = (iTop + 1) & iBucketMask;
}
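/*
 * Hedged usage sketch (comment only, not compiled): how an executor-style
 * monitoring loop might feed the ring buffer above and read back a windowed
 * hashrate. `tele`, `pvThreads` and the 10 s window are illustrative
 * assumptions, not identifiers defined in this file.
 *
 *   telemetry tele(pvThreads->size());
 *
 *   // sampled periodically, e.g. once per second, for every worker
 *   for(size_t i = 0; i < pvThreads->size(); i++)
 *   {
 *       tele.push_perf_value(i, pvThreads->at(i)->iHashCount.load(std::memory_order_relaxed),
 *           pvThreads->at(i)->iTimestamp.load(std::memory_order_relaxed));
 *   }
 *
 *   double fHps = tele.calc_telemetry_data(10000, 0); // thread 0 over the last 10 000 ms
 *   // returns NaN until a full 10 s window of samples has accumulated
 */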
minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx)
{
	oWork = pWork;
	bQuit = 0;
	iThreadNo = (uint8_t)iNo;
	iJobNo = 0;
	iHashCount = 0;
	iTimestamp = 0;
	pGpuCtx = ctx;

	oWorkThd = std::thread(&minethd::work_main, this);
}

bool minethd::init_gpus()
{
	size_t i, n = jconf::inst()->GetThreadCount();

	printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while...");
	vGpuData.resize(n);

	jconf::thd_cfg cfg;
	for(i = 0; i < n; i++)
	{
		jconf::inst()->GetThreadConfig(i, cfg);
		vGpuData[i].deviceIdx = cfg.index;
		vGpuData[i].rawIntensity = cfg.intensity;
		vGpuData[i].workSize = cfg.w_size;
	}

	printer::inst()->print_msg(L0, "Test GPU 0");
	return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS;
}

std::atomic<uint64_t> minethd::iGlobalJobNo;
std::atomic<uint64_t> minethd::iConsumeCnt; //Threads get jobs as they are initialized
minethd::miner_work minethd::oGlobalWork;
uint64_t minethd::iThreadCount = 0;
std::vector<GpuContext> minethd::vGpuData;

std::vector<minethd*>* minethd::thread_starter(miner_work& pWork)
{
	iGlobalJobNo = 0;
	iConsumeCnt = 0;
	std::vector<minethd*>* pvThreads = new std::vector<minethd*>;

	size_t i, n = jconf::inst()->GetThreadCount();
	pvThreads->reserve(n);

	jconf::thd_cfg cfg;
	for (i = 0; i < n; i++)
	{
		jconf::inst()->GetThreadConfig(i, cfg);
		minethd* thd = new minethd(pWork, i, &vGpuData[i]);

		if(cfg.cpu_aff >= 0)
		{
#if defined(__APPLE__)
			printer::inst()->print_msg(L1, "WARNING: on macOS thread affinity is only advisory.");
#endif
			thd_setaffinity(thd->oWorkThd.native_handle(), cfg.cpu_aff);
		}

		pvThreads->push_back(thd);

		if(cfg.cpu_aff >= 0)
			printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", (int)cfg.cpu_aff);
		else
			printer::inst()->print_msg(L1, "Starting GPU thread, no affinity.");
	}

	iThreadCount = n;
	return pvThreads;
}

void minethd::switch_work(miner_work& pWork)
{
	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
	// faster than threads can consume it. This should never happen in real life.
	// The pool can't physically send jobs faster than every 250ms or so due to net latency.

	while (iConsumeCnt.load(std::memory_order_seq_cst) < iThreadCount)
		std::this_thread::sleep_for(std::chrono::milliseconds(100));

	oGlobalWork = pWork;
	iConsumeCnt.store(0, std::memory_order_seq_cst);
	iGlobalJobNo++;
}

void minethd::consume_work()
{
	memcpy(&oWork, &oGlobalWork, sizeof(miner_work));
	iJobNo++;
	iConsumeCnt++;

	if(!oWork.bStall)
	{
		pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
		XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, oWork.iTarget);
	}
}
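/*
 * calc_start_nonce() is declared in minethd.h and not shown in this file. A
 * hedged sketch of one plausible implementation (an illustrative assumption,
 * not the actual code): carve the 32-bit nonce space into per-thread slices,
 * shifted by the resume counter so a resumed job does not rescan nonces it
 * already covered.
 *
 *   uint32_t minethd::calc_start_nonce(uint32_t resume)
 *   {
 *       // disjoint slice per (resume round, thread) pair, modulo 2^32
 *       return (resume * (uint32_t)iThreadCount + iThreadNo) * (0xFFFFFFFFU / (uint32_t)iThreadCount);
 *   }
 */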
void minethd::work_main()
{
	uint64_t iCount = 0;

	iConsumeCnt++;

	while (bQuit == 0)
	{
		if (oWork.bStall)
		{
			/* We are stalled here because the executor didn't find a job for us yet,
			   either because of network latency or a socket problem. Since we are the
			   raison d'être of this software, it is sensible to just wait until we
			   have something. */

			while (iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
				std::this_thread::sleep_for(std::chrono::milliseconds(100));

			consume_work();
			continue;
		}

		assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));

		while(iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
		{
			cl_uint results[0x100] = { 0 };

			XMRRunJob(pGpuCtx, results);

			// results[0xFF] holds the number of result nonces found in this round;
			// results[0 .. results[0xFF]-1] are the nonces themselves
			for(size_t i = 0; i < results[0xFF]; i++)
			{
				executor::inst()->push_event(ex_event(job_result(oWork.sJobID, oWork.bWorkBlob,
					oWork.iWorkSize, oWork.iTarget, results[i]), oWork.iPoolId));
			}

			// every kernel round tests exactly rawIntensity nonces
			iCount += pGpuCtx->rawIntensity;

			using namespace std::chrono;
			uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();

			iHashCount.store(iCount, std::memory_order_relaxed);
			iTimestamp.store(iStamp, std::memory_order_relaxed);

			std::this_thread::yield();
		}

		consume_work();
	}
}
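/*
 * Hedged end-to-end sketch (comment only): the call order the controlling
 * executor is expected to follow. `oFirstWork` and `oNewWork` are illustrative
 * names for jobs received from the pool, not identifiers defined here.
 *
 *   if(!minethd::init_gpus())
 *       return; // abort: OpenCL compile or device init failed
 *
 *   std::vector<minethd*>* pvThreads = minethd::thread_starter(oFirstWork);
 *
 *   // then, for every new job announced by the pool:
 *   minethd::switch_work(oNewWork); // blocks until all threads consumed the previous job
 */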