mirror of https://github.com/laurent22/joplin.git
Android: Switch default library used for Whisper voice typing (#11881)
parent 502c929c88
commit 7f51712311
@@ -811,12 +811,11 @@ packages/app-mobile/services/e2ee/crypto.js
packages/app-mobile/services/plugins/PlatformImplementation.js
packages/app-mobile/services/profiles/index.js
packages/app-mobile/services/voiceTyping/VoiceTyping.js
packages/app-mobile/services/voiceTyping/utils/splitWhisperText.test.js
packages/app-mobile/services/voiceTyping/utils/splitWhisperText.js
packages/app-mobile/services/voiceTyping/utils/unzip.android.js
packages/app-mobile/services/voiceTyping/utils/unzip.js
packages/app-mobile/services/voiceTyping/vosk.android.js
packages/app-mobile/services/voiceTyping/vosk.js
packages/app-mobile/services/voiceTyping/whisper.test.js
packages/app-mobile/services/voiceTyping/whisper.js
packages/app-mobile/setupQuickActions.js
packages/app-mobile/tools/buildInjectedJs/BundledFile.js

@@ -786,12 +786,11 @@ packages/app-mobile/services/e2ee/crypto.js
packages/app-mobile/services/plugins/PlatformImplementation.js
packages/app-mobile/services/profiles/index.js
packages/app-mobile/services/voiceTyping/VoiceTyping.js
packages/app-mobile/services/voiceTyping/utils/splitWhisperText.test.js
packages/app-mobile/services/voiceTyping/utils/splitWhisperText.js
packages/app-mobile/services/voiceTyping/utils/unzip.android.js
packages/app-mobile/services/voiceTyping/utils/unzip.js
packages/app-mobile/services/voiceTyping/vosk.android.js
packages/app-mobile/services/voiceTyping/vosk.js
packages/app-mobile/services/voiceTyping/whisper.test.js
packages/app-mobile/services/voiceTyping/whisper.js
packages/app-mobile/setupQuickActions.js
packages/app-mobile/tools/buildInjectedJs/BundledFile.js

@@ -33,6 +33,7 @@
"/packages/app-desktop/build/",
"/packages/app-desktop/utils/checkForUpdatesUtilsTestData.ts",
"/packages/app-desktop/vendor/",
"/packages/app-mobile/android/vendor/",
"/packages/app-mobile/ios/Pods/",
"/packages/app-mobile/lib/rnInjectedJs",
"/packages/app-mobile/pluginAssets",

@@ -70,6 +70,13 @@ def enableProguardInReleaseBuilds = false
def jscFlavor = 'org.webkit:android-jsc:+'

android {

    externalNativeBuild {
        cmake {
            path file('src/main/cpp/CMakeLists.txt')
            version '3.22.1'
        }
    }
    ndkVersion rootProject.ext.ndkVersion
    buildToolsVersion rootProject.ext.buildToolsVersion
    compileSdk rootProject.ext.compileSdkVersion

@@ -81,12 +88,17 @@ android {
        targetSdkVersion rootProject.ext.targetSdkVersion
        versionCode 2097764
        versionName "3.3.1"
        ndk {
            abiFilters "armeabi-v7a", "x86", "arm64-v8a", "x86_64"
        }
        ndk {
            abiFilters "armeabi-v7a", "x86", "arm64-v8a", "x86_64"
        }

        // Needed to fix: The number of method references in a .dex file cannot exceed 64K
        multiDexEnabled true
        externalNativeBuild {
            cmake {
                cppFlags '-DCMAKE_BUILD_TYPE=Release'
            }
        }
    }
    signingConfigs {
        debug {

@@ -95,14 +107,14 @@ android {
            keyAlias 'androiddebugkey'
            keyPassword 'android'
        }
        release {
            if (project.hasProperty('JOPLIN_RELEASE_STORE_FILE')) {
                storeFile file(JOPLIN_RELEASE_STORE_FILE)
                storePassword JOPLIN_RELEASE_STORE_PASSWORD
                keyAlias JOPLIN_RELEASE_KEY_ALIAS
                keyPassword JOPLIN_RELEASE_KEY_PASSWORD
            }
        }
        release {
            if (project.hasProperty('JOPLIN_RELEASE_STORE_FILE')) {
                storeFile file(JOPLIN_RELEASE_STORE_FILE)
                storePassword JOPLIN_RELEASE_STORE_PASSWORD
                keyAlias JOPLIN_RELEASE_KEY_ALIAS
                keyPassword JOPLIN_RELEASE_KEY_PASSWORD
            }
        }
    }
    buildTypes {
        debug {

@@ -127,10 +139,6 @@ dependencies {
    } else {
        implementation jscFlavor
    }

    // Needed for Whisper speech-to-text
    implementation 'com.microsoft.onnxruntime:onnxruntime-android:latest.release'
    implementation 'com.microsoft.onnxruntime:onnxruntime-extensions-android:latest.release'
}

apply from: file("../../node_modules/@react-native-community/cli-platform-android/native_modules.gradle"); applyNativeModulesAppBuildGradle(project)

@@ -0,0 +1,64 @@
# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html.
# For more examples on how to use CMake, see https://github.com/android/ndk-samples.

# Sets the minimum CMake version required for this project.
cmake_minimum_required(VERSION 3.22.1)

# Declares the project name. The project name can be accessed via ${ PROJECT_NAME},
# Since this is the top level CMakeLists.txt, the project name is also accessible
# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
# build script scope).
project("joplin")

# Creates and names a library, sets it as either STATIC
# or SHARED, and provides the relative paths to its source code.
# You can define multiple libraries, and CMake builds them for you.
# Gradle automatically packages shared libraries with your APK.
#
# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
# is preferred for the same purpose.
#
# In order to load a library into your app from Java/Kotlin, you must call
# System.loadLibrary() and pass the name of the library defined here;
# for GameActivity/NativeActivity derived applications, the same library name must be
# used in the AndroidManifest.xml file.
add_library(${CMAKE_PROJECT_NAME} SHARED
    # List C/C++ source files with relative paths to this CMakeLists.txt.
    whisperWrapper.cpp
    utils/WhisperSession.cpp
    utils/findLongestSilence.cpp
    utils/findLongestSilence_test.cpp
)

set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../vendor/whisper.cpp)

# Based on the Whisper.cpp Android example:
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections")

# Whisper: See https://stackoverflow.com/a/76290722
add_subdirectory(${WHISPER_LIB_DIR} ./whisper)

# Directories for header files
target_include_directories(
    ${CMAKE_PROJECT_NAME}
    PUBLIC
    ${PROJECT_BASE_DIR}/shared
    ${WHISPER_LIB_DIR}/include
)

# Specifies libraries CMake should link to your target library. You
# can link libraries from various origins, such as libraries defined in this
# build script, prebuilt third-party libraries, or Android system libraries.
target_link_libraries(${CMAKE_PROJECT_NAME}
    whisper
    # List libraries link to the target library
    android
    log
)

@@ -0,0 +1,154 @@
#include "WhisperSession.h"

#include <utility>
#include <sstream>
#include <algorithm>
#include "whisper.h"
#include "findLongestSilence.h"
#include "androidUtil.h"

WhisperSession::WhisperSession(const std::string& modelPath, std::string lang, std::string prompt)
    : lang_ {std::move(lang)}, prompt_ {std::move(prompt)} {
    whisper_context_params contextParams = whisper_context_default_params();

    // Lifetime(pModelPath): Whisper.cpp creates a copy of pModelPath and stores it in a std::string.
    // whisper_init_from_file_with_params doesn't seem to otherwise save pModelPath. As such, it's
    // safe to pass a pointer to a std::string's representation:
    const char *pModelPath = modelPath.c_str();
    pContext_ = whisper_init_from_file_with_params(pModelPath, contextParams);

    if (pContext_ == nullptr) {
        throw std::runtime_error("Unable to initialize the Whisper context.");
    }
}

WhisperSession::~WhisperSession() {
    if (pContext_ != nullptr) {
        whisper_free(pContext_);
    }
}

whisper_full_params
WhisperSession::buildWhisperParams_() {
    whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    // WHISPER_SAMPLING_BEAM_SEARCH is an alternative to greedy:
    // params.beam_search = { .beam_size = 2 };
    params.print_realtime = false;
    // Disable timestamps: They make creating custom Whisper models more difficult:
    params.print_timestamps = false;
    params.no_timestamps = true;

    params.print_progress = false;
    params.translate = false;
    params.offset_ms = 0;
    params.single_segment = true;
    // Avoid non-speech tokens (e.g. "(crackle)"). For now, this is disabled because it seems to
    // cause increased hallucinations (e.g. repeated "Thank you"s).
    // params.suppress_nst = true;
    params.temperature = 0; // Initial randomness
    // There's also a temperature_inc variable, which is used when decoding fails (Whisper increases
    // the temperature by temperature_inc and retries).

    // Following the whisper streaming example in setting prompt_tokens to nullptr
    // when using VAD (Voice Activity Detection)
    params.initial_prompt = prompt_.c_str();
    params.prompt_tokens = nullptr;
    params.prompt_n_tokens = 0;

    // Lifetime: lifetime(params) < lifetime(lang_) = lifetime(this).
    params.language = lang_.c_str();

    return params;
}

std::string
WhisperSession::transcribe_(const std::vector<float>& audio, size_t transcribeCount) {
    int minTranscribeLength = WHISPER_SAMPLE_RATE / 2; // 0.5s
    if (transcribeCount < minTranscribeLength) {
        return "";
    }

    whisper_full_params params = buildWhisperParams_();
    whisper_reset_timings(pContext_);

    transcribeCount = std::min(audio.size(), transcribeCount);

    if (whisper_full(pContext_, params, audio.data(), transcribeCount) != 0) {
        throw std::runtime_error("Failed to run Whisper (non-zero exit status).");
    } else {
        whisper_print_timings(pContext_);
    }

    // Tokens to be used as a prompt for the next run of Whisper
    unsigned int segmentCount = whisper_full_n_segments(pContext_);

    // Build the results
    std::stringstream results;
    for (int i = 0; i < segmentCount; i++) {
        results << " " << whisper_full_get_segment_text(pContext_, i);
    }

    std::string result = results.str();
    LOGD("Transcribed: %s (audio len %.2f)", result.c_str(), audio.size() / (float) WHISPER_SAMPLE_RATE);

    return result;
}

std::string
WhisperSession::splitAndTranscribeBefore_(int transcribeUpTo, int trimTo) {
    std::string result = transcribe_(audioBuffer_, transcribeUpTo);

    // Trim
    LOGI("Trim to %.2f s, transcribe to %.2f s", (float) trimTo / WHISPER_SAMPLE_RATE, (float) transcribeUpTo / WHISPER_SAMPLE_RATE);
    audioBuffer_ = std::vector(audioBuffer_.begin() + trimTo, audioBuffer_.end());
    return result;
}

std::string
WhisperSession::transcribeNextChunk(const float *pAudio, int sizeAudio) {
    std::string finalizedContent;

    // Update the local audio buffer
    for (int i = 0; i < sizeAudio; i++) {
        audioBuffer_.push_back(pAudio[i]);
    }

    // Does the audio buffer need to be split somewhere?
    int maximumSamples = WHISPER_SAMPLE_RATE * 25;
    if (audioBuffer_.size() >= maximumSamples) {
        float minSilenceSeconds = 0.3f;
        auto silenceRange = findLongestSilence(
            audioBuffer_, WHISPER_SAMPLE_RATE, minSilenceSeconds, maximumSamples
        );

        // In this case, the audio is long enough that it needs to be split somewhere. If there's
        // no suitable pause available, default to splitting in the middle.
        int halfBufferSize = audioBuffer_.size() / 2;
        int transcribeTo = silenceRange.isValid ? silenceRange.start : halfBufferSize;
        int trimTo = silenceRange.isValid ? silenceRange.end : halfBufferSize;

        finalizedContent = splitAndTranscribeBefore_(transcribeTo, trimTo);
    } else if (audioBuffer_.size() > WHISPER_SAMPLE_RATE * 3) {
        // Allow brief pauses to create new paragraphs:
        float minSilenceSeconds = 2.0f;
        auto splitPoint = findLongestSilence(
            audioBuffer_, WHISPER_SAMPLE_RATE, minSilenceSeconds, maximumSamples
        );
        if (splitPoint.isValid) {
            int tolerance = WHISPER_SAMPLE_RATE / 20; // 0.05s
            bool isCompletelySilent = splitPoint.start < tolerance && splitPoint.end > audioBuffer_.size() - tolerance;
            if (isCompletelySilent) {
                audioBuffer_.clear();
            } else {
                finalizedContent = splitAndTranscribeBefore_(splitPoint.start, splitPoint.end);
            }
        }
    }

    previewText_ = transcribe_(audioBuffer_, audioBuffer_.size());
    return finalizedContent;
}

std::string WhisperSession::getPreview() {
    return previewText_;
}

@@ -0,0 +1,27 @@
#pragma once

#include <string>
#include <vector>
#include "whisper.h"

class WhisperSession {
public:
    WhisperSession(const std::string& modelPath, std::string lang, std::string prompt);
    ~WhisperSession();
    std::string transcribeNextChunk(const float *pAudio, int sizeAudio);
    std::string getPreview();

private:
    // Current preview state
    std::string previewText_;

    whisper_full_params buildWhisperParams_();
    std::string transcribe_(const std::vector<float>& audio, size_t samplesToTranscribe);
    std::string splitAndTranscribeBefore_(int transcribeUpTo, int trimTo);

    whisper_context *pContext_;
    const std::string lang_;
    const std::string prompt_;

    std::vector<float> audioBuffer_;
};
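
Usage sketch (not part of the commit): WhisperSession is driven the same way the JNI wrapper in whisperWrapper.cpp drives it; the model path and language below are placeholders.

// Minimal sketch, assuming a valid whisper.cpp model file and 16 kHz mono float PCM input.
#include "WhisperSession.h"
#include <iostream>
#include <string>
#include <vector>

void transcribeSketch(const std::vector<float>& pcmChunk) {
    WhisperSession session("/path/to/whisper-model.bin", "en", "");
    // Feed audio incrementally; text is only finalized once a long pause
    // (or the 25-second buffer limit) causes the internal buffer to be split.
    std::string finalized = session.transcribeNextChunk(
        pcmChunk.data(), static_cast<int>(pcmChunk.size()));
    std::cout << finalized << " [preview: " << session.getPreview() << "]" << std::endl;
}
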
@@ -0,0 +1,10 @@
#pragma once

#include <android/log.h>

// Use macros for these rather than functions. Functions generate a "may be unsafe"
// warning because the compiler can't check that the first argument is a string
// literal.
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, "Whisper::JNI", __VA_ARGS__);
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "Whisper::JNI", __VA_ARGS__);
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, "Whisper::JNI", __VA_ARGS__);

@@ -0,0 +1,111 @@
#include "findLongestSilence.h"
#include "androidUtil.h"

#include <cmath>

static void highpass(std::vector<float>& data, int sampleRate) {
    // Highpass filter. See https://en.wikipedia.org/wiki/High-pass_filter and
    // the example in whisper.cpp/streaming.
    float highpassCutoffHz = 60.0f;
    float RC = 1.0f / (2 * 3.1416f * highpassCutoffHz);
    float timePerSample = 1.0f / sampleRate;
    float alpha = RC / (RC + timePerSample);

    float lastInput = data[0];
    for (int i = 1; i < data.size(); i++) {
        float currentInput = data[i];
        data[i] = alpha * data[i - 1] + alpha * (currentInput - lastInput);
        lastInput = currentInput;
    }
}
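
For reference, the loop above implements the standard discrete-time RC high-pass recurrence (this matches the constants defined in the code; x is the input signal, y the filtered output):

y_i = \alpha\, y_{i-1} + \alpha\,(x_i - x_{i-1}), \qquad
\alpha = \frac{RC}{RC + \Delta t}, \qquad
RC = \frac{1}{2\pi f_c}, \qquad
f_c = 60\ \mathrm{Hz}, \qquad
\Delta t = \frac{1}{\mathrm{sampleRate}}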

SilenceRange findLongestSilence(
    const std::vector<float>& audioData,
    int sampleRate,
    float minSilenceLengthSeconds,
    int maxSilencePosition
) {
    int bestCandidateLength = 0;
    int bestCandidateStart = -1;
    int bestCandidateEnd = -1;

    int currentCandidateStart = -1;

    std::vector<float> processedAudio { audioData };
    highpass(processedAudio, sampleRate);

    // Break into windows of size `windowSize`:
    int windowSize = 256;
    int windowsPerSecond = sampleRate / windowSize;
    int quietWindows = 0;

    // Finishes the current candidate for longest silence
    auto finalizeCandidate = [&] (int currentOffset) {
        bool hasCandidate = currentCandidateStart >= 0;
        if (!hasCandidate) {
            return;
        }

        int currentCandidateLength = currentOffset - currentCandidateStart;
        if (currentCandidateLength > bestCandidateLength && currentCandidateStart <= maxSilencePosition) {
            bestCandidateLength = currentCandidateLength;
            bestCandidateStart = currentCandidateStart;
            bestCandidateEnd = currentOffset;
            LOGD("New best candidate with length %d", currentCandidateLength);
        }

        currentCandidateStart = -1;
    };

    int windowOffset;
    for (windowOffset = 0; windowOffset < processedAudio.size() && windowOffset <= maxSilencePosition; windowOffset += windowSize) {
        int rollingAverageSize = 24;
        float threshold = static_cast<float>(rollingAverageSize) / 80.0f;

        // Count the number of samples that (when averaged with the nearby samples)
        // are below some threshold value.
        float absSum = 0;
        int silentSamples = 0;
        for (int i = windowOffset; i < windowOffset + windowSize && i < processedAudio.size(); i++) {
            absSum += std::abs(processedAudio[i]);

            bool isSumComplete = i - rollingAverageSize >= windowOffset;
            if (isSumComplete) {
                absSum -= std::abs(processedAudio[i - rollingAverageSize]);

                if (absSum < threshold) {
                    silentSamples++;
                }
            }
        }

        // The window should be considered "quiet" if enough samples were below the threshold.
        // Don't require all of them to be to allow clicks and pops.
        if (silentSamples >= windowSize * 3 / 4) {
            quietWindows++;
        } else {
            quietWindows = 0;
        }

        int minQuietWindows = static_cast<int>(windowsPerSecond * minSilenceLengthSeconds);
        if (quietWindows >= minQuietWindows && currentCandidateStart == -1) {
            // Found a candidate. Start it.
            currentCandidateStart = windowOffset;
        } else if (quietWindows == 0) {
            // Ended a candidate. Is it better than the best?
            finalizeCandidate(windowOffset);
        }
    }

    finalizeCandidate(windowOffset);

    // Return the best candidate.
    if (bestCandidateLength == 0) {
        return { .isValid = false, .start = 0, .end = 0 };
    } else {
        return {
            .isValid = true,
            .start = bestCandidateStart,
            .end = bestCandidateEnd
        };
    }
}

@@ -0,0 +1,24 @@
#pragma once

#include <vector>
#include <optional>
#include <tuple>

struct SilenceRange {
    bool isValid;
    int start;
    int end;
};

SilenceRange findLongestSilence(
    const std::vector<float>& audioData,
    int sampleRate,

    // Minimum length of silence in seconds
    float minSilenceLengthSeconds,

    // Doesn't check for silence at a position greater than maximumSilenceStart
    int maximumSilenceStart
);
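
Usage sketch (not part of the commit): this mirrors how WhisperSession::transcribeNextChunk decides where to split its buffer; the 16 kHz sample rate is an assumption matching whisper.cpp's WHISPER_SAMPLE_RATE.

#include "findLongestSilence.h"
#include <vector>

void splitSketch(const std::vector<float>& samples) {
    const int sampleRate = 16000; // assumed: 16 kHz mono float PCM
    SilenceRange silence = findLongestSilence(
        samples, sampleRate, 0.3f, static_cast<int>(samples.size()));
    if (silence.isValid) {
        // Transcribe samples[0, silence.start) now and keep samples[silence.end, end) buffered.
    }
}
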
@@ -0,0 +1,169 @@
#include "findLongestSilence_test.h"
#include "findLongestSilence.h"
#include "androidUtil.h"

#include <string>
#include <vector>
#include <sstream>
#include <cmath>
#include <random>
#include <functional>

static void testTones();
static void testToneWithPause();
static void testSilence();
static void testNoise();

static void fail(const std::string& message);

struct GeneratedAudio {
    std::vector<float> data;
    int sampleRate;
    int sampleCount;
};

using AudioGenerator = std::function<const float(float)>;
static GeneratedAudio makeAudio(const AudioGenerator& generator, int sampleRate, float duration);
static void expectNoSilence(const GeneratedAudio& audio, const std::string& testLabel);
static void expectSilenceBetween(const GeneratedAudio& audio, float startTimeSeconds, float stopTimeSeconds, const std::string& testLabel);


void findLongestSilence_test() {
    testTones();
    testToneWithPause();
    testSilence();
    testNoise();
}


static void testTones() {
    for (int frequency = 440; frequency < 1600; frequency += 300) {
        std::stringstream messageBuilder;
        messageBuilder << "Should not find silence in tone with frequency " << frequency << " HZ.";

        auto audioTone = makeAudio([frequency](float t) {
            // Also set the amplitude to 0.2f (to more closely match mic input).
            return std::sin(t * static_cast<float>(frequency)) * 0.2f;
        }, 15000, 10.0f);

        expectNoSilence(audioTone, messageBuilder.str());
    }

    auto lowFrequencyTone = makeAudio([](float t) {
        return std::sin(t * 8) * 0.3f;
    }, 15000, 10.0f);
    expectSilenceBetween(lowFrequencyTone, 0.0f, 10.0f, "Should find silence in a very low-frequency tone");
}

static void testToneWithPause() {
    auto audioToneWithPause = makeAudio([](float t) {
        if (t < 5.0f || t > 6.0f) {
            return std::sin(t * 880);
        } else {
            return 0.0f;
        }
    }, 15000, 11.0f);
    expectSilenceBetween(audioToneWithPause, 5.0f, 6.0f, "Should find silence when completely silent in a region");

    auto audioToneWithTwoPauses = makeAudio([](float t) {
        if (t < 1.0f || (t > 8.0f && t < 10.0f)) {
            return 0.0f;
        } else {
            return std::sin(t * 880);
        }
    }, 15000, 20.0f);
    expectSilenceBetween(audioToneWithTwoPauses, 8.0f, 10.0f, "Should find the longest silence when there are two pauses");
}

static void testSilence() {
    auto silence = makeAudio([](float t) {
        return 0.0f;
    }, 16000, 10.0f);
    expectSilenceBetween(silence, 0.0f, 10.0f, "Should find silence in a completely silent signal");
}

static void testNoise() {
    std::minstd_rand randomness {2};
    std::uniform_real_distribution noiseGenerator {-1.0, 1.0};
    auto quietNoise = makeAudio([&](float t) {
        return noiseGenerator(randomness) * 0.02f;
    }, 16000, 5.0f);
    expectSilenceBetween(quietNoise, 0.0f, 5.0f, "Should find silence in a tone with low-amplitude noise");
}


static void fail(const std::string& message) {
    throw std::runtime_error(message);
}

static GeneratedAudio makeAudio(const AudioGenerator& generator, int sampleRate, float duration) {
    std::vector<float> result { };

    int numSamples = static_cast<int>(static_cast<float>(sampleRate) * duration);
    for (int i = 0; i < numSamples; i++) {
        float time = static_cast<float>(i) / static_cast<float>(sampleRate);
        result.push_back(generator(time));
    }

    return {
        .data = result,
        .sampleRate = sampleRate,
        .sampleCount = numSamples,
    };
}

static void logTestPass(const std::string& message) {
    LOGI("Test PASS: %s", message.c_str());
}

static float samplesToSeconds(int samples, int sampleRate) {
    return static_cast<float>(samples) / static_cast<float>(sampleRate);
}

static void expectNoSilence(const GeneratedAudio& audio, const std::string& testLabel) {
    auto silence = findLongestSilence(
        audio.data,
        audio.sampleRate,
        0.02f,
        audio.sampleCount
    );
    if (silence.isValid) {
        std::stringstream errorBuilder;
        float startSeconds = samplesToSeconds(silence.start, audio.sampleRate);
        float stopSeconds = samplesToSeconds(silence.end, audio.sampleRate);
        errorBuilder << "Error: Found silence between " << startSeconds << "s and " << stopSeconds << "s";
        errorBuilder << ": " << testLabel;
        fail(errorBuilder.str());
    }

    logTestPass(testLabel);
}

static void expectSilenceBetween(const GeneratedAudio& audio, float startTimeSeconds, float stopTimeSeconds, const std::string& testLabel) {
    auto silenceResult = findLongestSilence(
        audio.data,
        audio.sampleRate,
        0.02f,
        audio.sampleCount
    );

    if (!silenceResult.isValid) {
        fail("Error: No silence found: " + testLabel);
    }

    auto checkEndpoint = [&] (int actualValueSamples, float expectedValueSeconds, const std::string& description) {
        float actualValueSeconds = samplesToSeconds(actualValueSamples, audio.sampleRate);
        float tolerance = 0.1f; // 100ms
        if (std::abs(expectedValueSeconds - actualValueSeconds) > tolerance) {
            std::stringstream messageBuilder;
            messageBuilder << "Error: Silence " << description << " mismatch: ";
            messageBuilder << "got " << actualValueSeconds << "s expected " << expectedValueSeconds << "s. ";
            messageBuilder << testLabel;
            fail(messageBuilder.str());
        }
    };

    checkEndpoint(silenceResult.start, startTimeSeconds, "start time");
    checkEndpoint(silenceResult.end, stopTimeSeconds, "stop time");

    logTestPass(testLabel);
}

@@ -0,0 +1,3 @@
#pragma once

void findLongestSilence_test();

@@ -0,0 +1,125 @@
// Write C++ code here.
//
// Do not forget to dynamically load the C++ library into your application.
//
// For instance,
//
// In MainActivity.java:
//    static {
//      System.loadLibrary("joplin");
//    }
//
// Or, in MainActivity.kt:
//    companion object {
//      init {
//         System.loadLibrary("joplin")
//      }
//    }
#include <jni.h>
#include <memory>
#include <string>
#include <sstream>
#include <android/log.h>
#include "whisper.h"
#include "utils/WhisperSession.h"
#include "utils/androidUtil.h"
#include "utils/findLongestSilence_test.h"

void log_android(enum ggml_log_level level, const char* message, void* user_data) {
    android_LogPriority priority = level == 4 ? ANDROID_LOG_ERROR : ANDROID_LOG_INFO;
    __android_log_print(priority, "Whisper::JNI::cpp", "%s", message);
}

jstring stringToJava(JNIEnv *env, const std::string& source) {
    return env->NewStringUTF(source.c_str());
}

std::string stringToCXX(JNIEnv *env, jstring jString) {
    const char *jStringChars = env->GetStringUTFChars(jString, nullptr);
    std::string result { jStringChars };
    env->ReleaseStringUTFChars(jString, jStringChars);

    return result;
}

void throwException(JNIEnv *env, const std::string& message) {
    jclass errorClass = env->FindClass("java/lang/Exception");
    env->ThrowNew(errorClass, message.c_str());
}

extern "C"
JNIEXPORT jlong JNICALL
Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_init(
    JNIEnv *env,
    jobject thiz,
    jstring modelPath,
    jstring language,
    jstring prompt
) {
    whisper_log_set(log_android, nullptr);

    try {
        auto *pSession = new WhisperSession(
            stringToCXX(env, modelPath), stringToCXX(env, language), stringToCXX(env, prompt)
        );
        return (jlong) pSession;
    } catch (const std::exception& exception) {
        LOGW("Failed to init whisper: %s", exception.what());
        throwException(env, exception.what());
        return 0;
    }
}

extern "C"
JNIEXPORT void JNICALL
Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_free(JNIEnv *env, jobject thiz,
        jlong pointer) {
    // Use delete (not std::free) so that ~WhisperSession runs and releases the Whisper context.
    delete reinterpret_cast<WhisperSession *>(pointer);
}

extern "C"
JNIEXPORT jstring JNICALL
Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_fullTranscribe(JNIEnv *env,
        jobject thiz,
        jlong pointer,
        jfloatArray audio_data) {
    auto *pSession = reinterpret_cast<WhisperSession *> (pointer);
    jfloat *pAudioData = env->GetFloatArrayElements(audio_data, nullptr);
    jsize lenAudioData = env->GetArrayLength(audio_data);
    std::string result;

    try {
        LOGD("Starting Whisper, transcribe %d", lenAudioData);
        result = pSession->transcribeNextChunk(pAudioData, lenAudioData);
        auto preview = pSession->getPreview();
        LOGD("Ran Whisper. Got %s (preview %s)", result.c_str(), preview.c_str());
    } catch (const std::exception& exception) {
        LOGW("Failed to run whisper: %s", exception.what());
        throwException(env, exception.what());
    }

    // JNI_ABORT: "free the buffer without copying back the possible changes", pass 0 to copy
    // changes (there should be no changes)
    env->ReleaseFloatArrayElements(audio_data, pAudioData, JNI_ABORT);

    return stringToJava(env, result);
}
extern "C"
JNIEXPORT jstring JNICALL
Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_getPreview(
    JNIEnv *env, jobject thiz, jlong pointer
) {
    auto *pSession = reinterpret_cast<WhisperSession *> (pointer);
    return stringToJava(env, pSession->getPreview());
}

extern "C"
JNIEXPORT void JNICALL
Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_runTests(JNIEnv *env, jobject thiz) {
    try {
        findLongestSilence_test();
    } catch (const std::exception& exception) {
        LOGW("Failed to run tests: %s", exception.what());
        throwException(env, exception.what());
    }
}

@@ -21,7 +21,7 @@ class AudioRecorder(context: Context) : Closeable {
    private var bufferWriteOffset = 0

    // Accessor must not modify result
    val bufferedData: FloatArray get() = buffer.sliceArray(0 until bufferWriteOffset)
    private val bufferedData: FloatArray get() = buffer.sliceArray(0 until bufferWriteOffset)
    val bufferLengthSeconds: Double get() = bufferWriteOffset.toDouble() / sampleRate

    init {

@@ -74,11 +74,16 @@ class AudioRecorder(context: Context) : Closeable {
    }

    // Pulls all available data from the audio recorder's buffer
    fun pullAvailable() {
        return read(maxBufferSize, AudioRecord.READ_NON_BLOCKING)
    fun pullAvailable(): FloatArray {
        read(maxBufferSize, AudioRecord.READ_NON_BLOCKING)

        val result = bufferedData
        buffer.fill(0.0f, 0, maxBufferSize);
        bufferWriteOffset = 0
        return result
    }

    fun pullNextSeconds(seconds: Double) {
    fun pullNextSeconds(seconds: Double): FloatArray {
        val remainingSize = maxBufferSize - bufferWriteOffset
        val requestedSize = (seconds * sampleRate).toInt()

@@ -87,7 +92,8 @@ class AudioRecorder(context: Context) : Closeable {
            advanceStartBySamples(maxBufferSize / 3)
        }

        return read(requestedSize, AudioRecord.READ_BLOCKING)
        read(requestedSize, AudioRecord.READ_BLOCKING)
        return pullAvailable()
    }

    override fun close() {

@@ -0,0 +1,54 @@
package net.cozic.joplin.audio

import java.io.Closeable

class NativeWhisperLib(
    modelPath: String,
    languageCode: String,
    prompt: String,
) : Closeable {
    companion object {
        init {
            System.loadLibrary("joplin")
        }

        external fun runTests(): Unit;

        // TODO: The example whisper.cpp project transfers pointers as Longs to the Kotlin code.
        // This seems unsafe. Try changing how this is managed.
        private external fun init(modelPath: String, languageCode: String, prompt: String): Long;
        private external fun free(pointer: Long): Unit;

        private external fun fullTranscribe(pointer: Long, audioData: FloatArray): String;
        private external fun getPreview(pointer: Long): String;
    }

    private var closed = false
    private val pointer: Long = init(modelPath, languageCode, prompt)

    fun transcribe(audioData: FloatArray): String {
        if (closed) {
            throw Exception("Cannot transcribe using a closed session")
        }

        return fullTranscribe(pointer, audioData)
    }

    fun getPreview(): String {
        if (closed) {
            throw Exception("Cannot get preview from a closed session")
        }

        return getPreview(pointer)
    }

    override fun close() {
        if (closed) {
            throw Exception("Cannot close a whisper session twice")
        }

        closed = true
        free(pointer)
    }

}
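
For orientation (descriptive only, not new code in the commit): each `external fun` above binds, via standard JNI name mangling ('.' becomes '_', and the Companion class's '$' becomes '_00024'), to an exported C function in whisperWrapper.cpp. For example:

#include <jni.h>

// NativeWhisperLib.Companion.init(modelPath, languageCode, prompt): Long
// is implemented by this symbol, which whisperWrapper.cpp defines:
extern "C" JNIEXPORT jlong JNICALL
Java_net_cozic_joplin_audio_NativeWhisperLib_00024Companion_init(
    JNIEnv *env, jobject thiz, jstring modelPath, jstring language, jstring prompt);
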
@@ -1,110 +1,33 @@
package net.cozic.joplin.audio

import ai.onnxruntime.OnnxTensor
import ai.onnxruntime.OrtEnvironment
import ai.onnxruntime.OrtSession
import ai.onnxruntime.extensions.OrtxPackage
import android.annotation.SuppressLint
import android.content.Context
import android.util.Log
import java.io.Closeable
import java.nio.FloatBuffer
import java.nio.IntBuffer
import kotlin.time.DurationUnit
import kotlin.time.measureTimedValue

class SpeechToTextConverter(
    modelPath: String,
    locale: String,
    prompt: String,
    recorderFactory: AudioRecorderFactory,
    private val environment: OrtEnvironment,
    context: Context,
) : Closeable {
    private val recorder = recorderFactory(context)
    private val session: OrtSession = environment.createSession(
        modelPath,
        OrtSession.SessionOptions().apply {
            // Needed for audio decoding
            registerCustomOpLibrary(OrtxPackage.getLibraryPath())
        },
    )
    private val languageCode = Regex("_.*").replace(locale, "")
    private val decoderInputIds = when (languageCode) {
        // Add 50363 to the end to omit timestamps
        "en" -> intArrayOf(50258, 50259, 50359)
        "fr" -> intArrayOf(50258, 50265, 50359)
        "es" -> intArrayOf(50258, 50262, 50359)
        "de" -> intArrayOf(50258, 50261, 50359)
        "it" -> intArrayOf(50258, 50274, 50359)
        "nl" -> intArrayOf(50258, 50271, 50359)
        "ko" -> intArrayOf(50258, 50264, 50359)
        "th" -> intArrayOf(50258, 50289, 50359)
        "ru" -> intArrayOf(50258, 50263, 50359)
        "pt" -> intArrayOf(50258, 50267, 50359)
        "pl" -> intArrayOf(50258, 50269, 50359)
        "id" -> intArrayOf(50258, 50275, 50359)
        "hi" -> intArrayOf(50258, 50276, 50359)
        // Let Whisper guess the language
        else -> intArrayOf(50258)
    }
    private var whisper = NativeWhisperLib(
        modelPath,
        languageCode,
        prompt,
    )

    fun start() {
        recorder.start()
    }

    private fun getInputs(data: FloatArray): MutableMap<String, OnnxTensor> {
        fun intTensor(value: Int) = OnnxTensor.createTensor(
            environment,
            IntBuffer.wrap(intArrayOf(value)),
            longArrayOf(1),
        )
        fun floatTensor(value: Float) = OnnxTensor.createTensor(
            environment,
            FloatBuffer.wrap(floatArrayOf(value)),
            longArrayOf(1),
        )
        val audioPcmTensor = OnnxTensor.createTensor(
            environment,
            FloatBuffer.wrap(data),
            longArrayOf(1, data.size.toLong()),
        )
        val decoderInputIdsTensor = OnnxTensor.createTensor(
            environment,
            IntBuffer.wrap(decoderInputIds),
            longArrayOf(1, decoderInputIds.size.toLong())
        )

        return mutableMapOf(
            "audio_pcm" to audioPcmTensor,
            "max_length" to intTensor(412),
            "min_length" to intTensor(0),
            "num_return_sequences" to intTensor(1),
            "num_beams" to intTensor(1),
            "length_penalty" to floatTensor(1.1f),
            "repetition_penalty" to floatTensor(3f),
            "decoder_input_ids" to decoderInputIdsTensor,

            // Required for timestamps
            "logits_processor" to intTensor(1)
        )
    }

    // TODO .get() fails on older Android versions
    @SuppressLint("NewApi")
    private fun convert(data: FloatArray): String {
        val (inputs, convertInputsTime) = measureTimedValue {
            getInputs(data)
        }
        val (outputs, getOutputsTime) = measureTimedValue {
            session.run(inputs, setOf("str"))
        }
        val mainOutput = outputs.get("str").get().value as Array<Array<String>>
        outputs.close()

        Log.i("Whisper", "Converted ${data.size / 16000}s of data in ${
            getOutputsTime.toString(DurationUnit.SECONDS, 2)
        } converted inputs in ${convertInputsTime.inWholeMilliseconds}ms")
        return mainOutput[0][0]
        Log.d("Whisper", "Pre-transcribe data of size ${data.size}")
        val result = whisper.transcribe(data)
        Log.d("Whisper", "Post transcribe. Got $result")
        return result;
    }

    fun dropFirstSeconds(seconds: Double) {

@@ -114,23 +37,26 @@ class SpeechToTextConverter(

    val bufferLengthSeconds: Double get() = recorder.bufferLengthSeconds

    fun expandBufferAndConvert(seconds: Double): String {
        recorder.pullNextSeconds(seconds)
        // Also pull any extra available data, in case the speech-to-text converter
        // is lagging behind the audio recorder.
        recorder.pullAvailable()

        return convert(recorder.bufferedData)
    fun convertNext(seconds: Double): String {
        val buffer = recorder.pullNextSeconds(seconds)
        val result = convert(buffer)
        dropFirstSeconds(seconds)
        return result
    }

    // Converts as many seconds of buffered data as possible, without waiting
    fun expandBufferAndConvert(): String {
        recorder.pullAvailable()
        return convert(recorder.bufferedData)
    fun convertRemaining(): String {
        val buffer = recorder.pullAvailable()
        return convert(buffer)
    }

    fun getPreview(): String {
        return whisper.getPreview()
    }

    override fun close() {
        Log.d("Whisper", "Close")
        recorder.close()
        session.close()
        whisper.close()
    }
}

@@ -1,6 +1,5 @@
package net.cozic.joplin.audio

import ai.onnxruntime.OrtEnvironment
import com.facebook.react.ReactPackage
import com.facebook.react.bridge.LifecycleEventListener
import com.facebook.react.bridge.NativeModule

@@ -24,7 +23,6 @@ class SpeechToTextPackage : ReactPackage {
    class SpeechToTextModule(
        private var context: ReactApplicationContext,
    ) : ReactContextBaseJavaModule(context), LifecycleEventListener {
        private var environment: OrtEnvironment? = null
        private val executorService: ExecutorService = Executors.newFixedThreadPool(1)
        private val sessionManager = SpeechToTextSessionManager(executorService)

@@ -32,21 +30,24 @@ class SpeechToTextPackage : ReactPackage {

        override fun onHostResume() { }
        override fun onHostPause() { }
        override fun onHostDestroy() {
            environment?.close()
        override fun onHostDestroy() { }

        @ReactMethod
        fun runTests(promise: Promise) {
            try {
                NativeWhisperLib.runTests()
                promise.resolve(true)
            } catch (exception: Throwable) {
                promise.reject(exception)
            }
        }

        @ReactMethod
        fun openSession(modelPath: String, locale: String, promise: Promise) {
        fun openSession(modelPath: String, locale: String, prompt: String, promise: Promise) {
            val appContext = context.applicationContext
            // Initialize environment as late as possible:
            val ortEnvironment = environment ?: OrtEnvironment.getEnvironment()
            if (environment != null) {
                environment = ortEnvironment
            }

            try {
                val sessionId = sessionManager.openSession(modelPath, locale, ortEnvironment, appContext)
                val sessionId = sessionManager.openSession(modelPath, locale, prompt, appContext)
                promise.resolve(sessionId)
            } catch (exception: Throwable) {
                promise.reject(exception)

@@ -69,8 +70,8 @@ class SpeechToTextPackage : ReactPackage {
        }

        @ReactMethod
        fun expandBufferAndConvert(sessionId: Int, duration: Double, promise: Promise) {
            sessionManager.expandBufferAndConvert(sessionId, duration, promise)
        fun convertNext(sessionId: Int, duration: Double, promise: Promise) {
            sessionManager.convertNext(sessionId, duration, promise)
        }

        @ReactMethod

@@ -78,6 +79,11 @@ class SpeechToTextPackage : ReactPackage {
            sessionManager.convertAvailable(sessionId, promise)
        }

        @ReactMethod
        fun getPreview(sessionId: Int, promise: Promise) {
            sessionManager.getPreview(sessionId, promise)
        }

        @ReactMethod
        fun closeSession(sessionId: Int, promise: Promise) {
            sessionManager.closeSession(sessionId, promise)

@@ -1,6 +1,5 @@
package net.cozic.joplin.audio

import ai.onnxruntime.OrtEnvironment
import android.content.Context
import com.facebook.react.bridge.Promise
import java.util.concurrent.Executor

@@ -21,13 +20,13 @@ class SpeechToTextSessionManager(
    fun openSession(
        modelPath: String,
        locale: String,
        environment: OrtEnvironment,
        prompt: String,
        context: Context,
    ): Int {
        val sessionId = nextSessionId++
        sessions[sessionId] = SpeechToTextSession(
            SpeechToTextConverter(
                modelPath, locale, recorderFactory = AudioRecorder.factory, environment, context,
                modelPath, locale, prompt, recorderFactory = AudioRecorder.factory, context,
            )
        )
        return sessionId

@@ -87,9 +86,9 @@ class SpeechToTextSessionManager(
    }

    // Waits for the next [duration] seconds to become available, then converts
    fun expandBufferAndConvert(sessionId: Int, duration: Double, promise: Promise) {
    fun convertNext(sessionId: Int, duration: Double, promise: Promise) {
        this.concurrentWithSession(sessionId, promise::reject) { session ->
            val result = session.converter.expandBufferAndConvert(duration)
            val result = session.converter.convertNext(duration)
            promise.resolve(result)
        }
    }

@@ -97,7 +96,14 @@ class SpeechToTextSessionManager(
    // Converts all available recorded data
    fun convertAvailable(sessionId: Int, promise: Promise) {
        this.concurrentWithSession(sessionId, promise::reject) { session ->
            val result = session.converter.expandBufferAndConvert()
            val result = session.converter.convertRemaining()
            promise.resolve(result)
        }
    }

    fun getPreview(sessionId: Int, promise: Promise) {
        this.concurrentWithSession(sessionId, promise::reject) { session ->
            val result = session.converter.getPreview()
            promise.resolve(result)
        }
    }

@@ -0,0 +1,9 @@
whisper.cpp/.gitmodules
whisper.cpp/scripts/
whisper.cpp/samples/
whisper.cpp/tests/
whisper.cpp/models/
whisper.cpp/examples/
whisper.cpp/.*/
whisper.cpp/bindings/
whisper.cpp/**/*.Dockerfile

@@ -0,0 +1,7 @@
# Vendored Android packages

This directory contains upstream packages that can't be added as direct dependencies (e.g. through `npm`).

## whisper.cpp

`whisper.cpp` provides voice typing capabilities. It can be updated by replacing the contents of the `whisper.cpp` directory with the latest content from https://github.com/ggerganov/whisper.cpp. To decrease the size of the `whisper.cpp` directory, some files are ignored by the `.gitignore`.

@@ -0,0 +1,60 @@
*.o
*.a
*.d
.cache/
.coreml/
.test/
.venv/
.vs/
.vscode/
.DS_Store
.vimspector.json
/CMakeSettings.json
/talk-llama.dSYM/

build/
build-*/

# SPM
.build/
.swiftpm
*.metallib

ggml-metal-embed.metal
ggml-metal-embed.metal.tmp

/main
/stream
/command
/talk
/talk-llama
/bench
/quantize
/server
/lsp

arm_neon.h
sync.sh
libwhisper.a
libwhisper.so
compile_commands.json

examples/arm_neon.h
examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata

extra/bench-gg.txt

models/*.mlmodel
models/*.mlmodelc
models/*.mlpackage
bindings/java/.gradle/
bindings/java/.idea/
.idea/

benchmark_results.csv
cmake-build-debug/
.cxx/
.gradle/
local.properties

@@ -0,0 +1,510 @@
# date: Tue Feb 4 13:03:35 EET 2025
# this file is auto-generated by scripts/gen-authors.sh

0/0 <zero@imaskeleton.me>
|
||||
0cc4m <picard12@live.de>
|
||||
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
|
||||
65a <10104049+65a@users.noreply.github.com>
|
||||
AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com>
|
||||
AT <manyoso@users.noreply.github.com>
|
||||
Aarni Koskela <akx@iki.fi>
|
||||
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
|
||||
Aaron Taylor <aaron@exphat.com>
|
||||
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
||||
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
|
||||
Adam Jones <domdomegg+git@gmail.com>
|
||||
Adrien Gallouët <adrien@gallouet.fr>
|
||||
Adrien Gallouët <angt@huggingface.co>
|
||||
AfryMask <AfryMask@163.com>
|
||||
Ahmad Bilal <ahmad.bilal@empglabs.com>
|
||||
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
|
||||
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||
AidanBeltonS <aidan.belton@codeplay.com>
|
||||
Akarshan Biswas <akarshan.biswas@gmail.com>
|
||||
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
|
||||
Akash Mahajan <akash7190@gmail.com>
|
||||
Akash Mahajan <akashmjn@stanford.edu>
|
||||
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
|
||||
Alan <unknown>
|
||||
Albert Jin <albert.jin@gmail.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
|
||||
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
|
||||
Alex Azarov <alex@azarov.by>
|
||||
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
|
||||
Alex Evgrashin <aevgrashin@yandex.ru>
|
||||
Alex O'Connell <35843486+acon96@users.noreply.github.com>
|
||||
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
|
||||
Alexandru Mariuti <alex@mariuti.com>
|
||||
Alexey Kharlamov <alexey@kharlamov.biz>
|
||||
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
|
||||
Ali Alameh <ali.alameh@isae.edu.lb>
|
||||
Alter <0x7c48@gmail.com>
|
||||
Ananta Bastola <anantarajbastola@gmail.com>
|
||||
Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
|
||||
Andreas Lubbe <git@lubbe.org>
|
||||
Andreu Huguet <andreuhuguet@gmail.com>
|
||||
Andrew Huynh <a5thuynh@gmail.com>
|
||||
Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
|
||||
Andrew S <andrews54757@gmail.com>
|
||||
Andy Maloney <asmaloney@gmail.com>
|
||||
Anton Kostin <masguit42@users.noreply.github.com>
|
||||
Artyom Mezin <psycho.fading@gmail.com>
|
||||
Asad Memon <asad.lionpk@gmail.com>
|
||||
Ashraful Islam <ashraful.meche@gmail.com>
|
||||
AsukaMinato <asukaminato@nyan.eu.org>
|
||||
AustinMroz <austinmroz@utexas.edu>
|
||||
Avik Sengupta <avik@sengupta.net>
|
||||
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
|
||||
Baffin Lee <baffinlee@gmail.com>
|
||||
Ben Ashbaugh <ben.ashbaugh@intel.com>
|
||||
Ben Nortier <bjnortier@gmail.com>
|
||||
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
|
||||
Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
|
||||
Binozo <70137898+Binozo@users.noreply.github.com>
|
||||
Bo-Yi Wu <appleboy.tw@gmail.com>
|
||||
Boris Bliznioukov <blib@mail.com>
|
||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||
Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
|
||||
Brian Murray <brian@bmurray.ca>
|
||||
CRD716 <crd716@gmail.com>
|
||||
Canis Lupus <Canis-UK@users.noreply.github.com>
|
||||
Carlos Zoido <mrgalleta@gmail.com>
|
||||
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
||||
CarterLi999 <664681047@qq.com>
|
||||
ChangSeok Oh <shivamidow@users.noreply.github.com>
|
||||
Changyeon Kim <cyzero.kim@samsung.com>
|
||||
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
|
||||
Charles Xu <63788048+chaxu01@users.noreply.github.com>
|
||||
Charles Xu <charles.xu@arm.com>
|
||||
Chen Xi <xi2.chen@intel.com>
|
||||
Chen Xi <xixichen08@foxmail.com>
|
||||
Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
|
||||
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
|
||||
Chidi Williams <williamschidi1@gmail.com>
|
||||
Chris Elrod <elrodc@gmail.com>
|
||||
Christian <12550267+iceychris@users.noreply.github.com>
|
||||
Christian Kastner <ckk@kvr.at>
|
||||
Clifford Heath <clifford.heath@gmail.com>
|
||||
Clint Herron <hanclinto@gmail.com>
|
||||
Colin <github@whoisc.cc>
|
||||
Conrad Kramer <conrad@conradkramer.com>
|
||||
Corey Earwood <iamcgn+github@gmail.com>
|
||||
CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
|
||||
DAN™ <dranger003@gmail.com>
|
||||
DGdev91 <DGdev91@users.noreply.github.com>
|
||||
Damian Czaja <trojan295@protonmail.com>
|
||||
Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
|
||||
Dan Johansson <dan.johansson@arm.com>
|
||||
Daniel Bevenius <daniel.bevenius@gmail.com>
|
||||
Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
|
||||
Daniel Ziegenberg <daniel@ziegenberg.at>
|
||||
Daniele <57776841+daniandtheweb@users.noreply.github.com>
|
||||
Dave <dave-fl@users.noreply.github.com>
|
||||
Dave Airlie <airlied@gmail.com>
|
||||
Dave Airlie <airlied@redhat.com>
|
||||
Daven Sanassy <daven@vochlea.co.uk>
|
||||
David <dnhkng@gmail.com>
|
||||
David Thorpe <djt@mutablelogic.com>
|
||||
DavidKorczynski <david@adalogics.com>
|
||||
Davidson Francis <davidsondfgl@gmail.com>
|
||||
Dener Stassun <denerstassun@gmail.com>
|
||||
Dibakar Gope <dibakar.gope@arm.com>
|
||||
Didzis Gosko <didzis@users.noreply.github.com>
|
||||
Diego Devesa <slarengh@gmail.com>
|
||||
Digipom <admin@digipom.com>
|
||||
Dimo <dimo@ieee.org>
|
||||
Djip007 <3705339+Djip007@users.noreply.github.com>
|
||||
Djip007 <djip.perois@free.fr>
|
||||
Dody Suria Wijaya <dodysw@gmail.com>
|
||||
Dou Xinpeng <15529241576@163.com>
|
||||
Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
|
||||
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
||||
Duncan McConnell <ddmcconnell4@gmail.com>
|
||||
Egor Egorov <me@egorfine.com>
|
||||
Elkana Bardugo <ttv200@gmail.com>
|
||||
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
|
||||
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
||||
Eric Curtin <ericcurtin17@gmail.com>
|
||||
Eric Swanson <eswanson@alloscomp.com>
|
||||
Eric Tendian <erictendian@gmail.com>
|
||||
Eric Zhang <34133756+EZForever@users.noreply.github.com>
|
||||
Erik Scholz <Green-Sky@users.noreply.github.com>
|
||||
Evan Jones <evan.q.jones@gmail.com>
|
||||
Evan Martin <evan.martin@gmail.com>
|
||||
Eve <139727413+netrunnereve@users.noreply.github.com>
|
||||
Evgeny Kuznetsov <evgeny@kuznetsov.md>
|
||||
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
|
||||
Faisal Zaghloul <quic_fzaghlou@quicinc.com>
|
||||
Fangjun Kuang <csukuangfj@gmail.com>
|
||||
Felix <stenbackfelix@gmail.com>
|
||||
Finn Voorhees <finnvoorhees@gmail.com>
|
||||
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
|
||||
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
|
||||
Frankie Robertson <frankier@users.noreply.github.com>
|
||||
Gang Chen <goncha@gmail.com>
|
||||
Gavin Cai <gavin1818@hotmail.com>
|
||||
George Hindle <george@georgehindle.com>
|
||||
Georgi Gerganov <ggerganov@gmail.com>
|
||||
Gilad S <7817232+giladgd@users.noreply.github.com>
|
||||
Gilad S <giladgd@users.noreply.github.com>
|
||||
Gilad S. <7817232+giladgd@users.noreply.github.com>
|
||||
GitAritron <103900385+GitAritron@users.noreply.github.com>
|
||||
GiviMAD <GiviMAD@users.noreply.github.com>
|
||||
Gleicon Moraes <gleicon@gmail.com>
|
||||
Gregor Jasny <gjasny@googlemail.com>
|
||||
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
|
||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||
Hang <bebound@gmail.com>
|
||||
Haus1 <haus.xda@gmail.com>
|
||||
Herman Semenov <GermanAizek@yandex.ru>
|
||||
HimariO <dsfhe49854@gmail.com>
|
||||
Hong Bo PENG <penghb@cn.ibm.com>
|
||||
Hrishikesh Barman <geekodour@users.noreply.github.com>
|
||||
Hugo <hugo@whynothugo.nl>
|
||||
Ian Bicking <ian@ianbicking.org>
|
||||
Ian Bull <irbull@eclipsesource.com>
|
||||
Ihar Hrachyshka <ihrachys@redhat.com>
|
||||
Ikko Ashimine <eltociear@gmail.com>
|
||||
Ikko Eltociear Ashimine <eltociear@gmail.com>
|
||||
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
|
||||
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
|
||||
Ivan <nekotekina@gmail.com>
|
||||
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
|
||||
Ivan Gorin <ivangorin21@gmail.com>
|
||||
Ivo von Putzer Reibegg <ivo.putzer@gmail.com>
|
||||
JJ <103335846+computerscienceiscool@users.noreply.github.com>
|
||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||
JacobLinCool <jacoblincool@gmail.com>
|
||||
Jakub Ráček <blizzcz@gmail.com>
|
||||
Jared Van Bortel <jared@nomic.ai>
|
||||
Jay Binks <jaybinks@gmail.com>
|
||||
Jayant <jayantyadav202@gmail.com>
|
||||
Jeff Bolz <jbolz@nvidia.com>
|
||||
Jeroen Mostert <jeroen.mostert@cm.com>
|
||||
Jhen-Jie Hong <developer@jhen.me>
|
||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||
JidongZhang-THU <1119708529@qq.com>
|
||||
Jo Liss <joliss42@gmail.com>
|
||||
Joe Todd <joe.todd@codeplay.com>
|
||||
Johan <jr.raffin@gmail.com>
|
||||
Johannes Gäßler <johannesg@5d6.de>
|
||||
John Balis <phobossystems@gmail.com>
|
||||
JohnnyB <jboero@users.noreply.github.com>
|
||||
Jonathan Soo <jcsoo@agora.com>
|
||||
Jonno <1160532+razodactyl@users.noreply.github.com>
|
||||
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
|
||||
Jose <34888496+Jerry-Master@users.noreply.github.com>
|
||||
Josh Bleecher Snyder <josharian@gmail.com>
|
||||
Josscii <jossciiweiyi@gmail.com>
|
||||
Judd <foldl@users.noreply.github.com>
|
||||
Jumper775 <78500318+jumpers775@users.noreply.github.com>
|
||||
Jun Hee Yoo <contact.jhyoo@gmail.com>
|
||||
Junil Kim <logyourself@gmail.com>
|
||||
Justina Cho <justcho5@gmail.com>
|
||||
Justine Tunney <jtunney@gmail.com>
|
||||
Justine Tunney <jtunney@mozilla.com>
|
||||
KITAITI Makoto <KitaitiMakoto@gmail.com>
|
||||
KP Kaiser <kirk@zothcorp.com>
|
||||
Kamilake <exjang0@gmail.com>
|
||||
Karol Kontny <82021046+kkontny@users.noreply.github.com>
|
||||
Karthick <j.karthic2004@gmail.com>
|
||||
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
|
||||
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
||||
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||
Kendrick Taylor <kendrick@circuitsix.com>
|
||||
Kevin Brothaler <admin@digipom.com>
|
||||
Kevin Gibbons <bakkot@gmail.com>
|
||||
Konosuke Sakai <konosuke@konosuke.work>
|
||||
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
||||
Kreijstal <rainb@tfwno.gf>
|
||||
Kylin <56434533+KyL0N@users.noreply.github.com>
|
||||
LBlue <153975653+lbluep@users.noreply.github.com>
|
||||
Larry Battle <larry.battle.tech@gmail.com>
|
||||
Laytan Laats <laytanlaats@hotmail.com>
|
||||
Leo Moll <leo.moll@yeasoft.com>
|
||||
Lexevolution <31176843+Lexevolution@users.noreply.github.com>
|
||||
LittleLoli <26589867+WhichWho@users.noreply.github.com>
|
||||
Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
|
||||
Luis Herrera <herrera-luis@users.noreply.github.com>
|
||||
Lukas Rist <glaslos@gmail.com>
|
||||
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
|
||||
M. Eren Akbiyik <erenakbiyik@gmail.com>
|
||||
Ma Mingfei <mingfei.ma@intel.com>
|
||||
Maciek <maciek.mab122@gmail.com>
|
||||
Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
|
||||
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
|
||||
Mark Karpelès <MagicalTux@users.noreply.github.com>
|
||||
Mark Zhuang <zhuangqiubin@gmail.com>
|
||||
Markus Tavenrath <mtavenrath@users.noreply.github.com>
|
||||
Martin Delille <martin@delille.org>
|
||||
Martin Warnaar <martinwarnaar@gmail.com>
|
||||
Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
|
||||
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
|
||||
Mathieu Baudier <mbaudier@argeo.org>
|
||||
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||
Matija Pevec <mightymatth@users.noreply.github.com>
|
||||
Matt Stephenson <mstephenson6@users.noreply.github.com>
|
||||
Max Krasnyansky <max.krasnyansky@gmail.com>
|
||||
Max Krasnyansky <quic_maxk@quicinc.com>
|
||||
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
|
||||
Meng, Hengyu <hengyu.meng@intel.com>
|
||||
Mengqing Cao <cmq0113@163.com>
|
||||
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
||||
Michael Rienstra <mrienstra@gmail.com>
|
||||
Mikhail Grigorev <sleuthhound@gmail.com>
|
||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||
Mohit Agarwal <mohit@sdf.org>
|
||||
Molly Sophia <mollysophia379@gmail.com>
|
||||
Murilo Santana <mvrilo@gmail.com>
|
||||
NETZkultur GmbH <mulholland@netzkultur.de>
|
||||
Natsu <chino@hotococoa.moe>
|
||||
Neil Chudleigh <nchudleigh@users.noreply.github.com>
|
||||
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||
Neuman Vong <neuman.vong@gmail.com>
|
||||
Nicholai Tukanov <nicholaitukanov@gmail.com>
|
||||
Nicholas Albion <nalbion@yahoo.com>
|
||||
Nico Bosshard <nico@bosshome.ch>
|
||||
Nicolò Scipione <nicolo.scipione@codeplay.com>
|
||||
Niels Mayer <Niels.Mayer@gmail.com>
|
||||
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
|
||||
Nikolaj Olsson <nikse.dk@gmail.com>
|
||||
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
|
||||
Oleg Sidorov <me@whitebox.io>
|
||||
Oleg Sidorov <oleg@sidorov.nl>
|
||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||
Ondrej Kokes <ondrej.kokes@gmail.com>
|
||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||
PAB <pierreantoine.bannier@gmail.com>
|
||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||
Pedro Probst <pprobst@insiberia.net>
|
||||
Peng <hzp1024@qq.com>
|
||||
Peter <peter277@users.noreply.github.com>
|
||||
Philipp Zabel <philipp.zabel@gmail.com>
|
||||
Philippe Normand <phil@base-art.net>
|
||||
Philippe Normand <philn@igalia.com>
|
||||
Plamen Minev <pacominev@gmail.com>
|
||||
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
|
||||
R0CKSTAR <xiaodong.ye@mthreads.com>
|
||||
R0CKSTAR <yeahdongcn@gmail.com>
|
||||
Radoslav Gerganov <rgerganov@gmail.com>
|
||||
Radosław Gryta <radek.gryta@gmail.com>
|
||||
Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com>
|
||||
Raiya Araki <83504221+rai62@users.noreply.github.com>
|
||||
Reinforce-II <fate@eastal.com>
|
||||
Reinis Muiznieks <muiznieks.reinis@gmail.com>
|
||||
RelatedTitle <r3latedtitle@gmail.com>
|
||||
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||
Rich Jones <miserlou@gmail.com>
|
||||
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
|
||||
Robin <robin.xw@hotmail.com>
|
||||
Roddur Dasgupta <roddurd@gmail.com>
|
||||
Roland Rabien <figbug@gmail.com>
|
||||
Romain Biessy <romain.biessy@codeplay.com>
|
||||
Ronsor <ronsor@ronsor.pw>
|
||||
Rotem Dan <rotemdan@gmail.com>
|
||||
Ryan Hitchman <hitchmanr@gmail.com>
|
||||
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
|
||||
RyanChang <ftes90015@gmail.com>
|
||||
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
||||
SXX <sxx1136965276@gmail.com>
|
||||
Sacha Arbonel <sacha.arbonel@hotmail.fr>
|
||||
Salman Faroz <stsfaroz@gmail.com>
|
||||
Salvatore Mesoraca <s.mesoraca16@gmail.com>
|
||||
Sam <49637763+Onlyartist9@users.noreply.github.com>
|
||||
Sam Pullara <spullara@gmail.com>
|
||||
Samuel Durante <44513615+samueldurantes@users.noreply.github.com>
|
||||
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
|
||||
Sandro Hanea <40202887+sandrohanea@users.noreply.github.com>
|
||||
Sergio López <slp@redhat.com>
|
||||
Sergio López <slp@sinrega.org>
|
||||
Shanshan Shen <467638484@qq.com>
|
||||
Shijie <821898965@qq.com>
|
||||
Shupei Fan <dymarkfan@outlook.com>
|
||||
Siddharth Ramakrishnan <srr2141@columbia.edu>
|
||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||
Simon Moisselin <simon.moisstoll@gmail.com>
|
||||
Sindre Sorhus <sindresorhus@gmail.com>
|
||||
Slava Primenko <primenko.s@gmail.com>
|
||||
Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
|
||||
Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com>
|
||||
Stefan Sydow <s.sydow@heinlein-video.de>
|
||||
Stefan Sydow <stefan@sydow.email>
|
||||
Syahmi Azhar <prsyahmi@gmail.com>
|
||||
Syed Jafri <syedjafri97@gmail.com>
|
||||
Sơn Phan Trung <phantrungson17@gmail.com>
|
||||
Taisei Mima <bhbstar.me@gmail.com>
|
||||
Takeshi Inoue <inoue.takeshi@gmail.com>
|
||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||
Taras Glek <taras@thegp.com>
|
||||
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
|
||||
Thamster <Thamster@users.noreply.github.com>
|
||||
Thijs Raymakers <thijs@raymakers.nl>
|
||||
Thomas Fitzsimmons <fitzsim@fitzsim.org>
|
||||
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
|
||||
Tienshiao Ma <tienshiao@tienshiao.org>
|
||||
Tim Miller <drasticactions@users.noreply.github.com>
|
||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||
Tobrun <tobrun.van.nuland@gmail.com>
|
||||
Todd <taf2@users.noreply.github.com>
|
||||
Toliver <teejae@gmail.com>
|
||||
Tong Li <31761981+litongjava@users.noreply.github.com>
|
||||
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
|
||||
Topping1 <78745143+Topping1@users.noreply.github.com>
|
||||
Travis Cline <travis.cline@gmail.com>
|
||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||
UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
|
||||
Vadim Peretokin <vperetokin@hey.com>
|
||||
Valentin Gosu <1454649+valenting@users.noreply.github.com>
|
||||
Vin Misra <vinith@alum.mit.edu>
|
||||
Vulcan <93451215+trholding@users.noreply.github.com>
|
||||
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
|
||||
William Tambellini <william.tambellini@gmail.com>
|
||||
William Tambellini <wtambellini@sdl.com>
|
||||
Wilson Silva <wilson.dsigns@gmail.com>
|
||||
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
||||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
|
||||
Xuan Son Nguyen <thichthat@gmail.com>
|
||||
Yajing Tang <phillis@google.com>
|
||||
Yang Shen <aplshenyang@gmail.com>
|
||||
Yunès <jean.baptiste.yunes@free.fr>
|
||||
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
|
||||
Yusuf Redžić <48274562+redzic@users.noreply.github.com>
|
||||
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
|
||||
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
|
||||
Zhiyuan Li <lizhiyuan@uniartisan.com>
|
||||
Zhiyuan Li <uniartisan2017@gmail.com>
|
||||
Zigfrid Zvezdin <ziggerZZ@gmail.com>
|
||||
Zollner <24618122+Zolliner@users.noreply.github.com>
|
||||
a3sh <38979186+A3shTnT@users.noreply.github.com>
|
||||
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
|
||||
agray3 <agray3@users.noreply.github.com>
|
||||
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
|
||||
aldorof <aldorof@users.noreply.github.com>
|
||||
alonfaraj <alonfaraj@gmail.com>
|
||||
amd-dwang <dong.wang@amd.com>
|
||||
amritahs-ibm <amritahs@linux.vnet.ibm.com>
|
||||
andypayne <apayne@gmail.com>
|
||||
ardfork <134447697+ardfork@users.noreply.github.com>
|
||||
arizhih <40765267+arizhih@users.noreply.github.com>
|
||||
automaticcat <daogiatuank54@gmail.com>
|
||||
bandoti <141645996+bandoti@users.noreply.github.com>
|
||||
be-next <jerome.ramette@gmail.com>
|
||||
bert hubert <bert@hubertnet.nl>
|
||||
billyct <billy_allen@126.com>
|
||||
bmwl <brian.marshall@tolko.com>
|
||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||
bocytko <bocytko+github@gmail.com>
|
||||
boolemancer <48014766+boolemancer@users.noreply.github.com>
|
||||
boolemancer <boolemancer@gmail.com>
|
||||
bradmit <151883577+bradmit@users.noreply.github.com>
|
||||
brunofaustino <b.fa.amorim@gmail.com>
|
||||
bssrdf <merlintiger@hotmail.com>
|
||||
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||
cdosoftei <ciprian.dosoftei@gmail.com>
|
||||
clach04 <Chris.Clark@actian.com>
|
||||
compilade <113953597+compilade@users.noreply.github.com>
|
||||
compilade <git@compilade.net>
|
||||
conradg <conradjgodfrey@gmail.com>
|
||||
crummyh <elijah@crums.us>
|
||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||
denersc <denerstassun@gmail.com>
|
||||
dscripka <dscripka@users.noreply.github.com>
|
||||
duthils <duthils@duthils.net>
|
||||
ecneladis <ecneladis@users.noreply.github.com>
|
||||
faker <nspyia2002@gmail.com>
|
||||
fitzsim <fitzsim@fitzsim.org>
|
||||
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||
genevera (she/her) <genevera@users.noreply.github.com>
|
||||
geniusnut <geniusnut@gmail.com>
|
||||
gilbertgong <gilbert.gong@gmail.com>
|
||||
gn64 <yukikaze.jp@gmail.com>
|
||||
goldwaving <77494627+goldwaving@users.noreply.github.com>
|
||||
greeshmay <greeshmay@gmail.com>
|
||||
haopeng <657407891@qq.com>
|
||||
hipudding <huafengchun@gmail.com>
|
||||
hsinhoyeh <yhh92u@gmail.com>
|
||||
hydai <z54981220@gmail.com>
|
||||
iamthad <thadeus.j.fleming@gmail.com>
|
||||
issixx <46835150+issixx@users.noreply.github.com>
|
||||
james wolf <contractorwolf@hotmail.com>
|
||||
jdomke <28772296+jdomke@users.noreply.github.com>
|
||||
jettoblack <jettoblack@gmail.com>
|
||||
jiez <373447296@qq.com>
|
||||
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||
jorismertz <35079666+jorismertz@users.noreply.github.com>
|
||||
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
||||
junkfood <69683722+JunkFood02@users.noreply.github.com>
|
||||
jwijffels <jwijffels@bnosac.be>
|
||||
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||
kamranjon <kamranjon@gmail.com>
|
||||
katsu560 <katsu560oo-@docomo.ne.jp>
|
||||
kennethge <57784063+kenneth-ge@users.noreply.github.com>
|
||||
keyehzy <msamuel@aluno.puc-rio.br>
|
||||
kunnis <kunnis@users.noreply.github.com>
|
||||
l3utterfly <gc.pthzfoldr@gmail.com>
|
||||
leejet <leejet714@gmail.com>
|
||||
leo-pony <nengjunma@outlook.com>
|
||||
lhez <quic_lih@quicinc.com>
|
||||
litong <31761981+litongjava@users.noreply.github.com>
|
||||
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
||||
lnyan <lkwq007@gmail.com>
|
||||
luoyu-intel <yu.luo@intel.com>
|
||||
m.bell <m.bell@techsmith.com>
|
||||
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
||||
mashizora <30516315+mashizora@users.noreply.github.com>
|
||||
matt23654 <matthew.webber@protonmail.com>
|
||||
matteo <matteogeniaccio@yahoo.it>
|
||||
mgrachten <maarten@grachten.eu>
|
||||
mkiol <mkiol@users.noreply.github.com>
|
||||
mky_coder <47767389+mkycoder@users.noreply.github.com>
|
||||
novag <7754358+novag@users.noreply.github.com>
|
||||
pajowu <pajowu@pajowu.de>
|
||||
pengxin99 <pengxin.yuan@intel.com>
|
||||
petterreinholdtsen <pere-github@hungry.com>
|
||||
polarmoon <90010972+polarmoon@users.noreply.github.com>
|
||||
rlapray <lapray.romain@gmail.com>
|
||||
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
|
||||
semiformal-net <84111142+semiformal-net@users.noreply.github.com>
|
||||
shibukazu <61775791+shibukazu@users.noreply.github.com>
|
||||
shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
|
||||
slaren <slarengh@gmail.com>
|
||||
slashlib <slashlib@users.noreply.github.com>
|
||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||
someone13574 <81528246+someone13574@users.noreply.github.com>
|
||||
st-gr <38470677+st-gr@users.noreply.github.com>
|
||||
stduhpf <stephduh@live.fr>
|
||||
stormofice <58337328+stormofice@users.noreply.github.com>
|
||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||
thefinaldegree <thefinaldegree@gmail.com>
|
||||
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
||||
toboil-features <160222185+toboil-features@users.noreply.github.com>
|
||||
trixirt <trix@redhat.com>
|
||||
ulatekh <ulatekh@yahoo.com>
|
||||
undef <undefdev@gmail.com>
|
||||
uvos <devnull@uvos.xyz>
|
||||
uvos <philipp@uvos.xyz>
|
||||
valVk <valVk@users.noreply.github.com>
|
||||
venkr <venkateshrameshkumar+1@gmail.com>
|
||||
vicalloy <zbirder@gmail.com>
|
||||
wangshuai09 <391746016@qq.com>
|
||||
woachk <24752637+woachk@users.noreply.github.com>
|
||||
xctan <axunlei@gmail.com>
|
||||
xdrudis <xavierdrudis@yahoo.es>
|
||||
yuri@FreeBSD <yuri@FreeBSD>
|
||||
zhangjixiong <code.zjx@gmail.com>
|
||||
zhentaoyu <zhentao.yu@intel.com>
|
||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||
zhouwg <zhouwg2000@gmail.com>
|
||||
谢乃闻 <sienaiwun@users.noreply.github.com>
|
||||
布客飞龙 <562826179@qq.com>
|
||||
Артём Земляк <azemlyak@smart-consulting.ru>
|
|
@ -0,0 +1,185 @@
|
|||
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
||||
project("whisper.cpp" C CXX)
|
||||
project("whisper.cpp" VERSION 1.7.4)
|
||||
include(CheckIncludeFileCXX)
|
||||
|
||||
set(SOVERSION 1)
|
||||
|
||||
#set(CMAKE_WARN_DEPRECATED YES)
|
||||
set(CMAKE_WARN_UNUSED_CLI YES)
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
# Add path to modules
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||
set(WHISPER_STANDALONE ON)
|
||||
|
||||
include(git-vars)
|
||||
|
||||
# configure project version
|
||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
||||
else()
|
||||
set(WHISPER_STANDALONE OFF)
|
||||
endif()
|
||||
|
||||
if (EMSCRIPTEN)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
|
||||
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
||||
|
||||
# TODO: without these, we get the following error:
|
||||
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||
else()
|
||||
if (MINGW)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
else()
|
||||
set(BUILD_SHARED_LIBS_DEFAULT ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||
|
||||
#
|
||||
# option list
|
||||
#
|
||||
|
||||
# general
|
||||
option(WHISPER_CCACHE "whisper: use ccache if available" ON)
|
||||
|
||||
# debug
|
||||
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||
|
||||
# build
|
||||
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
||||
|
||||
# sanitizers
|
||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||
|
||||
# extra artifacts
|
||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
|
||||
|
||||
# 3rd party libs
|
||||
option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
|
||||
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
||||
endif()
|
||||
|
||||
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
||||
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
||||
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
||||
|
||||
# Required for relocatable CMake package
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
||||
|
||||
# override ggml options
|
||||
set(GGML_CCACHE ${WHISPER_CCACHE})
|
||||
set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
|
||||
set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
|
||||
set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
|
||||
set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
|
||||
set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
|
||||
|
||||
# transition helpers
|
||||
function (whisper_option_depr TYPE OLD NEW)
|
||||
if (${OLD})
|
||||
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
||||
set(${NEW} ON)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
|
||||
whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
|
||||
whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
|
||||
whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
|
||||
whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
||||
whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
|
||||
whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
|
||||
whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
|
||||
whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
|
||||
whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
||||
|
||||
#
|
||||
# build the library
|
||||
#
|
||||
|
||||
if (NOT TARGET ggml)
|
||||
add_subdirectory(ggml)
|
||||
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
||||
endif()
|
||||
add_subdirectory(src)
|
||||
|
||||
#
|
||||
# install
|
||||
#
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
|
||||
set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
|
||||
set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
|
||||
|
||||
set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
||||
set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
||||
set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
||||
|
||||
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
||||
|
||||
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
|
||||
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
|
||||
|
||||
configure_package_config_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
|
||||
PATH_VARS
|
||||
WHISPER_INCLUDE_INSTALL_DIR
|
||||
WHISPER_LIB_INSTALL_DIR
|
||||
WHISPER_BIN_INSTALL_DIR )
|
||||
|
||||
write_basic_package_version_file(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||
VERSION ${WHISPER_INSTALL_VERSION}
|
||||
COMPATIBILITY SameMajorVersion)
|
||||
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
||||
|
||||
configure_file(cmake/whisper.pc.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||
@ONLY)
|
||||
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||
DESTINATION lib/pkgconfig)
|
||||
|
||||
#
|
||||
# programs, examples and tests
|
||||
#
|
||||
|
||||
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||
#include(CTest)
|
||||
#add_subdirectory(tests)
|
||||
endif ()
|
||||
|
||||
if (WHISPER_BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif()
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023-2024 The ggml authors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,19 @@
|
|||
// swift-tools-version:5.5
|
||||
|
||||
import PackageDescription
|
||||
|
||||
let package = Package(
|
||||
name: "whisper",
|
||||
platforms: [
|
||||
.macOS(.v12),
|
||||
.iOS(.v14),
|
||||
.watchOS(.v4),
|
||||
.tvOS(.v14)
|
||||
],
|
||||
products: [
|
||||
.library(name: "whisper", targets: ["whisper"]),
|
||||
],
|
||||
targets: [
|
||||
.systemLibrary(name: "whisper", pkgConfig: "whisper"),
|
||||
]
|
||||
)
|
|
@ -0,0 +1,679 @@
|
|||
# whisper.cpp
|
||||
|
||||

|
||||
|
||||
[](https://github.com/ggerganov/whisper.cpp/actions)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://conan.io/center/whisper-cpp)
|
||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||
|
||||
> [!NOTE]
|
||||
> New maintenance roadmap: https://github.com/ggerganov/whisper.cpp/discussions/2788
|
||||
|
||||
Stable: [v1.7.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||
|
||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||
|
||||
- Plain C/C++ implementation without dependencies
|
||||
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
|
||||
- AVX intrinsics support for x86 architectures
|
||||
- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
|
||||
- Mixed F16 / F32 precision
|
||||
- [Integer quantization support](#quantization)
|
||||
- Zero memory allocations at runtime
|
||||
- [Vulkan support](#vulkan-gpu-support)
|
||||
- Support for CPU-only inference
|
||||
- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
|
||||
- [OpenVINO Support](#openvino-support)
|
||||
- [Ascend NPU Support](#ascend-npu-support)
|
||||
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
|
||||
|
||||
Supported platforms:
|
||||
|
||||
- [x] Mac OS (Intel and Arm)
|
||||
- [x] [iOS](examples/whisper.objc)
|
||||
- [x] [Android](examples/whisper.android)
|
||||
- [x] [Java](bindings/java/README.md)
|
||||
- [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
|
||||
- [x] [WebAssembly](examples/whisper.wasm)
|
||||
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
|
||||
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
|
||||
- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
|
||||
|
||||
The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
|
||||
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
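For a sense of what that API looks like in practice, here is a minimal sketch of loading a model and transcribing a buffer of audio. It is not the full story - loading the 16 kHz mono float samples into `pcmf32` is omitted and `pcmf32`/`n_samples` are placeholders - but the calls themselves are the core of the C-style API:

```c
#include <stdio.h>
#include "whisper.h"

int main(void) {
    // load the model (path as used throughout this README)
    struct whisper_context_params cparams = whisper_context_default_params();
    struct whisper_context * ctx = whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
    if (!ctx) return 1;

    // placeholder: pcmf32 must hold 16 kHz mono float samples (WAV loading omitted)
    const float * pcmf32    = NULL;
    const int     n_samples = 0;

    struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.language = "en";

    // run the full encoder + decoder pipeline
    if (whisper_full(ctx, wparams, pcmf32, n_samples) == 0) {
        // print the transcribed segments
        for (int i = 0; i < whisper_full_n_segments(ctx); ++i) {
            printf("%s\n", whisper_full_get_segment_text(ctx, i));
        }
    }

    whisper_free(ctx);
    return 0;
}
```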
|
||||
|
||||
Having such a lightweight implementation of the model makes it easy to integrate into different platforms and applications.
|
||||
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
|
||||
|
||||
You can also easily make your own offline voice assistant application: [command](examples/command)
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
|
||||
|
||||
On Apple Silicon, the inference runs fully on the GPU via Metal:
|
||||
|
||||
https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
|
||||
|
||||
## Quick start
|
||||
|
||||
First clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
```
|
||||
|
||||
Navigate into the directory:
|
||||
|
||||
```
|
||||
cd whisper.cpp
|
||||
```
|
||||
|
||||
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
|
||||
|
||||
```bash
|
||||
sh ./models/download-ggml-model.sh base.en
|
||||
```
|
||||
|
||||
Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this:
|
||||
|
||||
```bash
|
||||
# build the project
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
|
||||
# transcribe an audio file
|
||||
./build/bin/whisper-cli -f samples/jfk.wav
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
For a quick demo, simply run `make base.en`.
|
||||
|
||||
The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
|
||||
|
||||
For detailed usage instructions, run: `./build/bin/whisper-cli -h`
|
||||
|
||||
Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
|
||||
For example, you can use `ffmpeg` like this:
|
||||
|
||||
```bash
|
||||
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
||||
```
|
||||
|
||||
## More audio samples
|
||||
|
||||
If you want some extra audio samples to play with, simply run:
|
||||
|
||||
```
|
||||
make -j samples
|
||||
```
|
||||
|
||||
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
|
||||
|
||||
You can download and run the other models as follows:
|
||||
|
||||
```
|
||||
make -j tiny.en
|
||||
make -j tiny
|
||||
make -j base.en
|
||||
make -j base
|
||||
make -j small.en
|
||||
make -j small
|
||||
make -j medium.en
|
||||
make -j medium
|
||||
make -j large-v1
|
||||
make -j large-v2
|
||||
make -j large-v3
|
||||
make -j large-v3-turbo
|
||||
```
|
||||
|
||||
## Memory usage
|
||||
|
||||
| Model | Disk | Mem |
|
||||
| ------ | ------- | ------- |
|
||||
| tiny | 75 MiB | ~273 MB |
|
||||
| base | 142 MiB | ~388 MB |
|
||||
| small | 466 MiB | ~852 MB |
|
||||
| medium | 1.5 GiB | ~2.1 GB |
|
||||
| large | 2.9 GiB | ~3.9 GB |
|
||||
|
||||
## POWER VSX Intrinsics
|
||||
|
||||
`whisper.cpp` supports POWER architectures and includes code which
|
||||
significantly speeds operation on Linux running on POWER9/10, making it
|
||||
capable of faster-than-realtime transcription on underclocked Raptor
|
||||
Talos II. Ensure you have a BLAS package installed, and replace the
|
||||
standard cmake setup with:
|
||||
|
||||
```bash
|
||||
# build with GGML_BLAS defined
|
||||
cmake -B build -DGGML_BLAS=1
|
||||
cmake --build build --config Release
|
||||
./build/bin/whisper-cli [ .. etc .. ]
```
|
||||
|
||||
## Quantization
|
||||
|
||||
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
|
||||
Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
|
||||
|
||||
Here are the steps for creating and using a quantized model:
|
||||
|
||||
```bash
|
||||
# quantize a model with Q5_0 method
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
|
||||
|
||||
# run the examples as usual, specifying the quantized model file
|
||||
./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
|
||||
```
|
||||
|
||||
## Core ML support
|
||||
|
||||
On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
|
||||
speed-up - more than 3x faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
|
||||
|
||||
- Install Python dependencies needed for the creation of the Core ML model:
|
||||
|
||||
```bash
|
||||
pip install ane_transformers
|
||||
pip install openai-whisper
|
||||
pip install coremltools
|
||||
```
|
||||
|
||||
- To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
|
||||
- Python 3.10 is recommended.
|
||||
- macOS Sonoma (version 14) or newer is recommended, as older versions of macOS might experience issues with transcription hallucination.
|
||||
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
|
||||
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
|
||||
- To activate the environment, use: `conda activate py310-whisper`
|
||||
|
||||
- Generate a Core ML model. For example, to generate a `base.en` model, use:
|
||||
|
||||
```bash
|
||||
./models/generate-coreml-model.sh base.en
|
||||
```
|
||||
|
||||
This will generate the folder `models/ggml-base.en-encoder.mlmodelc`.
|
||||
|
||||
- Build `whisper.cpp` with Core ML support:
|
||||
|
||||
```bash
|
||||
# using CMake
|
||||
cmake -B build -DWHISPER_COREML=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
- Run the examples as usual. For example:
|
||||
|
||||
```text
|
||||
$ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||
|
||||
...
|
||||
|
||||
whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
|
||||
whisper_init_state: first run on a device may take a while ...
|
||||
whisper_init_state: Core ML model loaded
|
||||
|
||||
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
|
||||
|
||||
...
|
||||
```
|
||||
|
||||
The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
|
||||
Subsequent runs are faster.
|
||||
|
||||
For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566).
|
||||
|
||||
## OpenVINO support
|
||||
|
||||
On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
|
||||
on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
|
||||
|
||||
This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
|
||||
|
||||
- First, set up a Python virtual environment and install the Python dependencies. Python 3.10 is recommended.
|
||||
|
||||
Windows:
|
||||
|
||||
```powershell
|
||||
cd models
|
||||
python -m venv openvino_conv_env
|
||||
openvino_conv_env\Scripts\activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-openvino.txt
|
||||
```
|
||||
|
||||
Linux and macOS:
|
||||
|
||||
```bash
|
||||
cd models
|
||||
python3 -m venv openvino_conv_env
|
||||
source openvino_conv_env/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-openvino.txt
|
||||
```
|
||||
|
||||
- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
|
||||
|
||||
```
|
||||
python convert-whisper-to-openvino.py --model base.en
|
||||
```
|
||||
|
||||
This will produce `ggml-base.en-encoder-openvino.xml/.bin` IR model files. It's recommended to relocate these to the same folder as the `ggml` models, as that
|
||||
is the default location that the OpenVINO extension will search at runtime.
|
||||
|
||||
- Build `whisper.cpp` with OpenVINO support:
|
||||
|
||||
Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0).
|
||||
|
||||
After downloading & extracting the package onto your development system, set up the required environment by sourcing the `setupvars` script. For example:
|
||||
|
||||
Linux:
|
||||
|
||||
```bash
|
||||
source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
|
||||
```
|
||||
|
||||
Windows (cmd):
|
||||
|
||||
```powershell
|
||||
C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
|
||||
```
|
||||
|
||||
And then build the project using cmake:
|
||||
|
||||
```bash
|
||||
cmake -B build -DWHISPER_OPENVINO=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
- Run the examples as usual. For example:
|
||||
|
||||
```text
|
||||
$ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||
|
||||
...
|
||||
|
||||
whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
|
||||
whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
|
||||
whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
|
||||
whisper_ctx_init_openvino_encoder: OpenVINO model loaded
|
||||
|
||||
system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
|
||||
|
||||
...
|
||||
```
|
||||
|
||||
The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
|
||||
cached for the next run.
|
||||
|
||||
For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
|
||||
|
||||
## NVIDIA GPU support
|
||||
|
||||
With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
|
||||
First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
|
||||
|
||||
Now build `whisper.cpp` with CUDA support:
|
||||
|
||||
```
|
||||
cmake -B build -DGGML_CUDA=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
## Vulkan GPU support
|
||||
Vulkan is a cross-vendor solution which allows you to accelerate workloads on your GPU.
|
||||
First, make sure your graphics card driver provides support for the Vulkan API.
|
||||
|
||||
Now build `whisper.cpp` with Vulkan support:
|
||||
```
|
||||
cmake -B build -DGGML_VULKAN=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
## BLAS CPU support via OpenBLAS
|
||||
|
||||
Encoder processing can be accelerated on the CPU via OpenBLAS.
|
||||
First, make sure you have installed `openblas`: https://www.openblas.net/
|
||||
|
||||
Now build `whisper.cpp` with OpenBLAS support:
|
||||
|
||||
```
|
||||
cmake -B build -DGGML_BLAS=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
## Ascend NPU support
|
||||
|
||||
Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
|
||||
|
||||
First, check if your Ascend NPU device is supported:
|
||||
|
||||
**Verified devices**
|
||||
| Ascend NPU | Status |
|
||||
|:-----------------------------:|:-------:|
|
||||
| Atlas 300T A2 | Support |
|
||||
|
||||
Then, make sure you have installed the [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community). The latest version of CANN is recommended.
|
||||
|
||||
Now build `whisper.cpp` with CANN support:
|
||||
|
||||
```
|
||||
cmake -B build -DGGML_CANN=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
Run the inference examples as usual, for example:
|
||||
|
||||
```
|
||||
./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
|
||||
```
|
||||
|
||||
*Notes:*
|
||||
|
||||
- If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
|
||||
- If you run successfully on your Ascend NPU device, please help update the `Verified devices` table.
|
||||
|
||||
## Docker
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Docker must be installed and running on your system.
|
||||
- Create a folder to store big models & intermediate files (e.g. /whisper/models)
|
||||
|
||||
### Images
|
||||
|
||||
We have two Docker images available for this project:
|
||||
|
||||
1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
|
||||
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
|
||||
|
||||
### Usage
|
||||
|
||||
```shell
|
||||
# download model and persist it in a local folder
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
whisper.cpp:main "./models/download-ggml-model.sh base /models"
|
||||
# transcribe an audio file
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
-v path/to/audios:/audios \
|
||||
whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
|
||||
# transcribe an audio file in samples folder
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
|
||||
```
|
||||
|
||||
## Installing with Conan
|
||||
|
||||
You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
|
||||
|
||||
```
|
||||
conan install --requires="whisper-cpp/[*]" --build=missing
|
||||
```
|
||||
|
||||
For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
|
||||
|
||||
## Limitations
|
||||
|
||||
- Inference only
|
||||
|
||||
## Real-time audio input example
|
||||
|
||||
This is a naive example of performing real-time inference on audio from your microphone.
|
||||
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
|
||||
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
||||
|
||||
```bash
|
||||
cmake -B build -DWHISPER_SDL2=ON
|
||||
cmake --build build --config Release
|
||||
./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
||||
```
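For reference, here is a minimal sketch of the idea behind the stream tool: at every `step` interval, append the newly captured audio to a small sliding window and run `whisper_full` on it, keeping a short tail of old audio for context. Audio capture itself is out of scope here - `capture_audio()` is a hypothetical helper, not part of `whisper.h` - and the real [stream](examples/stream) example is considerably more careful:

```c
#include <string.h>
#include "whisper.h"

#define SAMPLE_RATE 16000
#define STEP_MS     500   // process new audio every 500 ms (like --step 500)
#define KEEP_MS     200   // keep 200 ms of old audio for context

// hypothetical helper (not part of whisper.h): blocks until `n` new
// 16 kHz mono float samples from the microphone are available
void capture_audio(float * dst, int n);

void stream_loop(struct whisper_context * ctx) {
    const int n_step = (SAMPLE_RATE * STEP_MS) / 1000;
    const int n_keep = (SAMPLE_RATE * KEEP_MS) / 1000;

    static float window[SAMPLE_RATE * 10]; // generous upper bound on the window size
    int n_window = 0;

    struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.print_realtime = true; // print segments as they are decoded

    for (;;) {
        // append the newly captured chunk to the window
        capture_audio(window + n_window, n_step);
        n_window += n_step;

        whisper_full(ctx, wparams, window, n_window);

        // keep only the tail of the window as context for the next step
        if (n_window > n_keep) {
            memmove(window, window + n_window - n_keep, n_keep * sizeof(float));
            n_window = n_keep;
        }
    }
}
```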
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
|
||||
|
||||
## Confidence color-coding
|
||||
|
||||
Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
|
||||
to highlight words with high or low confidence:
|
||||
|
||||
```bash
|
||||
./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
|
||||
```
|
||||
|
||||
<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
|
||||
|
||||
## Controlling the length of the generated text segments (experimental)
|
||||
|
||||
For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
|
||||
|
||||
```text
|
||||
$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
|
||||
|
||||
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
||||
...
|
||||
system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
|
||||
|
||||
main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
||||
|
||||
[00:00:00.000 --> 00:00:00.850] And so my
|
||||
[00:00:00.850 --> 00:00:01.590] fellow
|
||||
[00:00:01.590 --> 00:00:04.140] Americans, ask
|
||||
[00:00:04.140 --> 00:00:05.660] not what your
|
||||
[00:00:05.660 --> 00:00:06.840] country can do
|
||||
[00:00:06.840 --> 00:00:08.430] for you, ask
|
||||
[00:00:08.430 --> 00:00:09.440] what you can do
|
||||
[00:00:09.440 --> 00:00:10.020] for your
|
||||
[00:00:10.020 --> 00:00:11.000] country.
|
||||
```
|
||||
|
||||
## Word-level timestamp (experimental)
|
||||
|
||||
The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
|
||||
|
||||
```text
|
||||
$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
|
||||
|
||||
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
||||
...
|
||||
system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
|
||||
|
||||
main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
||||
|
||||
[00:00:00.000 --> 00:00:00.320]
|
||||
[00:00:00.320 --> 00:00:00.370] And
|
||||
[00:00:00.370 --> 00:00:00.690] so
|
||||
[00:00:00.690 --> 00:00:00.850] my
|
||||
[00:00:00.850 --> 00:00:01.590] fellow
|
||||
[00:00:01.590 --> 00:00:02.850] Americans
|
||||
[00:00:02.850 --> 00:00:03.300] ,
|
||||
[00:00:03.300 --> 00:00:04.140] ask
|
||||
[00:00:04.140 --> 00:00:04.990] not
|
||||
[00:00:04.990 --> 00:00:05.410] what
|
||||
[00:00:05.410 --> 00:00:05.660] your
|
||||
[00:00:05.660 --> 00:00:06.260] country
|
||||
[00:00:06.260 --> 00:00:06.600] can
|
||||
[00:00:06.600 --> 00:00:06.840] do
|
||||
[00:00:06.840 --> 00:00:07.010] for
|
||||
[00:00:07.010 --> 00:00:08.170] you
|
||||
[00:00:08.170 --> 00:00:08.190] ,
|
||||
[00:00:08.190 --> 00:00:08.430] ask
|
||||
[00:00:08.430 --> 00:00:08.910] what
|
||||
[00:00:08.910 --> 00:00:09.040] you
|
||||
[00:00:09.040 --> 00:00:09.320] can
|
||||
[00:00:09.320 --> 00:00:09.440] do
|
||||
[00:00:09.440 --> 00:00:09.760] for
|
||||
[00:00:09.760 --> 00:00:10.020] your
|
||||
[00:00:10.020 --> 00:00:10.510] country
|
||||
[00:00:10.510 --> 00:00:11.000] .
|
||||
```
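When calling the library directly instead of the CLI, these experimental options correspond to fields of `whisper_full_params`. Here is a rough sketch; check `whisper.h` for the authoritative list of fields:

```c
#include <stdbool.h>
#include "whisper.h"

// rough library-side equivalent of `-ml 1`: per-token timestamps with very
// short segments; pass the result to whisper_full() as in the usual flow
static struct whisper_full_params word_level_params(void) {
    struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

    wparams.token_timestamps = true; // compute per-token timestamps
    wparams.max_len          = 1;    // maximum segment length in characters
    wparams.split_on_word    = true; // prefer splitting segments on word boundaries

    return wparams;
}
```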
|
||||
|
||||
## Speaker segmentation via tinydiarize (experimental)
|
||||
|
||||
More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058
|
||||
|
||||
Sample usage:
|
||||
|
||||
```bash
|
||||
# download a tinydiarize compatible model
|
||||
./models/download-ggml-model.sh small.en-tdrz
|
||||
|
||||
# run as usual, adding the "-tdrz" command-line argument
|
||||
./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
|
||||
...
|
||||
main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
|
||||
...
|
||||
[00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
|
||||
[00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
|
||||
[00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
|
||||
[00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
|
||||
[00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
|
||||
[00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
|
||||
[00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
|
||||
[00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
|
||||
[00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
|
||||
```
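When using the library directly, the same behaviour can be requested through `whisper_full_params`. Here is a rough sketch, assuming a tdrz-compatible model and the `tdrz_enable` / `whisper_full_get_segment_speaker_turn_next` API introduced with this feature; see PR [#1058](https://github.com/ggerganov/whisper.cpp/pull/1058) and `whisper.h` for the exact names:

```c
#include <stdio.h>
#include "whisper.h"

// rough library-side equivalent of the `-tdrz` flag: enable tinydiarize and
// append a marker whenever a segment ends with a speaker turn; `pcmf32` and
// `n_samples` hold 16 kHz mono float audio, as in the earlier sketches
void transcribe_with_speaker_turns(struct whisper_context * ctx,
                                   const float * pcmf32, int n_samples) {
    struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.tdrz_enable = true;

    if (whisper_full(ctx, wparams, pcmf32, n_samples) != 0) {
        return;
    }

    for (int i = 0; i < whisper_full_n_segments(ctx); ++i) {
        printf("%s%s\n", whisper_full_get_segment_text(ctx, i),
               whisper_full_get_segment_speaker_turn_next(ctx, i) ? " [SPEAKER_TURN]" : "");
    }
}
```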
|
||||
|
||||
## Karaoke-style movie generation (experimental)
|
||||
|
||||
The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the
|
||||
currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script.
|
||||
This requires `ffmpeg` to be installed.
|
||||
|
||||
Here are a few _"typical"_ examples:
|
||||
|
||||
```bash
|
||||
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
|
||||
source ./samples/jfk.wav.wts
|
||||
ffplay ./samples/jfk.wav.mp4
|
||||
```
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
|
||||
|
||||
---
|
||||
|
||||
```bash
|
||||
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
|
||||
source ./samples/mm0.wav.wts
|
||||
ffplay ./samples/mm0.wav.mp4
|
||||
```
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
|
||||
|
||||
---
|
||||
|
||||
```bash
|
||||
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
|
||||
source ./samples/gb0.wav.wts
|
||||
ffplay ./samples/gb0.wav.mp4
|
||||
```
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
|
||||
|
||||
---
|
||||
|
||||
## Video comparison of different models
|
||||
|
||||
Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
|
||||
|
||||
```bash
|
||||
./scripts/bench-wts.sh samples/jfk.wav
|
||||
ffplay ./samples/jfk.wav.all.mp4
|
||||
```
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
|
||||
|
||||
---
|
||||
|
||||
## Benchmarks
|
||||
|
||||
In order to have an objective comparison of the performance of the inference across different system configurations,
|
||||
use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
|
||||
took to execute it. The results are summarized in the following Github issue:
|
||||
|
||||
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
|
||||
|
||||
Additionally, a script to run whisper.cpp with different models and audio files is provided: [bench.py](scripts/bench.py).
|
||||
|
||||
You can run it with the following command; by default it will run against any standard model in the models folder.
|
||||
|
||||
```bash
|
||||
python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
|
||||
```
|
||||
|
||||
It is written in Python with the intention of being easy to modify and extend for your benchmarking use case.
|
||||
|
||||
It outputs a CSV file with the benchmarking results.
|
||||
|
||||
## `ggml` format
|
||||
|
||||
The original models are converted to a custom binary format. This allows packing everything needed into a single file:
|
||||
|
||||
- model parameters
|
||||
- mel filters
|
||||
- vocabulary
|
||||
- weights
|
||||
|
||||
You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
|
||||
or manually from here:
|
||||
|
||||
- https://huggingface.co/ggerganov/whisper.cpp
|
||||
- https://ggml.ggerganov.com
|
||||
|
||||
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
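As a quick illustration of the single-file layout, the converted file starts with a 4-byte magic value, followed by the hyperparameters, mel filters, vocabulary and weights. Below is a small sketch that only checks the magic; the value `0x67676d6c` and the little-endian layout follow the conversion script, which remains the authoritative reference:

```c
#include <stdio.h>
#include <stdint.h>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.bin\n", argv[0]);
        return 1;
    }

    FILE * f = fopen(argv[1], "rb");
    if (!f) {
        perror("fopen");
        return 1;
    }

    // the converted models start with a 4-byte magic value (little-endian)
    uint32_t magic = 0;
    const int ok = fread(&magic, sizeof(magic), 1, f) == 1 && magic == 0x67676d6c;
    fclose(f);

    printf(ok ? "looks like a ggml model file\n" : "not a ggml model file\n");
    return ok ? 0 : 1;
}
```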
|
||||
|
||||
## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
|
||||
|
||||
- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
|
||||
- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
|
||||
- React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
|
||||
- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
||||
- [x] Java:
|
||||
- [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
|
||||
- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
|
||||
- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
|
||||
- [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
|
||||
- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
|
||||
- [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
|
||||
- [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
|
||||
- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
|
||||
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
|
||||
- [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
|
||||
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
|
||||
- [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
|
||||
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
|
||||
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
|
||||
|
||||
## Examples
|
||||
|
||||
There are various examples of using the library for different projects in the [examples](examples) folder.
|
||||
Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
|
||||
|
||||
| Example | Web | Description |
|
||||
| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
|
||||
| [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
|
||||
| [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
|
||||
| [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
|
||||
| [whisper-server](examples/server) | | HTTP transcription server with OAI-like API |
|
||||
| [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
|
||||
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
|
||||
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
|
||||
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
|
||||
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
||||
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
||||
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
||||
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
|
||||
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
|
||||
|
||||
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
|
||||
|
||||
If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic.
|
||||
You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category
|
||||
to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
|
||||
[Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion.
|
|
@ -0,0 +1,249 @@
|
|||
# whisper.cpp for SYCL
|
||||
|
||||
[Background](#background)
|
||||
|
||||
[OS](#os)
|
||||
|
||||
[Intel GPU](#intel-gpu)
|
||||
|
||||
[Linux](#linux)
|
||||
|
||||
[Environment Variable](#environment-variable)
|
||||
|
||||
[Known Issue](#known-issue)
|
||||
|
||||
[Todo](#todo)
|
||||
|
||||
## Background
|
||||
|
||||
SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators—such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
|
||||
|
||||
oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
|
||||
|
||||
Intel uses SYCL as the direct programming language to support CPUs, GPUs, and FPGAs.
|
||||
|
||||
To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use the open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate to SYCL.
|
||||
|
||||
whisper.cpp for SYCL is used to support Intel GPUs.
|
||||
|
||||
For Intel CPUs, it is recommended to use whisper.cpp for x86 (the Intel MKL build).
|
||||
|
||||
## OS
|
||||
|
||||
|OS|Status|Verified|
|
||||
|-|-|-|
|
||||
|Linux|Support|Ubuntu 22.04|
|
||||
|Windows|Ongoing| |
|
||||
|
||||
|
||||
## Intel GPU
|
||||
|
||||
|Intel GPU| Status | Verified Model|
|
||||
|-|-|-|
|
||||
|Intel Data Center Max Series| Support| Max 1550|
|
||||
|Intel Data Center Flex Series| Support| Flex 170|
|
||||
|Intel Arc Series| Support| Arc 770|
|
||||
|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
|
||||
|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
|
||||
|
||||
|
||||
## Linux
|
||||
|
||||
### Setup Environment
|
||||
|
||||
1. Install Intel GPU driver.
|
||||
|
||||
a. Please install the Intel GPU driver following the official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
|
||||
|
||||
Note: for iGPU, please install the client GPU driver.
|
||||
|
||||
b. Add your user to the `video` and `render` groups:
|
||||
|
||||
```
|
||||
sudo usermod -aG render username
|
||||
sudo usermod -aG video username
|
||||
```
|
||||
|
||||
Note: log out and log back in for the group change to take effect.
|
||||
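As a quick sanity check (not part of the official guide), you can confirm the group membership after logging back in; `groups` is a standard Linux command, and `username` is the same placeholder used above:

```
groups username
# the output should include both "video" and "render"
```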
|
||||
c. Check
|
||||
|
||||
```
|
||||
sudo apt install clinfo
|
||||
sudo clinfo -l
|
||||
```
|
||||
|
||||
Output (example):
|
||||
|
||||
```
|
||||
Platform #0: Intel(R) OpenCL Graphics
|
||||
`-- Device #0: Intel(R) Arc(TM) A770 Graphics
|
||||
|
||||
|
||||
Platform #0: Intel(R) OpenCL HD Graphics
|
||||
`-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
|
||||
```
|
||||
|
||||
2. Install Intel® oneAPI Base toolkit.
|
||||
|
||||
|
||||
a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
|
||||
|
||||
We recommend installing to the default folder: **/opt/intel/oneapi**.
|
||||
|
||||
The following guide uses the default folder as an example. If you installed to a different folder, adjust the paths accordingly.
|
||||
|
||||
b. Check
|
||||
|
||||
```
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
sycl-ls
|
||||
```
|
||||
|
||||
There should be one or more Level Zero devices, such as **[ext_oneapi_level_zero:gpu:0]**.
|
||||
|
||||
Output (example):
|
||||
```
|
||||
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
|
||||
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
|
||||
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
|
||||
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
|
||||
|
||||
```
|
||||
|
||||
3. Build locally:
|
||||
|
||||
```
|
||||
mkdir -p build
|
||||
cd build
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
#for FP16
|
||||
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
|
||||
|
||||
#for FP32
|
||||
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
|
||||
#build example/main only
|
||||
#cmake --build . --config Release --target main
|
||||
|
||||
#build all binary
|
||||
cmake --build . --config Release -v
|
||||
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```
|
||||
./examples/sycl/build.sh
|
||||
```
|
||||
|
||||
Note:
|
||||
|
||||
- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only.
|
||||
|
||||
### Run
|
||||
|
||||
1. Put the model file in the **models** folder.
|
||||
|
||||
2. Enable the oneAPI runtime environment:
|
||||
|
||||
```
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
```
|
||||
|
||||
3. List the device IDs.
|
||||
|
||||
Run without parameters:
|
||||
|
||||
```
|
||||
./build/bin/ls-sycl-device
|
||||
|
||||
or
|
||||
|
||||
./build/bin/main
|
||||
```
|
||||
|
||||
Check the IDs in the startup log, for example:
|
||||
|
||||
```
|
||||
found 4 SYCL devices:
|
||||
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
|
||||
```
|
||||
|
||||
|Attribute|Note|
|
||||
|-|-|
|
||||
|compute capability 1.3|Level Zero runtime, recommended|
|
||||
|compute capability 3.0|OpenCL runtime, slower than Level Zero in most cases|
|
||||
|
||||
4. Set device ID and execute whisper.cpp
|
||||
|
||||
Set device ID 0 with **GGML_SYCL_DEVICE=0**:
|
||||
|
||||
```
|
||||
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||
```
|
||||
or run using the script:
|
||||
|
||||
```
|
||||
./examples/sycl/run_whisper.sh
|
||||
```
|
||||
|
||||
|
||||
|
||||
5. Check the device ID in output
|
||||
|
||||
For example:
|
||||
```
|
||||
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
```
|
||||
|
||||
|
||||
## Environment Variable
|
||||
|
||||
#### Build
|
||||
|
||||
|Name|Value|Function|
|
||||
|-|-|-|
|
||||
|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
|
||||
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
|
||||
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|
||||
|CMAKE_CXX_COMPILER|icpx|Use the icpx compiler for SYCL code path|
|
||||
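Putting the build-time variables together, a typical FP16 configure-and-build sequence looks like the sketch below (it mirrors the build step above; adjust paths for your setup):

```
source /opt/intel/oneapi/setvars.sh
cmake .. -DWHISPER_SYCL=ON -DWHISPER_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build . --config Release -v
```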
|
||||
#### Running
|
||||
|
||||
|
||||
|Name|Value|Function|
|
||||
|-|-|-|
|
||||
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device ID to use. Check the available device IDs in the default run output|
|
||||
|GGML_SYCL_DEBUG|0 (default) or 1|Enable debug logging via the GGML_SYCL_DEBUG macro|
|
||||
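For example, combining both runtime variables with the run command from the previous section (a sketch; the model and sample paths are the ones used above):

```
GGML_SYCL_DEVICE=0 GGML_SYCL_DEBUG=1 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
```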
|
||||
## Known Issue
|
||||
|
||||
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
||||
|
||||
This happens when the oneAPI runtime environment has not been enabled.
|
||||
|
||||
Install the oneAPI Base Toolkit and enable it with: `source /opt/intel/oneapi/setvars.sh`.
|
||||
|
||||
|
||||
- Hang during startup
|
||||
|
||||
llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, the memcpy can misbehave and block.
|
||||
|
||||
Solution: add **--no-mmap**.
|
||||
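For instance, appended to the run command from the previous section (a sketch; the flag name is taken from the note above, and whether the example binary accepts `--no-mmap` depends on the build):

```
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav --no-mmap
```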
|
||||
## Todo
|
||||
|
||||
- Support building on Windows.
|
||||
|
||||
- Support multiple cards.
|
5
packages/app-mobile/android/vendor/whisper.cpp/Sources/whisper/module.modulemap
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
module whisper [system] {
|
||||
header "whisper.h"
|
||||
link "whisper"
|
||||
export *
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
|
||||
#include <whisper.h>
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
name: Close inactive issues
|
||||
on:
|
||||
schedule:
|
||||
- cron: "42 0 * * *"
|
||||
|
||||
# Fine-grained permission
|
||||
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
close-issues:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@v5
|
||||
with:
|
||||
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 14
|
||||
stale-issue-label: "stale"
|
||||
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
||||
operations-per-run: 10000
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
16
packages/app-mobile/android/vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Set the default compile features and properties for a target.
|
||||
|
||||
if (NOT TARGET)
|
||||
message(FATAL_ERROR "TARGET not set before including DefaultTargetOptions")
|
||||
endif()
|
||||
|
||||
target_compile_features(${TARGET}
|
||||
PRIVATE
|
||||
cxx_std_11
|
||||
)
|
||||
|
||||
set_target_properties(${TARGET}
|
||||
PROPERTIES
|
||||
EXPORT_COMPILE_COMMANDS ON
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
|
||||
)
|
|
@ -0,0 +1,163 @@
|
|||
# From
|
||||
# https://github.com/snikulov/cmake-modules/blob/master/FindFFmpeg.cmake
|
||||
#
|
||||
# vim: ts=2 sw=2
|
||||
# - Try to find the required ffmpeg components(default: AVFORMAT, AVUTIL, AVCODEC)
|
||||
#
|
||||
# Once done this will define
|
||||
# FFMPEG_FOUND - System has the all required components.
|
||||
# FFMPEG_INCLUDE_DIRS - Include directory necessary for using the required components headers.
|
||||
# FFMPEG_LIBRARIES - Link these to use the required ffmpeg components.
|
||||
# FFMPEG_DEFINITIONS - Compiler switches required for using the required ffmpeg components.
|
||||
#
|
||||
# For each of the components it will additionally set.
|
||||
# - AVCODEC
|
||||
# - AVDEVICE
|
||||
# - AVFORMAT
|
||||
# - AVFILTER
|
||||
# - AVUTIL
|
||||
# - POSTPROC
|
||||
# - SWSCALE
|
||||
# the following variables will be defined
|
||||
# <component>_FOUND - System has <component>
|
||||
# <component>_INCLUDE_DIRS - Include directory necessary for using the <component> headers
|
||||
# <component>_LIBRARIES - Link these to use <component>
|
||||
# <component>_DEFINITIONS - Compiler switches required for using <component>
|
||||
# <component>_VERSION - The components version
|
||||
#
|
||||
# Copyright (c) 2006, Matthias Kretz, <kretz@kde.org>
|
||||
# Copyright (c) 2008, Alexander Neundorf, <neundorf@kde.org>
|
||||
# Copyright (c) 2011, Michael Jansen, <kde@michael-jansen.biz>
|
||||
#
|
||||
# Redistribution and use is allowed according to the terms of the BSD license.
|
||||
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
# The default components were taken from a survey over other FindFFMPEG.cmake files
|
||||
if (NOT FFmpeg_FIND_COMPONENTS)
|
||||
set(FFmpeg_FIND_COMPONENTS AVFORMAT AVCODEC AVUTIL SWRESAMPLE)
|
||||
endif()
|
||||
|
||||
#
|
||||
### Macro: set_component_found
|
||||
#
|
||||
# Marks the given component as found if both *_LIBRARIES AND *_INCLUDE_DIRS is present.
|
||||
#
|
||||
macro(set_component_found _component )
|
||||
if (${_component}_LIBRARIES AND ${_component}_INCLUDE_DIRS)
|
||||
message(DEBUG " - ${_component} found.")
|
||||
set(${_component}_FOUND TRUE)
|
||||
else ()
|
||||
message(DEBUG " - ${_component} not found.")
|
||||
endif ()
|
||||
endmacro()
|
||||
|
||||
#
|
||||
### Macro: find_component
|
||||
#
|
||||
# Checks for the given component by invoking pkgconfig and then looking up the libraries and
|
||||
# include directories.
|
||||
#
|
||||
macro(find_component _component _pkgconfig _library _header)
|
||||
|
||||
if (NOT WIN32)
|
||||
# use pkg-config to get the directories and then use these values
|
||||
# in the FIND_PATH() and FIND_LIBRARY() calls
|
||||
find_package(PkgConfig)
|
||||
if (PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(PC_${_component} ${_pkgconfig})
|
||||
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDEDIR}")
|
||||
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDE_DIRS}")
|
||||
message(STATUS "${PC_${_component}_CFLAGS}")
|
||||
endif ()
|
||||
endif (NOT WIN32)
|
||||
|
||||
|
||||
find_path(${_component}_INCLUDE_DIRS ${_header}
|
||||
HINTS
|
||||
${PC_${_component}_INCLUDEDIR}
|
||||
${PC_${_component}_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES
|
||||
ffmpeg
|
||||
)
|
||||
|
||||
# CMake's default is to search first for shared libraries and then for static libraries.
|
||||
# Todo later: add option to prefer static libs over dynamic:
|
||||
find_library(${_component}_LIBRARIES NAMES ${_library} lib${_library}.a
|
||||
HINTS
|
||||
${PC_${_component}_LIBDIR}
|
||||
${PC_${_component}_LIBRARY_DIRS}
|
||||
)
|
||||
|
||||
set(${_component}_DEFINITIONS ${PC_${_component}_CFLAGS_OTHER} CACHE STRING "The ${_component} CFLAGS.")
|
||||
set(${_component}_VERSION ${PC_${_component}_VERSION} CACHE STRING "The ${_component} version number.")
|
||||
|
||||
set_component_found(${_component})
|
||||
|
||||
mark_as_advanced(
|
||||
${_component}_INCLUDE_DIRS
|
||||
${_component}_LIBRARIES
|
||||
${_component}_DEFINITIONS
|
||||
${_component}_VERSION)
|
||||
|
||||
endmacro()
|
||||
|
||||
|
||||
# Check for cached results. If there are skip the costly part.
|
||||
if (NOT FFMPEG_LIBRARIES)
|
||||
|
||||
# Check for all possible component.
|
||||
find_component(AVCODEC libavcodec avcodec libavcodec/avcodec.h)
|
||||
find_component(AVFORMAT libavformat avformat libavformat/avformat.h)
|
||||
find_component(AVDEVICE libavdevice avdevice libavdevice/avdevice.h)
|
||||
#find_component(AVRESAMPLE libavresample avresample libavresample/avresample.h) # old name for swresample
|
||||
find_component(AVUTIL libavutil avutil libavutil/avutil.h)
|
||||
find_component(AVFILTER libavfilter avfilter libavfilter/avfilter.h)
|
||||
find_component(SWSCALE libswscale swscale libswscale/swscale.h)
|
||||
find_component(POSTPROC libpostproc postproc libpostproc/postprocess.h)
|
||||
find_component(SWRESAMPLE libswresample swresample libswresample/swresample.h)
|
||||
|
||||
# Check if the required components were found and add their stuff to the FFMPEG_* vars.
|
||||
foreach (_component ${FFmpeg_FIND_COMPONENTS})
|
||||
if (${_component}_FOUND)
|
||||
# message(STATUS "Required component ${_component} present.")
|
||||
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} ${${_component}_LIBRARIES})
|
||||
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} ${${_component}_DEFINITIONS})
|
||||
list(APPEND FFMPEG_INCLUDE_DIRS ${${_component}_INCLUDE_DIRS})
|
||||
else ()
|
||||
# message(STATUS "Required component ${_component} missing.")
|
||||
endif ()
|
||||
endforeach ()
|
||||
|
||||
# Build the include path with duplicates removed.
|
||||
if (FFMPEG_INCLUDE_DIRS)
|
||||
list(REMOVE_DUPLICATES FFMPEG_INCLUDE_DIRS)
|
||||
endif ()
|
||||
|
||||
# cache the vars.
|
||||
set(FFMPEG_INCLUDE_DIRS ${FFMPEG_INCLUDE_DIRS} CACHE STRING "The FFmpeg include directories." FORCE)
|
||||
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} CACHE STRING "The FFmpeg libraries." FORCE)
|
||||
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} CACHE STRING "The FFmpeg cflags." FORCE)
|
||||
|
||||
mark_as_advanced(FFMPEG_INCLUDE_DIRS
|
||||
FFMPEG_LIBRARIES
|
||||
FFMPEG_DEFINITIONS)
|
||||
|
||||
endif ()
|
||||
|
||||
# Now set the noncached _FOUND vars for the components.
|
||||
# whisper.cpp does not need SWSCALE
|
||||
foreach (_component AVCODEC AVDEVICE AVFORMAT AVRESAMPLE AVUTIL POSTPROCESS)
|
||||
set_component_found(${_component})
|
||||
endforeach ()
|
||||
|
||||
# Compile the list of required vars
|
||||
set(_FFmpeg_REQUIRED_VARS FFMPEG_LIBRARIES FFMPEG_INCLUDE_DIRS)
|
||||
foreach (_component ${FFmpeg_FIND_COMPONENTS})
|
||||
list(APPEND _FFmpeg_REQUIRED_VARS ${_component}_LIBRARIES ${_component}_INCLUDE_DIRS)
|
||||
endforeach ()
|
||||
|
||||
# Give a nice error message if some of the required vars are missing.
|
||||
find_package_handle_standard_args(FFmpeg DEFAULT_MSG ${_FFmpeg_REQUIRED_VARS})
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
set(BUILD_NUMBER 0)
|
||||
set(BUILD_COMMIT "unknown")
|
||||
set(BUILD_COMPILER "unknown")
|
||||
set(BUILD_TARGET "unknown")
|
||||
|
||||
# Look for git
|
||||
find_package(Git)
|
||||
if(NOT Git_FOUND)
|
||||
find_program(GIT_EXECUTABLE NAMES git git.exe)
|
||||
if(GIT_EXECUTABLE)
|
||||
set(Git_FOUND TRUE)
|
||||
message(STATUS "Found Git: ${GIT_EXECUTABLE}")
|
||||
else()
|
||||
message(WARNING "Git not found. Build info will not be accurate.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Get the commit count and hash
|
||||
if(Git_FOUND)
|
||||
execute_process(
|
||||
COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE HEAD
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
RESULT_VARIABLE RES
|
||||
)
|
||||
if (RES EQUAL 0)
|
||||
set(BUILD_COMMIT ${HEAD})
|
||||
endif()
|
||||
execute_process(
|
||||
COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE COUNT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
RESULT_VARIABLE RES
|
||||
)
|
||||
if (RES EQUAL 0)
|
||||
set(BUILD_NUMBER ${COUNT})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
|
||||
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
|
||||
else()
|
||||
execute_process(
|
||||
COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
|
||||
OUTPUT_VARIABLE OUT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
set(BUILD_COMPILER ${OUT})
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} -dumpmachine
|
||||
OUTPUT_VARIABLE OUT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
set(BUILD_TARGET ${OUT})
|
||||
endif()
|
|
@ -0,0 +1,22 @@
|
|||
find_package(Git)
|
||||
|
||||
# the commit's SHA1
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
|
||||
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
||||
OUTPUT_VARIABLE GIT_SHA1
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# the date of the commit
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
|
||||
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
||||
OUTPUT_VARIABLE GIT_DATE
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# the subject of the commit
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%s
|
||||
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
||||
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
65
packages/app-mobile/android/vendor/whisper.cpp/cmake/whisper-config.cmake.in
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
set(WHISPER_VERSION @WHISPER_INSTALL_VERSION@)
|
||||
set(WHISPER_BUILD_COMMIT @WHISPER_BUILD_COMMIT@)
|
||||
set(WHISPER_BUILD_NUMBER @WHISPER_BUILD_NUMBER@)
|
||||
set(WHISPER_SHARED_LIB @BUILD_SHARED_LIBS@)
|
||||
|
||||
set(GGML_BLAS @GGML_BLAS@)
|
||||
set(GGML_CUDA @GGML_CUDA@)
|
||||
set(GGML_METAL @GGML_METAL@)
|
||||
set(GGML_HIPBLAS @GGML_HIPBLAS@)
|
||||
set(GGML_ACCELERATE @GGML_ACCELERATE@)
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set_and_check(WHISPER_INCLUDE_DIR "@PACKAGE_WHISPER_INCLUDE_INSTALL_DIR@")
|
||||
set_and_check(WHISPER_LIB_DIR "@PACKAGE_WHISPER_LIB_INSTALL_DIR@")
|
||||
set_and_check(WHISPER_BIN_DIR "@PACKAGE_WHISPER_BIN_INSTALL_DIR@")
|
||||
|
||||
# Ensure transient dependencies satisfied
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
if (APPLE AND GGML_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_BLAS)
|
||||
find_package(BLAS REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_CUDA)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_METAL)
|
||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_HIPBLAS)
|
||||
find_package(hip REQUIRED)
|
||||
find_package(hipblas REQUIRED)
|
||||
find_package(rocblas REQUIRED)
|
||||
endif()
|
||||
|
||||
find_library(whisper_LIBRARY whisper
|
||||
REQUIRED
|
||||
HINTS ${WHISPER_LIB_DIR})
|
||||
|
||||
set(_whisper_link_deps "Threads::Threads" "@WHISPER_EXTRA_LIBS@")
|
||||
set(_whisper_transient_defines "@WHISPER_TRANSIENT_DEFINES@")
|
||||
|
||||
add_library(whisper UNKNOWN IMPORTED)
|
||||
|
||||
set_target_properties(whisper
|
||||
PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${WHISPER_INCLUDE_DIR}"
|
||||
INTERFACE_LINK_LIBRARIES "${_whisper_link_deps}"
|
||||
INTERFACE_COMPILE_DEFINITIONS "${_whisper_transient_defines}"
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
||||
IMPORTED_LOCATION "${whisper_LIBRARY}"
|
||||
INTERFACE_COMPILE_FEATURES cxx_std_11
|
||||
POSITION_INDEPENDENT_CODE ON )
|
||||
|
||||
check_required_components(whisper)
|
|
@ -0,0 +1,10 @@
|
|||
prefix=@CMAKE_INSTALL_PREFIX@
|
||||
exec_prefix=${prefix}
|
||||
libdir=${exec_prefix}/lib
|
||||
includedir=${prefix}/include
|
||||
|
||||
Name: whisper
|
||||
Description: Port of OpenAI's Whisper model in C/C++
|
||||
Version: @PROJECT_VERSION@
|
||||
Libs: -L${libdir} -lggml -lggml-base -lwhisper
|
||||
Cflags: -I${includedir}
|
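Once this `whisper.pc` file is installed somewhere pkg-config can find it, the compile and link flags can be pulled in when building against the library; a minimal sketch (assuming a hypothetical `main.c` that includes `whisper.h`):

```
gcc main.c $(pkg-config --cflags --libs whisper) -o main
```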
|
@ -0,0 +1 @@
|
|||
src/ggml-metal-embed.metal
|
|
@ -0,0 +1,343 @@
|
|||
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
|
||||
project("ggml" C CXX)
|
||||
include(CheckIncludeFileCXX)
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||
set(GGML_STANDALONE ON)
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
# configure project version
|
||||
# TODO
|
||||
else()
|
||||
set(GGML_STANDALONE OFF)
|
||||
endif()
|
||||
|
||||
if (EMSCRIPTEN)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
|
||||
option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
|
||||
else()
|
||||
if (MINGW)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
else()
|
||||
set(BUILD_SHARED_LIBS_DEFAULT ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# remove the lib prefix on win32 mingw
|
||||
if (WIN32)
|
||||
set(CMAKE_STATIC_LIBRARY_PREFIX "")
|
||||
set(CMAKE_SHARED_LIBRARY_PREFIX "")
|
||||
set(CMAKE_SHARED_MODULE_PREFIX "")
|
||||
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||
option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
|
||||
|
||||
#
|
||||
# option list
|
||||
#
|
||||
|
||||
# TODO: mark all options as advanced when not GGML_STANDALONE
|
||||
|
||||
if (APPLE)
|
||||
set(GGML_METAL_DEFAULT ON)
|
||||
set(GGML_BLAS_DEFAULT ON)
|
||||
set(GGML_BLAS_VENDOR_DEFAULT "Apple")
|
||||
else()
|
||||
set(GGML_METAL_DEFAULT OFF)
|
||||
set(GGML_BLAS_DEFAULT OFF)
|
||||
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
|
||||
endif()
|
||||
|
||||
if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH})
|
||||
message(STATUS "Setting GGML_NATIVE_DEFAULT to OFF")
|
||||
set(GGML_NATIVE_DEFAULT OFF)
|
||||
else()
|
||||
set(GGML_NATIVE_DEFAULT ON)
|
||||
endif()
|
||||
|
||||
# defaults
|
||||
if (NOT GGML_LLAMAFILE_DEFAULT)
|
||||
set(GGML_LLAMAFILE_DEFAULT OFF)
|
||||
endif()
|
||||
|
||||
if (NOT GGML_CUDA_GRAPHS_DEFAULT)
|
||||
set(GGML_CUDA_GRAPHS_DEFAULT OFF)
|
||||
endif()
|
||||
|
||||
# general
|
||||
option(GGML_STATIC "ggml: static link libraries" OFF)
|
||||
option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
|
||||
option(GGML_LTO "ggml: enable link time optimization" OFF)
|
||||
option(GGML_CCACHE "ggml: use ccache if available" ON)
|
||||
|
||||
# debug
|
||||
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
|
||||
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
|
||||
option(GGML_GPROF "ggml: enable gprof" OFF)
|
||||
|
||||
# build
|
||||
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)
|
||||
|
||||
# sanitizers
|
||||
option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)
|
||||
option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
|
||||
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)
|
||||
|
||||
# instruction set specific
|
||||
if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT)
|
||||
set(INS_ENB OFF)
|
||||
else()
|
||||
set(INS_ENB ON)
|
||||
endif()
|
||||
|
||||
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
|
||||
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
|
||||
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
|
||||
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
|
||||
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
|
||||
option(GGML_AVX512 "ggml: enable AVX512F" OFF)
|
||||
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
|
||||
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
|
||||
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
|
||||
if (NOT MSVC)
|
||||
# in MSVC F16C and FMA is implied with AVX2/AVX512
|
||||
option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
|
||||
option(GGML_F16C "ggml: enable F16C" ${INS_ENB})
|
||||
# MSVC does not seem to support AMX
|
||||
option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
|
||||
option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
|
||||
option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
|
||||
endif()
|
||||
option(GGML_LASX "ggml: enable lasx" ON)
|
||||
option(GGML_LSX "ggml: enable lsx" ON)
|
||||
option(GGML_RVV "ggml: enable rvv" ON)
|
||||
|
||||
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
||||
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
|
||||
|
||||
|
||||
if (WIN32)
|
||||
set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
|
||||
endif()
|
||||
|
||||
# ggml core
|
||||
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
|
||||
option(GGML_CPU "ggml: enable CPU backend" ON)
|
||||
|
||||
# 3rd party libs / backends
|
||||
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
|
||||
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
|
||||
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
|
||||
"ggml: BLAS library vendor")
|
||||
option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
|
||||
|
||||
option(GGML_CUDA "ggml: use CUDA" OFF)
|
||||
option(GGML_MUSA "ggml: use MUSA" OFF)
|
||||
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
|
||||
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
|
||||
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
|
||||
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
|
||||
"ggml: max. batch size for using peer access")
|
||||
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
|
||||
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
|
||||
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
||||
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
||||
|
||||
option(GGML_HIP "ggml: use HIP" OFF)
|
||||
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
||||
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
||||
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
||||
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
||||
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
||||
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
||||
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
|
||||
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
|
||||
option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
|
||||
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
||||
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
||||
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
|
||||
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
|
||||
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
|
||||
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
|
||||
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
|
||||
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})
|
||||
set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
|
||||
"ggml: metal minimum macOS version")
|
||||
set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
|
||||
option(GGML_OPENMP "ggml: use OpenMP" ON)
|
||||
option(GGML_RPC "ggml: use RPC" OFF)
|
||||
option(GGML_SYCL "ggml: use SYCL" OFF)
|
||||
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
||||
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
||||
"ggml: sycl target device")
|
||||
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
|
||||
"ggml: sycl device architecture")
|
||||
|
||||
option(GGML_OPENCL "ggml: use OpenCL" OFF)
|
||||
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
|
||||
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
|
||||
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
|
||||
|
||||
# toolchain for vulkan-shaders-gen
|
||||
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
|
||||
|
||||
# extra artifacts
|
||||
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
|
||||
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
|
||||
|
||||
#
|
||||
# dependencies
|
||||
#
|
||||
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
set(CMAKE_C_STANDARD_REQUIRED true)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED true)
|
||||
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
#
|
||||
# build the library
|
||||
#
|
||||
|
||||
add_subdirectory(src)
|
||||
|
||||
#
|
||||
# tests and examples
|
||||
#
|
||||
|
||||
if (GGML_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
endif ()
|
||||
|
||||
if (GGML_BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif ()
|
||||
|
||||
#
|
||||
# install
|
||||
#
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
# all public headers
|
||||
set(GGML_PUBLIC_HEADERS
|
||||
include/ggml.h
|
||||
include/ggml-cpu.h
|
||||
include/ggml-alloc.h
|
||||
include/ggml-backend.h
|
||||
include/ggml-blas.h
|
||||
include/ggml-cann.h
|
||||
include/ggml-cuda.h
|
||||
include/ggml-kompute.h
|
||||
include/ggml-opt.h
|
||||
include/ggml-metal.h
|
||||
include/ggml-rpc.h
|
||||
include/ggml-sycl.h
|
||||
include/ggml-vulkan.h
|
||||
include/gguf.h)
|
||||
|
||||
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
||||
#if (GGML_METAL)
|
||||
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
|
||||
#endif()
|
||||
install(TARGETS ggml LIBRARY PUBLIC_HEADER)
|
||||
install(TARGETS ggml-base LIBRARY)
|
||||
|
||||
if (GGML_STANDALONE)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
|
||||
@ONLY)
|
||||
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
|
||||
DESTINATION share/pkgconfig)
|
||||
endif()
|
||||
|
||||
#
|
||||
# Create CMake package
|
||||
#
|
||||
|
||||
# Generate version info based on git commit.
|
||||
|
||||
if(NOT DEFINED GGML_BUILD_NUMBER)
|
||||
find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH)
|
||||
execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GGML_BUILD_NUMBER
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
|
||||
if(GGML_BUILD_NUMBER EQUAL 1)
|
||||
message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.")
|
||||
endif()
|
||||
|
||||
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GGML_BUILD_COMMIT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
# Capture variables prefixed with GGML_.
|
||||
|
||||
set(variable_set_statements
|
||||
"
|
||||
####### Expanded from @GGML_VARIABLES_EXPANED@ by configure_package_config_file() #######
|
||||
####### Any changes to this file will be overwritten by the next CMake run #######
|
||||
|
||||
")
|
||||
|
||||
set(GGML_SHARED_LIB ${BUILD_SHARED_LIBS})
|
||||
|
||||
get_cmake_property(all_variables VARIABLES)
|
||||
foreach(variable_name IN LISTS all_variables)
|
||||
if(variable_name MATCHES "^GGML_")
|
||||
string(REPLACE ";" "\\;"
|
||||
variable_value "${${variable_name}}")
|
||||
|
||||
set(variable_set_statements
|
||||
"${variable_set_statements}set(${variable_name} \"${variable_value}\")\n")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(GGML_VARIABLES_EXPANDED ${variable_set_statements})
|
||||
|
||||
# Create the CMake package and set install location.
|
||||
|
||||
set(GGML_INSTALL_VERSION 0.0.${GGML_BUILD_NUMBER})
|
||||
set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
||||
set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
||||
set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
||||
|
||||
configure_package_config_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml
|
||||
PATH_VARS GGML_INCLUDE_INSTALL_DIR
|
||||
GGML_LIB_INSTALL_DIR
|
||||
GGML_BIN_INSTALL_DIR)
|
||||
|
||||
write_basic_package_version_file(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
|
||||
VERSION ${GGML_INSTALL_VERSION}
|
||||
COMPATIBILITY SameMajorVersion)
|
||||
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)
|
|
@ -0,0 +1,54 @@
|
|||
# Add new build types
|
||||
|
||||
# ReleaseGG - Release with enabled asserts
|
||||
|
||||
SET(CMAKE_CXX_FLAGS_RELEASEGG
|
||||
"-O3"
|
||||
CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_C_FLAGS_RELEASEGG
|
||||
"-O3"
|
||||
CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
|
||||
""
|
||||
CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
|
||||
""
|
||||
CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
|
||||
FORCE )
|
||||
MARK_AS_ADVANCED(
|
||||
CMAKE_CXX_FLAGS_RELEASEGG
|
||||
CMAKE_C_FLAGS_RELEASEGG
|
||||
CMAKE_EXE_LINKER_FLAGS_RELEASEGG
|
||||
CMAKE_SHARED_LINKER_FLAGS_RELEASEGG )
|
||||
|
||||
# RelWithDebInfoGG - RelWithDebInfo with enabled asserts
|
||||
|
||||
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
|
||||
"-O2 -g"
|
||||
CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG
|
||||
"-O2 -g"
|
||||
CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
|
||||
""
|
||||
CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
|
||||
""
|
||||
CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
MARK_AS_ADVANCED(
|
||||
CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
|
||||
CMAKE_C_FLAGS_RELWITHDEBINFOGG
|
||||
CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
|
||||
CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )
|
||||
|
||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
|
||||
endif()
|
|
@ -0,0 +1,22 @@
|
|||
find_package(Git)
|
||||
|
||||
# the commit's SHA1
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
|
||||
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
||||
OUTPUT_VARIABLE GIT_SHA1
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# the date of the commit
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
|
||||
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
||||
OUTPUT_VARIABLE GIT_DATE
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# the subject of the commit
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%s
|
||||
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
||||
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
147
packages/app-mobile/android/vendor/whisper.cpp/ggml/cmake/ggml-config.cmake.in
vendored
Normal file
|
@ -0,0 +1,147 @@
|
|||
|
||||
@GGML_VARIABLES_EXPANDED@
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
|
||||
set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
|
||||
set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
find_library(GGML_LIBRARY ggml
|
||||
REQUIRED
|
||||
HINTS ${GGML_LIB_DIR}
|
||||
NO_CMAKE_FIND_ROOT_PATH)
|
||||
|
||||
add_library(ggml::ggml UNKNOWN IMPORTED)
|
||||
set_target_properties(ggml::ggml
|
||||
PROPERTIES
|
||||
IMPORTED_LOCATION "${GGML_LIBRARY}")
|
||||
|
||||
find_library(GGML_BASE_LIBRARY ggml-base
|
||||
REQUIRED
|
||||
HINTS ${GGML_LIB_DIR}
|
||||
NO_CMAKE_FIND_ROOT_PATH)
|
||||
|
||||
add_library(ggml::ggml-base UNKNOWN IMPORTED)
|
||||
set_target_properties(ggml::ggml-base
|
||||
PROPERTIES
|
||||
IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
|
||||
|
||||
if (NOT GGML_SHARED_LIB)
|
||||
if (APPLE AND GGML_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
|
||||
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK})
|
||||
endif()
|
||||
|
||||
if (GGML_OPENMP)
|
||||
find_package(OpenMP REQUIRED)
|
||||
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
|
||||
endif()
|
||||
|
||||
if (GGML_CPU_HBM)
|
||||
find_library(memkind memkind REQUIRED)
|
||||
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind)
|
||||
endif()
|
||||
|
||||
if (GGML_BLAS)
|
||||
find_package(BLAS REQUIRED)
|
||||
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES})
|
||||
list(APPEND GGML_CPU_INTERFACE_LINK_OPTIONS ${BLAS_LINKER_FLAGS})
|
||||
endif()
|
||||
|
||||
if (GGML_CUDA)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
endif()
|
||||
|
||||
if (GGML_METAL)
|
||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||
|
||||
list(APPEND GGML_METAL_INTERFACE_LINK_LIBRARIES
|
||||
${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
|
||||
endif()
|
||||
|
||||
if (GGML_VULKAN)
|
||||
find_package(Vulkan REQUIRED)
|
||||
list(APPEND GGML_VULKAN_INTERFACE_LINK_LIBRARIES Vulkan::Vulkan)
|
||||
endif()
|
||||
|
||||
if (GGML_HIP)
|
||||
find_package(hip REQUIRED)
|
||||
find_package(hipblas REQUIRED)
|
||||
find_package(rocblas REQUIRED)
|
||||
list(APPEND GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
|
||||
endif()
|
||||
|
||||
if (GGML_SYCL)
|
||||
find_package(DNNL)
|
||||
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
|
||||
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
|
||||
endif()
|
||||
if (WIN32)
|
||||
find_package(IntelSYCL REQUIRED)
|
||||
find_package(MKL REQUIRED)
|
||||
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(_ggml_all_targets "")
|
||||
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
|
||||
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
|
||||
string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)
|
||||
|
||||
find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend}
|
||||
REQUIRED
|
||||
HINTS ${GGML_LIB_DIR}
|
||||
NO_CMAKE_FIND_ROOT_PATH)
|
||||
|
||||
message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}")
|
||||
|
||||
add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED)
|
||||
set_target_properties(ggml::${_ggml_backend}
|
||||
PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}"
|
||||
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
||||
IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}"
|
||||
INTERFACE_COMPILE_FEATURES c_std_90
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
|
||||
if(is_cpu_variant)
|
||||
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
|
||||
set_target_properties(ggml::${_ggml_backend}
|
||||
PROPERTIES
|
||||
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
|
||||
|
||||
if(GGML_CPU_INTERFACE_LINK_OPTIONS)
|
||||
set_target_properties(ggml::${_ggml_backend}
|
||||
PROPERTIES
|
||||
INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}")
|
||||
endif()
|
||||
|
||||
else()
|
||||
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
|
||||
set_target_properties(ggml::${_ggml_backend}
|
||||
PROPERTIES
|
||||
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
|
||||
|
||||
if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS)
|
||||
set_target_properties(ggml::${_ggml_backend}
|
||||
PROPERTIES
|
||||
INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
|
||||
endforeach()
|
||||
|
||||
add_library(ggml::all INTERFACE IMPORTED)
|
||||
set_target_properties(ggml::all
|
||||
PROPERTIES
|
||||
INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}")
|
||||
|
||||
check_required_components(ggml)
|
|
@ -0,0 +1,76 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
||||
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||
typedef struct ggml_backend * ggml_backend_t;
|
||||
|
||||
// Tensor allocator
|
||||
struct ggml_tallocr {
|
||||
ggml_backend_buffer_t buffer;
|
||||
void * base;
|
||||
size_t alignment;
|
||||
size_t offset;
|
||||
};
|
||||
|
||||
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
|
||||
|
||||
// Graph allocator
|
||||
/*
|
||||
Example usage:
|
||||
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
|
||||
|
||||
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
|
||||
ggml_gallocr_reserve(galloc, build_graph(max_batch));
|
||||
|
||||
// allocate the graph
|
||||
struct ggml_cgraph * graph = build_graph(batch);
|
||||
ggml_gallocr_alloc_graph(galloc, graph);
|
||||
|
||||
printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
|
||||
|
||||
// evaluate the graph
|
||||
ggml_backend_graph_compute(backend, graph);
|
||||
*/
|
||||
|
||||
// special tensor flags for use with the graph allocator:
|
||||
// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
|
||||
// ggml_set_output(): output tensors are never freed and never overwritten
|
||||
|
||||
typedef struct ggml_gallocr * ggml_gallocr_t;
|
||||
|
||||
GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
|
||||
GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
|
||||
GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
|
||||
|
||||
// pre-allocate buffers from a measure graph - does not allocate or modify the graph
|
||||
// call with a worst-case graph to avoid buffer reallocations
|
||||
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
|
||||
// returns false if the buffer allocation failed
|
||||
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||
GGML_API bool ggml_gallocr_reserve_n(
|
||||
ggml_gallocr_t galloc,
|
||||
struct ggml_cgraph * graph,
|
||||
const int * node_buffer_ids,
|
||||
const int * leaf_buffer_ids);
|
||||
|
||||
// automatic reallocation if the topology changes when using a single buffer
|
||||
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
|
||||
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||
|
||||
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
|
||||
|
||||
// Utils
|
||||
// Create a buffer and allocate all the tensors in a ggml_context
|
||||
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
|
||||
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,354 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
|
||||
#ifdef GGML_BACKEND_SHARED
|
||||
# if defined(_WIN32) && !defined(__MINGW32__)
|
||||
# ifdef GGML_BACKEND_BUILD
|
||||
# define GGML_BACKEND_API __declspec(dllexport) extern
|
||||
# else
|
||||
# define GGML_BACKEND_API __declspec(dllimport) extern
|
||||
# endif
|
||||
# else
|
||||
# define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
|
||||
# endif
|
||||
#else
|
||||
# define GGML_BACKEND_API extern
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
||||
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||
typedef struct ggml_backend_event * ggml_backend_event_t;
|
||||
typedef struct ggml_backend * ggml_backend_t;
|
||||
typedef void * ggml_backend_graph_plan_t;
|
||||
typedef struct ggml_backend_reg * ggml_backend_reg_t;
|
||||
typedef struct ggml_backend_device * ggml_backend_dev_t;
|
||||
|
||||
|
||||
//
|
||||
// Backend buffer type
|
||||
//
|
||||
|
||||
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
||||
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
||||
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
||||
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
||||
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
|
||||
|
||||
//
|
||||
// Backend buffer
|
||||
//
|
||||
|
||||
enum ggml_backend_buffer_usage {
|
||||
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
||||
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
||||
GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
|
||||
};
|
||||
|
||||
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
||||
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
||||
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
||||
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
||||
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
||||
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||
GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
||||
|
||||
// tensor copy between different backends
|
||||
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
//
|
||||
// Backend (stream)
|
||||
//
|
||||
|
||||
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
||||
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
|
||||
GGML_API void ggml_backend_free(ggml_backend_t backend);
|
||||
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
||||
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
||||
GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
|
||||
|
||||
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
|
||||
// "offset" refers to the offset in tensor->data for setting/getting data
|
||||
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
||||
|
||||
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||
|
||||
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
|
||||
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
// NOTE: will be removed, use device version instead
|
||||
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
||||
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
|
||||
// asynchronous copy
|
||||
// the copy is performed after all the currently queued operations in backend_src
|
||||
// backend_dst will wait for the copy to complete before performing other operations
|
||||
// automatic fallback to sync copy if async is not supported
|
||||
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
|
||||
|
||||
//
|
||||
// Events
|
||||
//
|
||||
|
||||
GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
|
||||
GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
|
||||
GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
|
||||
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
||||
GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
|
||||
|
||||
//
|
||||
// Backend device
|
||||
//
|
||||
|
||||
enum ggml_backend_dev_type {
|
||||
// CPU device using system memory
|
||||
GGML_BACKEND_DEVICE_TYPE_CPU,
|
||||
// GPU device using dedicated memory
|
||||
GGML_BACKEND_DEVICE_TYPE_GPU,
|
||||
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
|
||||
GGML_BACKEND_DEVICE_TYPE_ACCEL
|
||||
};
|
||||
|
||||
// functionality supported by the device
|
||||
struct ggml_backend_dev_caps {
|
||||
// asynchronous operations
|
||||
bool async;
|
||||
// pinned host buffer
|
||||
bool host_buffer;
|
||||
// creating buffers from host ptr
|
||||
bool buffer_from_host_ptr;
|
||||
// event synchronization
|
||||
bool events;
|
||||
};
|
||||
|
||||
// all the device properties
|
||||
struct ggml_backend_dev_props {
|
||||
const char * name;
|
||||
const char * description;
|
||||
size_t memory_free;
|
||||
size_t memory_total;
|
||||
enum ggml_backend_dev_type type;
|
||||
struct ggml_backend_dev_caps caps;
|
||||
};
|
||||
|
||||
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
|
||||
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
|
||||
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
|
||||
GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
|
||||
GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
|
||||
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
|
||||
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
|
||||
|
||||
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
||||
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
|
||||
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
||||
|
||||
//
|
||||
// Backend (reg)
|
||||
//
|
||||
|
||||
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
|
||||
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
|
||||
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
|
||||
|
||||
// Common functions that may be obtained using ggml_backend_reg_get_proc_address
|
||||
|
||||
// Split buffer type for tensor parallelism
|
||||
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
|
||||
// Set the number of threads for the backend
|
||||
typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
|
||||
// Get additional buffer types provided by the device (returns a NULL-terminated array)
|
||||
typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
|
||||
// Set the abort callback for the backend
|
||||
typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||
// Get a list of feature flags supported by the backend (returns a NULL-terminated array)
|
||||
struct ggml_backend_feature {
|
||||
const char * name;
|
||||
const char * value;
|
||||
};
|
||||
typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
|
||||
|
||||
//
|
||||
// Backend registry
|
||||
//
|
||||
|
||||
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
|
||||
|
||||
// Backend (reg) enumeration
|
||||
GGML_API size_t ggml_backend_reg_count(void);
|
||||
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
|
||||
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
|
||||
|
||||
// Device enumeration
|
||||
GGML_API size_t ggml_backend_dev_count(void);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
|
||||
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
|
||||
|
||||
// Direct backend (stream) initialization
|
||||
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
|
||||
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
|
||||
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
|
||||
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
|
||||
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
|
||||
GGML_API ggml_backend_t ggml_backend_init_best(void);
|
||||
|
||||
// Load a backend from a dynamic library and register it
|
||||
GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
|
||||
// Unload a backend if loaded dynamically and unregister it
|
||||
GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
|
||||
// Load all known backends from dynamic libraries
|
||||
GGML_API void ggml_backend_load_all(void);
|
||||
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
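// Illustrative sketch using only the declarations above (error handling omitted):
//
//    ggml_backend_load_all(); // pick up dynamically loadable backends, if any
//    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
//        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
//        printf("device %zu: %s\n", i, ggml_backend_dev_name(dev));
//    }
//    ggml_backend_t backend = ggml_backend_init_best(); // GPU if available, otherwise CPU
//    // ... use the backend ...
//    ggml_backend_free(backend);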
|
||||
|
||||
//
|
||||
// Backend scheduler
|
||||
//
|
||||
|
||||
// The backend scheduler allows for multiple backend devices to be used together
|
||||
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
||||
// The backends are selected based on:
|
||||
// - the backend that supports the operation
|
||||
// - the location of the pre-allocated tensors (e.g. the weights)
|
||||
/*
|
||||
Example usage:
|
||||
|
||||
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
|
||||
// preferably to run on the same backend as the buffer
|
||||
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
|
||||
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
|
||||
|
||||
// initialize buffers from a max size graph (optional)
|
||||
reserve_graph = build_graph(sched, max_batch_size);
|
||||
|
||||
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
||||
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
|
||||
|
||||
ggml_backend_sched_reserve(sched, reserve_graph);
|
||||
|
||||
// compute
|
||||
graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
|
||||
}
|
||||
|
||||
// if there are graph inputs:
|
||||
graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
|
||||
ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
|
||||
ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
|
||||
ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
|
||||
ggml_backend_sched_graph_compute(sched, graph); // execute the graph
|
||||
|
||||
// as an alternative to the above it is also possible to assign the inputs to a dedicated context and
|
||||
// allocate them statically via ggml_backend_alloc_ctx_tensors
|
||||
}
|
||||
*/
|
||||
|
||||
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
||||
|
||||
// Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
|
||||
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
||||
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
||||
//
|
||||
// when ask == false, the scheduler is passing the node tensor to the user for observation
|
||||
// if the user returns false, the scheduler will cancel the graph compute
|
||||
//
|
||||
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
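// Example callback (a sketch; the node filter is arbitrary and chosen only for illustration):
//
//    static bool observe_mul_mat(struct ggml_tensor * t, bool ask, void * user_data) {
//        if (ask) {
//            return t->op == GGML_OP_MUL_MAT; // only request observation of matmul nodes
//        }
//        // here `t` holds the computed data of an observed node - inspect it as needed
//        return true; // returning false would cancel the graph compute
//    }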
|
||||
|
||||
// Initialize a backend scheduler, backends with low index are given priority over backends with high index
|
||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||
|
||||
// Initialize backend buffers from a measure graph
|
||||
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
|
||||
|
||||
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
|
||||
|
||||
// Get the number of splits of the last graph
|
||||
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
|
||||
|
||||
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
|
||||
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||
|
||||
// Allocate and compute graph on the backend scheduler
|
||||
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
|
||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
||||
|
||||
// Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
|
||||
// This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
|
||||
// The correct way to use this API is to discard the deallocated tensors and create new ones.
|
||||
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||
|
||||
// Set a callback to be called for each resulting node during graph compute
|
||||
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
|
||||
|
||||
//
|
||||
// Utils
|
||||
//
|
||||
|
||||
struct ggml_backend_graph_copy {
|
||||
ggml_backend_buffer_t buffer;
|
||||
struct ggml_context * ctx_allocated;
|
||||
struct ggml_context * ctx_unallocated;
|
||||
struct ggml_cgraph * graph;
|
||||
};
|
||||
|
||||
// Copy a graph to a different backend
|
||||
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
||||
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
||||
|
||||
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
||||
|
||||
// Compare the output of two backends
|
||||
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
||||
|
||||
// Tensor initialization
|
||||
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
||||
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
|
||||
|
||||
// CPU buffer types are always available
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
|
||||
|
||||
// number of threads used for conversion to float
|
||||
// for OpenBLAS and BLIS, this will also set the number of threads used for BLAS operations
|
||||
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Maximum number of CANN devices supported.
|
||||
*/
|
||||
#define GGML_CANN_MAX_DEVICES 16
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
|
||||
|
||||
/**
|
||||
* @brief Initializes the CANN backend for a specified device.
|
||||
*
|
||||
* This function initializes the CANN backend for the given device.
|
||||
* It verifies the device index, allocates a context, and creates a backend
|
||||
* instance.
|
||||
*
|
||||
* @param device The index of the device to initialize.
|
||||
* @return A pointer to the initialized backend instance, or nullptr on failure.
|
||||
*/
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
|
||||
|
||||
/**
|
||||
* @brief Checks if a given backend is a CANN backend.
|
||||
*
|
||||
* This function verifies if the provided backend is a CANN backend by comparing
|
||||
* its GUID with the CANN backend's GUID.
|
||||
*
|
||||
* @param backend The backend instance to check.
|
||||
* @return True if the backend is a CANN backend, false otherwise.
|
||||
*/
|
||||
GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the CANN buffer type for a specified device.
|
||||
*
|
||||
* This function initializes and returns the buffer type interface associated
|
||||
* with the given device. It ensures thread-safe access using a mutex.
|
||||
*
|
||||
* @param device The device index for which to retrieve the buffer type.
|
||||
* @return A pointer to the buffer type interface for the specified device, or
|
||||
* nullptr if the device index is out of range.
|
||||
*/
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t
|
||||
ggml_backend_cann_buffer_type(int32_t device);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the number of CANN devices available.
|
||||
*
|
||||
* This function returns the number of CANN devices available based on
|
||||
* information obtained from `ggml_cann_info()`.
|
||||
*
|
||||
* @return The number of CANN devices available.
|
||||
*/
|
||||
GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
|
||||
|
||||
/**
|
||||
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
|
||||
*
|
||||
* @return A pointer to the host buffer type interface.
|
||||
*/
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the description of a specific CANN device.
|
||||
*
|
||||
* This function sets the specified device, retrieves the SoC name,
|
||||
* and writes it into the provided description buffer.
|
||||
*
|
||||
* @param device The device index to retrieve the description for.
|
||||
* @param description Pointer to a buffer where the description will be written.
|
||||
* @param description_size Size of the description buffer.
|
||||
*/
|
||||
GGML_BACKEND_API void ggml_backend_cann_get_device_description(
|
||||
int32_t device, char* description, size_t description_size);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the memory information of a specific CANN device.
|
||||
*
|
||||
* This function sets the specified device, retrieves the free and total
|
||||
* memory information of the specified type (ACL_HBM_MEM), and stores them
|
||||
* in the provided pointers.
|
||||
*
|
||||
* @param device The device index to retrieve memory information for.
|
||||
* @param free Pointer to a variable where the free memory size will be stored.
|
||||
* @param total Pointer to a variable where the total memory size will be
|
||||
* stored.
|
||||
*/
|
||||
GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
|
||||
size_t* free,
|
||||
size_t* total);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
|
||||
#ifndef __cplusplus
|
||||
#error "This header is for C++ only"
|
||||
#endif
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "gguf.h"
|
||||
#include <memory>
|
||||
|
||||
// Smart pointers for ggml types
|
||||
|
||||
// ggml
|
||||
|
||||
struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
|
||||
struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
|
||||
|
||||
typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
|
||||
typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
|
||||
|
||||
// ggml-alloc
|
||||
|
||||
struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
|
||||
|
||||
typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr;
|
||||
|
||||
// ggml-backend
|
||||
|
||||
struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
|
||||
struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
|
||||
struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
|
||||
struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };
|
||||
|
||||
typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
|
||||
typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
|
||||
typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
|
||||
typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;
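// Usage sketch (assumes `params` is a valid ggml_init_params value created in user code, and that
// ggml-cpu.h is included for ggml_backend_cpu_init()); the deleters above make cleanup automatic
// when the pointers go out of scope:
//
//    ggml_context_ptr ctx     { ggml_init(params) };           // freed via ggml_free()
//    ggml_backend_ptr backend { ggml_backend_cpu_init() };     // freed via ggml_backend_free()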
|
|
@ -0,0 +1,135 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// the compute plan that needs to be prepared for ggml_graph_compute()
|
||||
// since https://github.com/ggerganov/ggml/issues/287
|
||||
struct ggml_cplan {
|
||||
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
|
||||
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
|
||||
|
||||
int n_threads;
|
||||
struct ggml_threadpool * threadpool;
|
||||
|
||||
// abort ggml_graph_compute when true
|
||||
ggml_abort_callback abort_callback;
|
||||
void * abort_callback_data;
|
||||
};
|
||||
|
||||
// numa strategies
|
||||
enum ggml_numa_strategy {
|
||||
GGML_NUMA_STRATEGY_DISABLED = 0,
|
||||
GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
|
||||
GGML_NUMA_STRATEGY_ISOLATE = 2,
|
||||
GGML_NUMA_STRATEGY_NUMACTL = 3,
|
||||
GGML_NUMA_STRATEGY_MIRROR = 4,
|
||||
GGML_NUMA_STRATEGY_COUNT
|
||||
};
|
||||
|
||||
GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
|
||||
GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
||||
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
||||
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
||||
GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
||||
|
||||
GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
|
||||
GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
||||
|
||||
GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
||||
GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
|
||||
|
||||
GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
|
||||
GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
||||
|
||||
GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
||||
GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
||||
|
||||
GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
|
||||
GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
|
||||
GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
|
||||
GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
|
||||
GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
|
||||
|
||||
// ggml_graph_plan() has to be called before ggml_graph_compute()
|
||||
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
||||
GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
|
||||
const struct ggml_cgraph * cgraph,
|
||||
int n_threads, /* = GGML_DEFAULT_N_THREADS */
|
||||
struct ggml_threadpool * threadpool /* = NULL */ );
|
||||
GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
|
||||
|
||||
// same as ggml_graph_compute() but the work data is allocated as a part of the context
|
||||
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
|
||||
GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
|
||||
|
||||
//
|
||||
// system info
|
||||
//
|
||||
|
||||
// x86
|
||||
GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_fma (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
|
||||
// ARM
|
||||
GGML_BACKEND_API int ggml_cpu_has_neon (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_sve (void);
|
||||
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
|
||||
// other
|
||||
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
|
||||
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
|
||||
|
||||
// Internal types and functions exposed for tests and benchmarks
|
||||
|
||||
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
|
||||
const void * GGML_RESTRICT y, size_t by, int nrc);
|
||||
|
||||
struct ggml_type_traits_cpu {
|
||||
ggml_from_float_t from_float;
|
||||
ggml_vec_dot_t vec_dot;
|
||||
enum ggml_type vec_dot_type;
|
||||
int64_t nrows; // number of rows to process simultaneously
|
||||
};
|
||||
|
||||
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
|
||||
|
||||
GGML_BACKEND_API void ggml_cpu_init(void);
|
||||
|
||||
//
|
||||
// CPU backend
|
||||
//
|
||||
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
|
||||
GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
||||
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
|
||||
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,47 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_HIP
|
||||
#define GGML_CUDA_NAME "ROCm"
|
||||
#define GGML_CUBLAS_NAME "hipBLAS"
|
||||
#elif defined(GGML_USE_MUSA)
|
||||
#define GGML_CUDA_NAME "MUSA"
|
||||
#define GGML_CUBLAS_NAME "muBLAS"
|
||||
#else
|
||||
#define GGML_CUDA_NAME "CUDA"
|
||||
#define GGML_CUBLAS_NAME "cuBLAS"
|
||||
#endif
|
||||
#define GGML_CUDA_MAX_DEVICES 16
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
|
||||
|
||||
// device buffer
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
||||
|
||||
// split tensor buffer that splits matrices by rows across multiple devices
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
|
||||
|
||||
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void);
|
||||
GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
|
||||
GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
|
||||
GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,50 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_KOMPUTE_MAX_DEVICES 16
|
||||
|
||||
struct ggml_vk_device {
|
||||
int index;
|
||||
int type; // same as VkPhysicalDeviceType
|
||||
size_t heapSize;
|
||||
const char * name;
|
||||
const char * vendor;
|
||||
int subgroupSize;
|
||||
uint64_t bufferAlignment;
|
||||
uint64_t maxAlloc;
|
||||
};
|
||||
|
||||
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
|
||||
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
|
||||
bool ggml_vk_has_vulkan(void);
|
||||
bool ggml_vk_has_device(void);
|
||||
struct ggml_vk_device ggml_vk_current_device(void);
|
||||
|
||||
//
|
||||
// backend API
|
||||
//
|
||||
|
||||
// forward declaration
|
||||
typedef struct ggml_backend * ggml_backend_t;
|
||||
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,66 @@
|
|||
// Note: this description is outdated
|
||||
//
|
||||
// An interface allowing to compute ggml_cgraph with Metal
|
||||
//
|
||||
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
||||
// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
|
||||
//
|
||||
// How does it work?
|
||||
//
|
||||
// As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
|
||||
// interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
|
||||
// use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
|
||||
//
|
||||
// You only need to make sure that all memory buffers that you used during the graph creation
|
||||
// are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
|
||||
// used during the graph evaluation to determine the arguments of the compute kernels.
|
||||
//
|
||||
// Synchronization between device and host memory (for example for input and output tensors)
|
||||
// is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
struct ggml_tensor;
|
||||
struct ggml_cgraph;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// backend API
|
||||
// user-code should use only these functions
|
||||
//
|
||||
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
||||
|
||||
GGML_DEPRECATED(
|
||||
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
|
||||
"obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
||||
|
||||
// helper to check if the device supports a specific family
|
||||
// ideally, the user code should be doing these checks
|
||||
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
|
||||
GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
|
||||
|
||||
// capture all command buffers committed the next time `ggml_backend_graph_compute` is called
|
||||
GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef GGML_OPENCL_H
|
||||
#define GGML_OPENCL_H
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// backend API
|
||||
//
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
|
||||
GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // GGML_OPENCL_H
|
|
@ -0,0 +1,216 @@
|
|||
// This file contains functionality for training models using GGML.
|
||||
// It is not strictly needed compared to just vanilla GGML, but it provides a higher-level interface for common needs such as datasets.
|
||||
// Towards the bottom of this file especially there are relatively high-level functions that are suitable for use or adaptation in user code.
|
||||
//
|
||||
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct ggml_opt_dataset;
|
||||
struct ggml_opt_context;
|
||||
struct ggml_opt_result;
|
||||
|
||||
typedef struct ggml_opt_dataset * ggml_opt_dataset_t;
|
||||
typedef struct ggml_opt_context * ggml_opt_context_t;
|
||||
typedef struct ggml_opt_result * ggml_opt_result_t;
|
||||
|
||||
// ====== Loss ======
|
||||
|
||||
// built-in loss types, i.e. the built-in quantities minimized by the optimizer
|
||||
// custom loss types can be defined via mean or sum which simply reduce the outputs for all datapoints to a single value
|
||||
enum ggml_opt_loss_type {
|
||||
GGML_OPT_LOSS_TYPE_MEAN,
|
||||
GGML_OPT_LOSS_TYPE_SUM,
|
||||
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
|
||||
GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR,
|
||||
};
|
||||
|
||||
// ====== Dataset ======
|
||||
|
||||
GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
|
||||
int64_t ne_datapoint, // number of elements per datapoint
|
||||
int64_t ne_label, // number of elements per label
|
||||
int64_t ndata, // total number of datapoints/labels
|
||||
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
|
||||
GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
|
||||
|
||||
// get underlying tensors that store the data
|
||||
GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
|
||||
GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [ne_label, ndata]
|
||||
|
||||
// shuffle idata first datapoints from dataset with RNG from opt_ctx, shuffle all datapoints if idata is negative
|
||||
GGML_API void ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int64_t idata);
|
||||
|
||||
// get batch at position ibatch from dataset and copy the data to data_batch and labels_batch
|
||||
GGML_API void ggml_opt_dataset_get_batch(
|
||||
ggml_opt_dataset_t dataset,
|
||||
struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
|
||||
struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
|
||||
int64_t ibatch);
|
||||
|
||||
// ====== Model / Context ======
|
||||
|
||||
enum ggml_opt_build_type {
|
||||
GGML_OPT_BUILD_TYPE_FORWARD,
|
||||
GGML_OPT_BUILD_TYPE_GRAD,
|
||||
GGML_OPT_BUILD_TYPE_OPT,
|
||||
};
|
||||
|
||||
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
|
||||
struct ggml_opt_optimizer_params {
|
||||
// AdamW optimizer parameters
|
||||
struct {
|
||||
float alpha; // learning rate
|
||||
float beta1;
|
||||
float beta2;
|
||||
float eps; // epsilon for numerical stability
|
||||
float wd; // weight decay for AdamW, use 0.0f to disable
|
||||
} adamw;
|
||||
};
|
||||
|
||||
// callback to calculate optimizer parameters prior to a backward pass
|
||||
// userdata can be used to pass arbitrary data
|
||||
typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
|
||||
|
||||
// returns the default optimizer params (constant)
|
||||
// userdata is not used
|
||||
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
|
||||
|
||||
// parameters for initializing a new optimization context
|
||||
struct ggml_opt_params {
|
||||
ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
|
||||
|
||||
struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
|
||||
|
||||
// the forward graph is defined by inputs and outputs
|
||||
// those tensors and all tensors in between are not intended to be reusable between multiple optimization contexts
|
||||
struct ggml_tensor * inputs;
|
||||
struct ggml_tensor * outputs;
|
||||
|
||||
enum ggml_opt_loss_type loss_type;
|
||||
enum ggml_opt_build_type build_type;
|
||||
|
||||
int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done
|
||||
|
||||
ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
|
||||
void * get_opt_pars_ud; // userdata for calculating optimizer parameters
|
||||
};
|
||||
|
||||
// get parameters for an optimization context with defaults set where possible
|
||||
// parameters for which no sensible defaults exist are supplied as arguments to this function
|
||||
GGML_API ggml_opt_params ggml_opt_default_params(
|
||||
ggml_backend_sched_t backend_sched,
|
||||
struct ggml_context * ctx_compute,
|
||||
struct ggml_tensor * inputs,
|
||||
struct ggml_tensor * outputs,
|
||||
enum ggml_opt_loss_type loss_type);
|
||||
|
||||
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
|
||||
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
|
||||
|
||||
// set gradients to zero, initialize loss, and optionally reset the optimizer
|
||||
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
|
||||
|
||||
// get underlying tensors that store data
|
||||
GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
|
||||
GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
|
||||
GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
|
||||
GGML_API struct ggml_tensor * ggml_opt_loss( ggml_opt_context_t opt_ctx); // scalar tensor that contains the loss
|
||||
GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
|
||||
GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
|
||||
|
||||
// ====== Optimization Result ======
|
||||
|
||||
GGML_API ggml_opt_result_t ggml_opt_result_init(void);
|
||||
GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
|
||||
GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
|
||||
|
||||
// get data from result, uncertainties are optional and can be ignored by passing NULL
|
||||
GGML_API void ggml_opt_result_ndata( ggml_opt_result_t result, int64_t * ndata); // writes 1 value, number of datapoints
|
||||
GGML_API void ggml_opt_result_loss( ggml_opt_result_t result, double * loss, double * unc); // writes 1 value
|
||||
GGML_API void ggml_opt_result_pred( ggml_opt_result_t result, int32_t * pred); // writes ndata values
|
||||
GGML_API void ggml_opt_result_accuracy(ggml_opt_result_t result, double * accuracy, double * unc); // writes 1 value
|
||||
|
||||
// ====== Computation ======
|
||||
|
||||
// do forward pass, increment result if not NULL
|
||||
GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
||||
|
||||
// do forward pass, increment result if not NULL, do backward pass
|
||||
GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
||||
|
||||
// ############################################################################
|
||||
// ## The high-level functions start here. They do not depend on any private ##
|
||||
// ## functions or structs and can be copied to and adapted for user code. ##
|
||||
// ############################################################################
|
||||
|
||||
// ====== Intended Usage ======
|
||||
//
|
||||
// 1. Select the appropriate loss for your problem.
|
||||
// 2. Create a dataset and set the data for the "data" tensor. Also set the "labels" tensor if your loss needs them.
|
||||
// Setting the shard size to 1 is fine; it is the granularity at which data is shuffled/loaded (bigger values are faster).
|
||||
// 3. Create a GGML graph for your model with no_alloc == true. Use two separate contexts for the tensors.
|
||||
// The first context should contain the model parameters and inputs and be allocated statically in user code.
|
||||
// The second context should contain all other tensors and will be (re)allocated automatically.
|
||||
// Due to this automated allocation the data of the second context is not defined when accessed in user code.
|
||||
// Note that the second dimension of the inputs/outputs is interpreted as the number of datapoints in those tensors.
|
||||
// 4. Call ggml_opt_fit. If you need more control you can use ggml_opt_epoch instead.
|
||||
|
||||
// signature for a callback while evaluating opt_ctx on dataset, called after an evaluation
|
||||
typedef void (*ggml_opt_epoch_callback)(
|
||||
bool train, // true after training evaluation, false after validation evaluation
|
||||
ggml_opt_context_t opt_ctx,
|
||||
ggml_opt_dataset_t dataset,
|
||||
ggml_opt_result_t result, // result associated with the dataset subsection
|
||||
int64_t ibatch, // number of batches that have been evaluated so far
|
||||
int64_t ibatch_max, // total number of batches in this dataset subsection
|
||||
int64_t t_start_us); // time at which the evaluation on the dataset subsection was started
|
||||
|
||||
// do training on front of dataset, do evaluation only on back of dataset
|
||||
GGML_API void ggml_opt_epoch(
|
||||
ggml_opt_context_t opt_ctx,
|
||||
ggml_opt_dataset_t dataset,
|
||||
ggml_opt_result_t result_train, // result to increment during training, ignored if NULL
|
||||
ggml_opt_result_t result_eval, // result to increment during evaluation, ignored if NULL
|
||||
int64_t idata_split, // data index at which to split training and evaluation
|
||||
ggml_opt_epoch_callback callback_train,
|
||||
ggml_opt_epoch_callback callback_eval);
|
||||
|
||||
// callback that prints a progress bar on stderr
|
||||
GGML_API void ggml_opt_epoch_callback_progress_bar(
|
||||
bool train,
|
||||
ggml_opt_context_t opt_ctx,
|
||||
ggml_opt_dataset_t dataset,
|
||||
ggml_opt_result_t result,
|
||||
int64_t ibatch,
|
||||
int64_t ibatch_max,
|
||||
int64_t t_start_us);
|
||||
|
||||
// fit model defined by inputs and outputs to dataset
|
||||
GGML_API void ggml_opt_fit(
|
||||
ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
|
||||
ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
|
||||
ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
|
||||
ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
|
||||
ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
|
||||
enum ggml_opt_loss_type loss_type, // loss to minimize
|
||||
ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
|
||||
int64_t nepoch, // how many times the dataset should be iterated over
|
||||
int64_t nbatch_logical, // datapoints per optimizer step, must be a multiple of ndata_batch in inputs/outputs
|
||||
float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
|
||||
bool silent); // whether or not info prints to stderr should be suppressed
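// Minimal end-to-end sketch of the "Intended Usage" steps above; `sched`, `ctx_compute`, `inputs`,
// `outputs`, `ne_datapoint`, `ne_label` and `ndata` are placeholders created in user code:
//
//    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(ne_datapoint, ne_label, ndata, /*ndata_shard =*/ 1);
//    // fill ggml_opt_dataset_data(dataset) and, if the loss needs them, ggml_opt_dataset_labels(dataset)
//    ggml_opt_fit(sched, ctx_compute, inputs, outputs, dataset,
//        GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, ggml_opt_get_default_optimizer_params,
//        /*nepoch =*/ 2, /*nbatch_logical =*/ 32, /*val_split =*/ 0.1f, /*silent =*/ false);
//    ggml_opt_dataset_free(dataset);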
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_RPC_MAX_SERVERS 16
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
|
||||
GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,49 @@
|
|||
//
|
||||
// MIT license
|
||||
// Copyright (C) 2024 Intel Corporation
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#define GGML_SYCL_NAME "SYCL"
|
||||
#define GGML_SYCL_MAX_DEVICES 48
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);
|
||||
|
||||
// device buffer
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
||||
|
||||
// split tensor buffer that splits matrices by rows across multiple devices
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||
|
||||
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||
GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
|
||||
GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,
|
||||
char *description,
|
||||
size_t description_size);
|
||||
GGML_BACKEND_API int ggml_backend_sycl_get_device_count();
|
||||
GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||
|
||||
// SYCL doesn't support registering host memory, keep here for reference
|
||||
// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
||||
// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_VK_NAME "Vulkan"
|
||||
#define GGML_VK_MAX_DEVICES 16
|
||||
|
||||
GGML_BACKEND_API void ggml_vk_instance_init(void);
|
||||
|
||||
// backend API
|
||||
GGML_BACKEND_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
||||
|
||||
GGML_BACKEND_API bool ggml_backend_is_vk(ggml_backend_t backend);
|
||||
GGML_BACKEND_API int ggml_backend_vk_get_device_count(void);
|
||||
GGML_BACKEND_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
|
||||
GGML_BACKEND_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
|
||||
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
File diff suppressed because it is too large
|
@ -0,0 +1,202 @@
|
|||
// This file contains functionality related to "GGUF" files, the binary file format used by ggml.
|
||||
// GGUF files have the following structure:
|
||||
//
|
||||
// 1. File magic "GGUF" (4 bytes).
|
||||
// 2. File version (uint32_t).
|
||||
// 3. Number of ggml tensors in file (int64_t).
|
||||
// 4. Number of key-value-pairs in file (int64_t).
|
||||
// 5. For each KV pair:
|
||||
// 1. The key (string).
|
||||
// 2. The value type (gguf_type).
|
||||
// 3a. If the value type is GGUF_TYPE_ARRAY:
|
||||
// 1. The type of the array (gguf_type).
|
||||
// 2. The number of elements in the array (uint64_t).
|
||||
// 3. The binary representation of each element in the array.
|
||||
// 3b. Otherwise:
|
||||
// 1. The binary representation of the value.
|
||||
// 6. For each ggml tensor:
|
||||
// 1. The tensor name (string).
|
||||
// 2. The number of dimensions of the tensor (uint32_t).
|
||||
// 3. For each dimension:
|
||||
// 1. The size of the tensor in the dimension (int64_t).
|
||||
// 4. The tensor data type (ggml_type).
|
||||
// 5. The tensor data offset in the tensor data binary blob (uint64_t).
|
||||
// 7. The tensor data binary blob (optional, aligned).
|
||||
//
|
||||
// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
|
||||
// All enums are stored as int32_t.
|
||||
// All bool values are stored as int8_t.
|
||||
// If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
|
||||
// otherwise GGUF_DEFAULT_ALIGNMENT is used.
|
||||
//
|
||||
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define GGUF_MAGIC "GGUF"
|
||||
#define GGUF_VERSION 3
|
||||
|
||||
#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment"
|
||||
|
||||
#define GGUF_DEFAULT_ALIGNMENT 32
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// types that can be stored as GGUF KV data
|
||||
enum gguf_type {
|
||||
GGUF_TYPE_UINT8 = 0,
|
||||
GGUF_TYPE_INT8 = 1,
|
||||
GGUF_TYPE_UINT16 = 2,
|
||||
GGUF_TYPE_INT16 = 3,
|
||||
GGUF_TYPE_UINT32 = 4,
|
||||
GGUF_TYPE_INT32 = 5,
|
||||
GGUF_TYPE_FLOAT32 = 6,
|
||||
GGUF_TYPE_BOOL = 7,
|
||||
GGUF_TYPE_STRING = 8,
|
||||
GGUF_TYPE_ARRAY = 9,
|
||||
GGUF_TYPE_UINT64 = 10,
|
||||
GGUF_TYPE_INT64 = 11,
|
||||
GGUF_TYPE_FLOAT64 = 12,
|
||||
GGUF_TYPE_COUNT, // marks the end of the enum
|
||||
};
|
||||
|
||||
struct gguf_context;
|
||||
|
||||
struct gguf_init_params {
|
||||
bool no_alloc;
|
||||
|
||||
// if not NULL, create a ggml_context and allocate the tensor data in it
|
||||
struct ggml_context ** ctx;
|
||||
};
|
||||
|
||||
GGML_API struct gguf_context * gguf_init_empty(void);
|
||||
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
||||
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
||||
|
||||
GGML_API void gguf_free(struct gguf_context * ctx);
|
||||
|
||||
GGML_API const char * gguf_type_name(enum gguf_type type);
|
||||
|
||||
GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx);
|
||||
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
|
||||
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
||||
|
||||
GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx);
|
||||
GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found
|
||||
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id);
|
||||
|
||||
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id);
|
||||
|
||||
// will abort if the wrong type is used for the key
|
||||
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id);
|
||||
GGML_API size_t gguf_get_arr_n (const struct gguf_context * ctx, int64_t key_id);
|
||||
|
||||
// get raw pointer to the first element of the array with the given key_id
|
||||
// for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
|
||||
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id);
|
||||
|
||||
// get ith C string from array with given key_id
|
||||
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
|
||||
|
||||
GGML_API int64_t gguf_get_n_tensors (const struct gguf_context * ctx);
|
||||
GGML_API int64_t gguf_find_tensor (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
|
||||
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id);
|
||||
GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int64_t tensor_id);
|
||||
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int64_t tensor_id);
|
||||
GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int64_t tensor_id);
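// Read-side sketch using only the getters above ("general.architecture" is just an example key
// that may or may not be present in a given file; `fname` is supplied by the caller):
//
//    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ NULL };
//    struct gguf_context * ctx = gguf_init_from_file(fname, params);
//    if (ctx) {
//        const int64_t key_id = gguf_find_key(ctx, "general.architecture");
//        if (key_id >= 0 && gguf_get_kv_type(ctx, key_id) == GGUF_TYPE_STRING) {
//            printf("arch: %s\n", gguf_get_val_str(ctx, key_id));
//        }
//        for (int64_t i = 0; i < gguf_get_n_tensors(ctx); ++i) {
//            printf("tensor: %s\n", gguf_get_tensor_name(ctx, i));
//        }
//        gguf_free(ctx);
//    }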
|
||||
|
||||
// removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
|
||||
GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key);
|
||||
|
||||
// overrides an existing KV pair or adds a new one, the new KV pair is always at the back
|
||||
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
||||
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
||||
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
|
||||
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
|
||||
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
||||
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
||||
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
||||
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
|
||||
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
|
||||
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
|
||||
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
||||
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
||||
|
||||
// creates a new array with n elements of the given type and copies the corresponding number of bytes from data
|
||||
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n);
|
||||
|
||||
// creates a new array with n strings and copies the corresponding strings from data
|
||||
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n);
|
||||
|
||||
// set or add KV pairs from another context
|
||||
GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
|
||||
|
||||
// add tensor to GGUF context, tensor name must be unique
|
||||
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
||||
|
||||
// after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
|
||||
// in such a way that the tensor data remains as one contiguous block (except for padding)
|
||||
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
|
||||
|
||||
// assumes that at least gguf_get_tensor_size bytes can be read from data
|
||||
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
|
||||
|
||||
// writing gguf files can be done in 3 ways:
|
||||
//
|
||||
// - write the entire gguf_context to a binary file in a single pass:
|
||||
//
|
||||
// gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
|
||||
//
|
||||
// - write only the meta data to a file, then re-open the file and append the tensor data:
|
||||
//
|
||||
// gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
|
||||
// FILE * f = fopen(fname, "ab");
|
||||
// fwrite(f, ...); // write tensor data
|
||||
// fclose(f);
|
||||
//
|
||||
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
|
||||
//
|
||||
// FILE * f = fopen(fname, "wb");
|
||||
// const size_t size_meta = gguf_get_meta_size(ctx);
|
||||
// fseek(f, size_meta, SEEK_SET);
|
||||
// fwrite(f, ...); // write tensor data
|
||||
// void * data = malloc(size_meta);
|
||||
// gguf_get_meta_data(ctx, data);
|
||||
// rewind(f);
|
||||
//   fwrite(data, 1, size_meta, f);
|
||||
// free(data);
|
||||
// fclose(f);
|
||||
//
|
||||
|
||||
// write the entire context to a binary file
|
||||
GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
||||
|
||||
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
||||
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
||||
|
||||
// writes the meta data to pointer "data"
|
||||
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
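As a usage illustration only (not part of the vendored header), the snippet below writes a GGUF file that carries nothing but key/value metadata, using the single-pass mode described in the comments above. gguf_init_empty() and gguf_free() are assumed from the portion of gguf.h not shown in this hunk, and all key names are made up for the example.

    #include "gguf.h" // include path depends on the consuming project

    int main(void) {
        struct gguf_context * ctx = gguf_init_empty(); // assumed: declared earlier in gguf.h

        // scalar KV pairs (new keys are appended at the back)
        gguf_set_val_str (ctx, "general.name",    "demo");
        gguf_set_val_u32 (ctx, "demo.block_size", 32);
        gguf_set_val_bool(ctx, "demo.use_cache",  true);

        // string array KV pair
        const char * langs[] = { "en", "fr" };
        gguf_set_arr_str(ctx, "demo.languages", langs, 2);

        // single pass: meta data (and tensor data, none here) written in one call
        gguf_write_to_file(ctx, "demo.gguf", /*only_meta =*/ false);

        gguf_free(ctx); // assumed: declared earlier in gguf.h
        return 0;
    }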
|
|
@ -0,0 +1,357 @@
|
|||
include(CheckCXXCompilerFlag)
|
||||
|
||||
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
|
||||
|
||||
# enable libstdc++ assertions for debug builds
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
|
||||
endif()
|
||||
|
||||
if (NOT MSVC)
|
||||
if (GGML_SANITIZE_THREAD)
|
||||
add_compile_options(-fsanitize=thread)
|
||||
link_libraries (-fsanitize=thread)
|
||||
endif()
|
||||
|
||||
if (GGML_SANITIZE_ADDRESS)
|
||||
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
|
||||
link_libraries (-fsanitize=address)
|
||||
endif()
|
||||
|
||||
if (GGML_SANITIZE_UNDEFINED)
|
||||
add_compile_options(-fsanitize=undefined)
|
||||
link_libraries (-fsanitize=undefined)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
function(ggml_get_flags CCID CCVER)
|
||||
set(C_FLAGS "")
|
||||
set(CXX_FLAGS "")
|
||||
|
||||
if (CCID MATCHES "Clang")
|
||||
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
|
||||
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
|
||||
|
||||
if (
|
||||
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
|
||||
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
|
||||
)
|
||||
list(APPEND C_FLAGS -Wdouble-promotion)
|
||||
endif()
|
||||
elseif (CCID STREQUAL "GNU")
|
||||
set(C_FLAGS -Wdouble-promotion)
|
||||
set(CXX_FLAGS -Wno-array-bounds)
|
||||
|
||||
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
|
||||
list(APPEND CXX_FLAGS -Wextra-semi)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
|
||||
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
if (GGML_FATAL_WARNINGS)
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
list(APPEND C_FLAGS -Werror)
|
||||
list(APPEND CXX_FLAGS -Werror)
|
||||
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
add_compile_options(/WX)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (GGML_ALL_WARNINGS)
|
||||
if (NOT MSVC)
|
||||
list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
||||
list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
||||
-Werror=implicit-int -Werror=implicit-function-declaration)
|
||||
list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
||||
|
||||
list(APPEND C_FLAGS ${WARNING_FLAGS})
|
||||
list(APPEND CXX_FLAGS ${WARNING_FLAGS})
|
||||
|
||||
ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
|
||||
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
|
||||
else()
|
||||
# todo : msvc
|
||||
set(C_FLAGS "")
|
||||
set(CXX_FLAGS "")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (GGML_LTO)
|
||||
include(CheckIPOSupported)
|
||||
check_ipo_supported(RESULT result OUTPUT output)
|
||||
if (result)
|
||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
else()
|
||||
message(WARNING "IPO is not supported: ${output}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (GGML_CCACHE)
|
||||
find_program(GGML_CCACHE_FOUND ccache)
|
||||
find_program(GGML_SCCACHE_FOUND sccache)
|
||||
|
||||
if (GGML_CCACHE_FOUND OR GGML_SCCACHE_FOUND)
|
||||
if(GGML_CCACHE_FOUND)
|
||||
set(GGML_CCACHE_VARIANT ccache)
|
||||
else()
|
||||
set(GGML_CCACHE_VARIANT sccache)
|
||||
endif()
|
||||
# TODO: should not be set globally
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
|
||||
set(ENV{CCACHE_SLOPPINESS} time_macros)
|
||||
message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.")
|
||||
else()
|
||||
message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# this version of Apple ld64 is buggy
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
|
||||
ERROR_VARIABLE output
|
||||
OUTPUT_QUIET
|
||||
)
|
||||
|
||||
if (output MATCHES "dyld-1015\.7")
|
||||
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
|
||||
endif()
|
||||
|
||||
# architecture specific
|
||||
# TODO: probably these flags need to be tweaked on some architectures
|
||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
if (MSVC)
|
||||
string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
|
||||
message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
|
||||
else ()
|
||||
set(CMAKE_GENERATOR_PLATFORM_LWR "")
|
||||
endif ()
|
||||
|
||||
if (NOT MSVC)
|
||||
if (GGML_STATIC)
|
||||
add_link_options(-static)
|
||||
if (MINGW)
|
||||
add_link_options(-static-libgcc -static-libstdc++)
|
||||
endif()
|
||||
endif()
|
||||
if (GGML_GPROF)
|
||||
add_compile_options(-pg)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (MINGW)
|
||||
# Target Windows 8 for PrefetchVirtualMemory
|
||||
add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
|
||||
endif()
|
||||
|
||||
#
|
||||
# POSIX conformance
|
||||
#
|
||||
|
||||
# clock_gettime came in POSIX.1b (1993)
|
||||
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
|
||||
# posix_memalign came in POSIX.1-2001 / SUSv3
|
||||
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
|
||||
|
||||
# Somehow in OpenBSD whenever POSIX conformance is specified
|
||||
# some string functions rely on locale_t availability,
|
||||
# which was introduced in POSIX.1-2008, forcing us to go higher
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
|
||||
add_compile_definitions(_XOPEN_SOURCE=700)
|
||||
else()
|
||||
add_compile_definitions(_XOPEN_SOURCE=600)
|
||||
endif()
|
||||
|
||||
# Data types, macros and functions related to controlling CPU affinity and
|
||||
# some memory allocation are available on Linux through GNU extensions in libc
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android")
|
||||
add_compile_definitions(_GNU_SOURCE)
|
||||
endif()
|
||||
|
||||
# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
|
||||
# and on macOS its availability depends on enabling Darwin extensions
|
||||
# similarly on DragonFly, enabling BSD extensions is necessary
|
||||
if (
|
||||
CMAKE_SYSTEM_NAME MATCHES "Darwin" OR
|
||||
CMAKE_SYSTEM_NAME MATCHES "iOS" OR
|
||||
CMAKE_SYSTEM_NAME MATCHES "tvOS" OR
|
||||
CMAKE_SYSTEM_NAME MATCHES "DragonFly"
|
||||
)
|
||||
add_compile_definitions(_DARWIN_C_SOURCE)
|
||||
endif()
|
||||
|
||||
# alloca is a non-standard interface that is not visible on BSDs when
|
||||
# POSIX conformance is specified, but not all of them provide a clean way
|
||||
# to enable it in such cases
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
add_compile_definitions(__BSD_VISIBLE)
|
||||
endif()
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
|
||||
add_compile_definitions(_NETBSD_SOURCE)
|
||||
endif()
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
|
||||
add_compile_definitions(_BSD_SOURCE)
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
||||
endif()
|
||||
|
||||
# ggml
|
||||
|
||||
if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
|
||||
message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
|
||||
endif()
|
||||
|
||||
add_library(ggml-base
|
||||
../include/ggml.h
|
||||
../include/ggml-alloc.h
|
||||
../include/ggml-backend.h
|
||||
../include/ggml-cpp.h
|
||||
../include/ggml-opt.h
|
||||
../include/gguf.h
|
||||
ggml.c
|
||||
ggml-alloc.c
|
||||
ggml-backend.cpp
|
||||
ggml-opt.cpp
|
||||
ggml-threading.cpp
|
||||
ggml-threading.h
|
||||
ggml-quants.c
|
||||
ggml-quants.h
|
||||
gguf.cpp)
|
||||
|
||||
target_include_directories(ggml-base PRIVATE .)
|
||||
|
||||
add_library(ggml
|
||||
ggml-backend-reg.cpp)
|
||||
|
||||
target_link_libraries(ggml PUBLIC ggml-base)
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
target_link_libraries(ggml PRIVATE dl)
|
||||
endif()
|
||||
|
||||
function(ggml_add_backend_library backend)
|
||||
if (GGML_BACKEND_DL)
|
||||
add_library(${backend} MODULE ${ARGN})
|
||||
# write the shared library to the output directory
|
||||
set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
||||
target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
|
||||
add_dependencies(ggml ${backend})
|
||||
else()
|
||||
add_library(${backend} ${ARGN})
|
||||
target_link_libraries(ggml PUBLIC ${backend})
|
||||
install(TARGETS ${backend} LIBRARY)
|
||||
endif()
|
||||
|
||||
target_link_libraries(${backend} PRIVATE ggml-base)
|
||||
target_include_directories(${backend} PRIVATE ..)
|
||||
|
||||
if (${BUILD_SHARED_LIBS})
|
||||
target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
|
||||
target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
|
||||
endif()
|
||||
|
||||
if(NOT GGML_AVAILABLE_BACKENDS)
|
||||
set(GGML_AVAILABLE_BACKENDS "${backend}"
|
||||
CACHE INTERNAL "List of backends for cmake package")
|
||||
else()
|
||||
list(FIND GGML_AVAILABLE_BACKENDS "${backend}" has_backend)
|
||||
if(has_backend EQUAL -1)
|
||||
set(GGML_AVAILABLE_BACKENDS "${GGML_AVAILABLE_BACKENDS};${backend}"
|
||||
CACHE INTERNAL "List of backends for cmake package")
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(ggml_add_backend backend)
|
||||
string(TOUPPER "GGML_${backend}" backend_id)
|
||||
if (${backend_id})
|
||||
string(TOLOWER "ggml-${backend}" backend_target)
|
||||
add_subdirectory(${backend_target})
|
||||
message(STATUS "Including ${backend} backend")
|
||||
if (NOT GGML_BACKEND_DL)
|
||||
string(TOUPPER "GGML_USE_${backend}" backend_use)
|
||||
target_compile_definitions(ggml PUBLIC ${backend_use})
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(ggml_add_cpu_backend_variant tag_name)
|
||||
set(GGML_CPU_TAG_NAME ${tag_name})
|
||||
# other: OPENMP LLAMAFILE CPU_HBM
|
||||
foreach (feat NATIVE
|
||||
AVX AVX2 AVX_VNNI FMA F16C
|
||||
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
|
||||
AMX_TILE AMX_INT8 AMX_BF16)
|
||||
set(GGML_${feat} OFF)
|
||||
endforeach()
|
||||
|
||||
foreach (feat ${ARGN})
|
||||
set(GGML_${feat} ON)
|
||||
endforeach()
|
||||
|
||||
ggml_add_cpu_backend_variant_impl(${tag_name})
|
||||
endfunction()
|
||||
|
||||
ggml_add_backend(CPU)
|
||||
|
||||
if (GGML_CPU_ALL_VARIANTS)
|
||||
if (NOT GGML_BACKEND_DL)
|
||||
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
||||
endif()
|
||||
ggml_add_cpu_backend_variant(sandybridge AVX)
|
||||
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
|
||||
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
||||
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
||||
if (NOT MSVC)
|
||||
# MSVC doesn't support AMX
|
||||
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
||||
endif()
|
||||
elseif (GGML_CPU)
|
||||
ggml_add_cpu_backend_variant_impl("")
|
||||
endif()
|
||||
|
||||
ggml_add_backend(BLAS)
|
||||
ggml_add_backend(CANN)
|
||||
ggml_add_backend(CUDA)
|
||||
ggml_add_backend(HIP)
|
||||
ggml_add_backend(Kompute)
|
||||
ggml_add_backend(METAL)
|
||||
ggml_add_backend(MUSA)
|
||||
ggml_add_backend(RPC)
|
||||
ggml_add_backend(SYCL)
|
||||
ggml_add_backend(Vulkan)
|
||||
ggml_add_backend(OpenCL)
|
||||
|
||||
foreach (target ggml-base ggml)
|
||||
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
|
||||
target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
|
||||
endforeach()
|
||||
|
||||
target_link_libraries(ggml-base PRIVATE Threads::Threads)
|
||||
|
||||
find_library(MATH_LIBRARY m)
|
||||
if (MATH_LIBRARY)
|
||||
if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
|
||||
target_link_libraries(ggml-base PRIVATE m)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Android")
|
||||
target_link_libraries(ggml-base PRIVATE dl)
|
||||
endif()
|
||||
|
||||
if (BUILD_SHARED_LIBS)
|
||||
foreach (target ggml-base ggml)
|
||||
set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_compile_definitions(${target} PRIVATE GGML_BUILD)
|
||||
target_compile_definitions(${target} PUBLIC GGML_SHARED)
|
||||
endforeach()
|
||||
endif()
|
File diff suppressed because it is too large
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/CMakeLists.txt (vendored, new file, 107 lines)
|
@ -0,0 +1,107 @@
|
|||
if (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
|
||||
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$") AND
|
||||
CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
|
||||
message(STATUS "Using AMX")
|
||||
|
||||
file(GLOB GGML_HEADERS_AMX "*.h")
|
||||
list(APPEND GGML_HEADERS_AMX "../../include/ggml-amx.h")
|
||||
|
||||
file(GLOB GGML_SOURCES_AMX "*.cpp")
|
||||
|
||||
add_library(ggml-amx
|
||||
${GGML_HEADERS_AMX}
|
||||
${GGML_SOURCES_AMX})
|
||||
|
||||
target_link_libraries(ggml-amx PRIVATE ggml-base)
|
||||
target_include_directories(ggml-amx PRIVATE . ..)
|
||||
|
||||
# this is duplicated from the CPU backend, since the AMX backend also depends on the architecture flags
|
||||
# TODO: integrate AMX backend into the CPU backend
|
||||
if (MSVC)
|
||||
# instruction set detection for MSVC only
|
||||
if (GGML_NATIVE)
|
||||
# TODO: improve, should not reference files from the parent folder
|
||||
include(../ggml-cpu/cmake/FindSIMD.cmake)
|
||||
endif ()
|
||||
if (GGML_AVX512)
|
||||
list(APPEND ARCH_FLAGS /arch:AVX512)
|
||||
# MSVC has no compile-time flags enabling specific
|
||||
# AVX512 extensions, nor does it define the
|
||||
# macros corresponding to the extensions.
|
||||
# Do it manually.
|
||||
if (GGML_AVX512_VBMI)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
|
||||
endif()
|
||||
if (GGML_AVX512_VNNI)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
|
||||
endif()
|
||||
if (GGML_AVX512_BF16)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
|
||||
endif()
|
||||
if (GGML_AMX_TILE)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
|
||||
endif()
|
||||
if (GGML_AMX_INT8)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
|
||||
endif()
|
||||
if (GGML_AMX_BF16)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
|
||||
endif()
|
||||
elseif (GGML_AVX2)
|
||||
list(APPEND ARCH_FLAGS /arch:AVX2)
|
||||
elseif (GGML_AVX)
|
||||
list(APPEND ARCH_FLAGS /arch:AVX)
|
||||
endif()
|
||||
else()
|
||||
if (GGML_NATIVE)
|
||||
list(APPEND ARCH_FLAGS -march=native)
|
||||
endif()
|
||||
if (GGML_F16C)
|
||||
list(APPEND ARCH_FLAGS -mf16c)
|
||||
endif()
|
||||
if (GGML_FMA)
|
||||
list(APPEND ARCH_FLAGS -mfma)
|
||||
endif()
|
||||
if (GGML_AVX)
|
||||
list(APPEND ARCH_FLAGS -mavx)
|
||||
endif()
|
||||
if (GGML_AVX2)
|
||||
list(APPEND ARCH_FLAGS -mavx2)
|
||||
endif()
|
||||
if (GGML_AVX512)
|
||||
list(APPEND ARCH_FLAGS -mavx512f)
|
||||
list(APPEND ARCH_FLAGS -mavx512dq)
|
||||
list(APPEND ARCH_FLAGS -mavx512bw)
|
||||
endif()
|
||||
if (GGML_AVX512_VBMI)
|
||||
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
||||
endif()
|
||||
if (GGML_AVX512_VNNI)
|
||||
list(APPEND ARCH_FLAGS -mavx512vnni)
|
||||
endif()
|
||||
if (GGML_AVX512_BF16)
|
||||
list(APPEND ARCH_FLAGS -mavx512bf16)
|
||||
endif()
|
||||
if (GGML_AMX_TILE)
|
||||
list(APPEND ARCH_FLAGS -mamx-tile)
|
||||
endif()
|
||||
if (GGML_AMX_INT8)
|
||||
list(APPEND ARCH_FLAGS -mamx-int8)
|
||||
endif()
|
||||
if (GGML_AMX_BF16)
|
||||
list(APPEND ARCH_FLAGS -mamx-bf16)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
target_compile_options(ggml-amx PRIVATE ${ARCH_FLAGS})
|
||||
else()
|
||||
set(GGML_AMX OFF PARENT_SCOPE)
|
||||
message(WARNING "AMX requires x86 and gcc version > 11.0. Turning off GGML_AMX.")
|
||||
endif()
|
|
@ -0,0 +1,94 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
// hack until AMX is moved into the CPU backend
|
||||
#include "../ggml-cpu/ggml-cpu-impl.h" // <immintrin.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#define TILE_M 16
|
||||
#define TILE_N 16
|
||||
#define TILE_K 32
|
||||
#define VNNI_BLK 4
|
||||
|
||||
#define AMX_BLK_SIZE 32
|
||||
|
||||
#define TMM0 0
|
||||
#define TMM1 1
|
||||
#define TMM2 2
|
||||
#define TMM3 3
|
||||
#define TMM4 4
|
||||
#define TMM5 5
|
||||
#define TMM6 6
|
||||
#define TMM7 7
|
||||
|
||||
// parallel routines
|
||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
|
||||
inline T div_up(T x, T y) { return (x + y - 1) / y; }
|
||||
|
||||
template <typename T>
|
||||
inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
|
||||
#if 0
|
||||
// onednn partition pattern
|
||||
T& n_my = n_end;
|
||||
if (nth <= 1 || n == 0) {
|
||||
n_start = 0;
|
||||
n_my = n;
|
||||
} else {
|
||||
T n1 = div_up(n, nth);
|
||||
T n2 = n1 - 1;
|
||||
T T1 = n - n2 * nth;
|
||||
n_my = ith < T1 ? n1 : n2;
|
||||
n_start = ith <= T1 ? ith*n1 : T1 * n1 + (ith - T1) * n2;
|
||||
}
|
||||
n_end += n_start;
|
||||
#else
|
||||
// pytorch aten partition pattern
|
||||
T n_my = div_up(n, nth);
|
||||
n_start = ith * n_my;
|
||||
n_end = std::min(n_start + n_my, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename func_t>
|
||||
inline void parallel_for(int nth, int n, const func_t& f) {
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel num_threads(nth)
|
||||
{
|
||||
//int nth = omp_get_num_threads();
|
||||
int ith = omp_get_thread_num();
|
||||
int tbegin, tend;
|
||||
balance211(n, nth, ith, tbegin, tend);
|
||||
f(tbegin, tend);
|
||||
}
|
||||
#else
|
||||
f(0, n);
|
||||
|
||||
GGML_UNUSED(nth);
|
||||
#endif
|
||||
}
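As an illustration only (nothing in the vendored header), the sketch below shows how the partitioning above splits work across threads: with n = 100 and nth = 4, the pytorch-aten pattern used in balance211 yields the half-open ranges [0,25), [25,50), [50,75) and [75,100).

    #include <cstdio>

    // illustrative sketch, assuming this header has been included
    static void example_partition() {
        // ranges that balance211 would hand to each of 4 workers for 100 rows
        for (int ith = 0; ith < 4; ith++) {
            int begin = 0, end = 0;
            balance211(100, 4, ith, begin, end);
            std::printf("thread %d: [%d, %d)\n", ith, begin, end);
        }

        // the same split executed by parallel_for (OpenMP threads if available,
        // otherwise a single call f(0, 100))
        parallel_for(4, 100, [](int begin, int end) {
            (void) begin; (void) end; // thread-local work over [begin, end)
        });
    }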
|
||||
|
||||
// quantized types that have AMX support
|
||||
inline bool qtype_has_amx_kernels(const enum ggml_type type) {
|
||||
// TODO: fix padding for vnni format
|
||||
return (type == GGML_TYPE_Q4_0) ||
|
||||
(type == GGML_TYPE_Q4_1);
|
||||
//(type == GGML_TYPE_Q8_0) ||
|
||||
//(type == GGML_TYPE_Q4_K) ||
|
||||
//(type == GGML_TYPE_Q5_K) ||
|
||||
//(type == GGML_TYPE_Q6_K) ||
|
||||
//(type == GGML_TYPE_IQ4_XS);
|
||||
}
|
||||
|
||||
// ggml backend context
|
||||
struct ggml_backend_amx_context {
|
||||
int n_threads = GGML_DEFAULT_N_THREADS;
|
||||
std::unique_ptr<char[]> work_data;
|
||||
size_t work_size = 0;
|
||||
};
|
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-amx/ggml-amx.cpp (vendored, new file, 446 lines)
|
@ -0,0 +1,446 @@
|
|||
#include "ggml-amx.h"
|
||||
#include "ggml-amx/common.h"
|
||||
#include "ggml-amx/mmq.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#if defined(__gnu_linux__)
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#if defined(__AMX_INT8__)
|
||||
|
||||
// AMX buffer interface
|
||||
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||
free(buffer->context);
|
||||
}
|
||||
|
||||
static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
|
||||
return (void *)(buffer->context);
|
||||
}
|
||||
|
||||
static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
||||
memset((char *)tensor->data + offset, value, size);
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||
if (qtype_has_amx_kernels(tensor->type)) {
|
||||
ggml_backend_amx_convert_weight(tensor, data, offset, size);
|
||||
} else {
|
||||
memcpy((char *)tensor->data + offset, data, size);
|
||||
}
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
GGML_ASSERT(!qtype_has_amx_kernels(tensor->type));
|
||||
memcpy(data, (const char *)tensor->data + offset, size);
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
|
||||
if (ggml_backend_buffer_is_host(src->buffer)) {
|
||||
if (qtype_has_amx_kernels(src->type)) {
|
||||
ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_backend_amx_get_alloc_size(dst));
|
||||
} else {
|
||||
memcpy(dst->data, src->data, ggml_nbytes(src));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
||||
memset(buffer->context, value, buffer->size);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
|
||||
/* .free_buffer = */ ggml_backend_amx_buffer_free_buffer,
|
||||
/* .get_base = */ ggml_backend_amx_buffer_get_base,
|
||||
/* .init_tensor = */ NULL, // no initialization required
|
||||
/* .memset_tensor = */ ggml_backend_amx_buffer_memset_tensor,
|
||||
/* .set_tensor = */ ggml_backend_amx_buffer_set_tensor,
|
||||
/* .get_tensor = */ ggml_backend_amx_buffer_get_tensor,
|
||||
/* .cpy_tensor = */ ggml_backend_amx_buffer_cpy_tensor,
|
||||
/* .clear = */ ggml_backend_amx_buffer_clear,
|
||||
/* .reset = */ NULL,
|
||||
};
|
||||
|
||||
static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
|
||||
return "AMX";
|
||||
|
||||
GGML_UNUSED(buft);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
||||
void * data = aligned_alloc(TENSOR_ALIGNMENT, size);
|
||||
if (data == NULL) {
|
||||
fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);
|
||||
}
|
||||
|
||||
static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
|
||||
return TENSOR_ALIGNMENT;
|
||||
|
||||
GGML_UNUSED(buft);
|
||||
}
|
||||
|
||||
static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor* tensor) {
|
||||
return ggml_backend_amx_get_alloc_size(tensor);
|
||||
|
||||
GGML_UNUSED(buft);
|
||||
}
|
||||
|
||||
static bool ggml_backend_amx_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
|
||||
return false;
|
||||
|
||||
GGML_UNUSED(buft);
|
||||
}
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
|
||||
static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_amx_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_amx_buffer_type_is_host,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_amx_reg(), 0),
|
||||
/* .context = */ NULL,
|
||||
};
|
||||
|
||||
return &ggml_backend_buffer_type_amx;
|
||||
}
|
||||
|
||||
// backend interface
|
||||
|
||||
static const char * ggml_backend_amx_name(ggml_backend_t backend) {
|
||||
return "AMX";
|
||||
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_amx_free(ggml_backend_t backend) {
|
||||
ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend->context;
|
||||
delete ctx;
|
||||
delete backend;
|
||||
}
|
||||
|
||||
static enum ggml_status ggml_backend_amx_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend->context;
|
||||
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = cgraph->nodes[i];
|
||||
|
||||
switch (node->op) {
|
||||
case GGML_OP_MUL_MAT:
|
||||
ggml_backend_amx_mul_mat(ctx, node);
|
||||
break;
|
||||
|
||||
case GGML_OP_NONE:
|
||||
case GGML_OP_RESHAPE:
|
||||
case GGML_OP_VIEW:
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node));
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
return GGML_STATUS_SUCCESS;
|
||||
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
static struct ggml_backend_i ggml_backend_amx_i = {
|
||||
/* .get_name = */ ggml_backend_amx_name,
|
||||
/* .free = */ ggml_backend_amx_free,
|
||||
/* .set_tensor_async = */ NULL,
|
||||
/* .get_tensor_async = */ NULL,
|
||||
/* .cpy_tensor_async = */ NULL,
|
||||
/* .synchronize = */ NULL,
|
||||
/* .graph_plan_create = */ NULL,
|
||||
/* .graph_plan_free = */ NULL,
|
||||
/* .graph_plan_update = */ NULL,
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_amx_graph_compute,
|
||||
/* .event_record = */ NULL,
|
||||
/* .event_wait = */ NULL,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_amx_guid() {
|
||||
static ggml_guid guid = { 0x13, 0xb8, 0xa4, 0xc4, 0xba, 0xfe, 0x51, 0x67, 0x87, 0x44, 0x55, 0x15, 0xb2, 0x35, 0x62, 0x3e };
|
||||
return &guid;
|
||||
}
|
||||
|
||||
#define ARCH_GET_XCOMP_PERM 0x1022
|
||||
#define ARCH_REQ_XCOMP_PERM 0x1023
|
||||
#define XFEATURE_XTILECFG 17
|
||||
#define XFEATURE_XTILEDATA 18
|
||||
|
||||
static bool ggml_amx_init() {
|
||||
#if defined(__gnu_linux__)
|
||||
if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
|
||||
fprintf(stderr, "AMX is not ready to be used!\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
#elif defined(_WIN32)
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_amx_init() {
|
||||
|
||||
// invoke a Linux system call to request access to AMX features
|
||||
ggml_amx_init();
|
||||
|
||||
// backend context
|
||||
ggml_backend_amx_context * ctx = new ggml_backend_amx_context;
|
||||
|
||||
// ggml amx backend
|
||||
ggml_backend_t backend = new ggml_backend {
|
||||
/* .guid = */ ggml_backend_amx_guid(),
|
||||
/* .interface = */ ggml_backend_amx_i,
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_amx_reg(), 0),
|
||||
/* .context = */ ctx,
|
||||
};
|
||||
|
||||
return backend;
|
||||
}
|
||||
|
||||
bool ggml_backend_is_amx(ggml_backend_t backend) {
|
||||
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_amx_guid());
|
||||
}
|
||||
|
||||
void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
|
||||
GGML_ASSERT(ggml_backend_is_amx(backend_amx));
|
||||
|
||||
ggml_backend_amx_context * ctx = (ggml_backend_amx_context *)backend_amx->context;
|
||||
ctx->n_threads = n_threads;
|
||||
}
|
||||
|
||||
// device interface
|
||||
|
||||
static const char * ggml_backend_amx_device_get_name(ggml_backend_dev_t dev) {
|
||||
return "AMX";
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static const char * ggml_backend_amx_device_get_description(ggml_backend_dev_t dev) {
|
||||
return "Intel Advanced Matrix Extensions";
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static void ggml_backend_amx_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||
// TODO
|
||||
*free = 0;
|
||||
*total = 0;
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static enum ggml_backend_dev_type ggml_backend_amx_device_get_type(ggml_backend_dev_t dev) {
|
||||
return GGML_BACKEND_DEVICE_TYPE_ACCEL;
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static void ggml_backend_amx_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
|
||||
props->name = ggml_backend_amx_device_get_name(dev);
|
||||
props->description = ggml_backend_amx_device_get_description(dev);
|
||||
props->type = ggml_backend_amx_device_get_type(dev);
|
||||
ggml_backend_amx_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||
|
||||
// `buffer_from_host_ptr` is intended to be used with mmap, when the memory layout is left unchanged
|
||||
props->caps = {
|
||||
/* .async = */ false,
|
||||
/* .host_buffer = */ false,
|
||||
/* .buffer_from_host_ptr = */ false,
|
||||
/* .events = */ false,
|
||||
};
|
||||
}
|
||||
|
||||
static ggml_backend_t ggml_backend_amx_device_init(ggml_backend_dev_t dev, const char * params) {
|
||||
return ggml_backend_amx_init();
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
GGML_UNUSED(params);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_t ggml_backend_amx_device_get_buffer_type(ggml_backend_dev_t dev) {
|
||||
return ggml_backend_amx_buffer_type();
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static bool ggml_backend_amx_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|
||||
|
||||
// handle only 2d gemm for now
|
||||
auto is_contiguous_2d = [](const struct ggml_tensor * t) {
|
||||
return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
|
||||
};
|
||||
|
||||
switch (op->op) {
|
||||
case GGML_OP_NONE:
|
||||
case GGML_OP_RESHAPE:
|
||||
case GGML_OP_VIEW:
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
return true;
|
||||
|
||||
case GGML_OP_MUL_MAT: {
|
||||
const struct ggml_tensor * src0 = op->src[0];
|
||||
const struct ggml_tensor * src1 = op->src[1];
|
||||
|
||||
const enum ggml_type type = src0->type;
|
||||
const int64_t ne0 = op->ne[0];
|
||||
|
||||
// amx kernels are enabled for Q4_0, Q4_1, Q8_0, F16
|
||||
// Q4_K, Q5_K, Q6_K, IQ4_XS enabled for QK_K = 256
|
||||
bool has_amx_kernels = qtype_has_amx_kernels(type) || (type == GGML_TYPE_F16);
|
||||
|
||||
bool can_use_amx =
|
||||
is_contiguous_2d(src0) && // src0 must be contiguous
|
||||
is_contiguous_2d(src1) && // src1 must be contiguous
|
||||
src1->type == GGML_TYPE_F32 && // src1 must be float32
|
||||
has_amx_kernels && // with amx kernel impls
|
||||
ne0 % (TILE_N * 2) == 0; // out_features is a multiple of 32
|
||||
|
||||
return can_use_amx;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static bool ggml_backend_amx_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
||||
return buft->iface.get_name == ggml_backend_amx_buffer_type_get_name;
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static const struct ggml_backend_device_i ggml_backend_amx_device_i = {
|
||||
/* .get_name = */ ggml_backend_amx_device_get_name,
|
||||
/* .get_description = */ ggml_backend_amx_device_get_description,
|
||||
/* .get_memory = */ ggml_backend_amx_device_get_memory,
|
||||
/* .get_type = */ ggml_backend_amx_device_get_type,
|
||||
/* .get_props = */ ggml_backend_amx_device_get_props,
|
||||
/* .init_backend = */ ggml_backend_amx_device_init,
|
||||
/* .get_buffer_type = */ ggml_backend_amx_device_get_buffer_type,
|
||||
/* .get_host_buffer_type = */ NULL,
|
||||
/* .buffer_from_host_ptr = */ NULL,
|
||||
/* .supports_op = */ ggml_backend_amx_device_supports_op,
|
||||
/* .supports_buft = */ ggml_backend_amx_device_supports_buft,
|
||||
/* .offload_op = */ NULL,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_synchronize = */ NULL,
|
||||
};
|
||||
|
||||
// backend reg interface
|
||||
|
||||
static const char * ggml_backend_amx_reg_get_name(ggml_backend_reg_t reg) {
|
||||
return "AMX";
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
}
|
||||
|
||||
static size_t ggml_backend_amx_reg_get_device_count(ggml_backend_reg_t reg) {
|
||||
return 1;
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
}
|
||||
|
||||
static ggml_backend_dev_t ggml_backend_amx_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
||||
GGML_ASSERT(index == 0);
|
||||
|
||||
static ggml_backend_device ggml_backend_amx_device = {
|
||||
/* .iface = */ ggml_backend_amx_device_i,
|
||||
/* .reg = */ reg,
|
||||
/* .context = */ nullptr,
|
||||
};
|
||||
|
||||
return &ggml_backend_amx_device;
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
GGML_UNUSED(index);
|
||||
}
|
||||
|
||||
static void * ggml_backend_amx_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
||||
if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) {
|
||||
return (void *)ggml_backend_amx_set_n_threads;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
GGML_UNUSED(name);
|
||||
}
|
||||
|
||||
static const struct ggml_backend_reg_i ggml_backend_amx_reg_i = {
|
||||
/* .get_name = */ ggml_backend_amx_reg_get_name,
|
||||
/* .get_device_count = */ ggml_backend_amx_reg_get_device_count,
|
||||
/* .get_device = */ ggml_backend_amx_reg_get_device,
|
||||
/* .get_proc_address = */ ggml_backend_amx_get_proc_address,
|
||||
};
|
||||
|
||||
ggml_backend_reg_t ggml_backend_amx_reg(void) {
|
||||
static struct ggml_backend_reg ggml_backend_amx_reg = {
|
||||
/* .iface = */ ggml_backend_amx_reg_i,
|
||||
/* .context = */ NULL,
|
||||
};
|
||||
|
||||
return &ggml_backend_amx_reg;
|
||||
}
|
||||
|
||||
#else // if defined(__AMX_INT8__)
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool ggml_backend_is_amx(ggml_backend_t backend) {
|
||||
GGML_UNUSED(backend);
|
||||
return false;
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_amx_init(void) {
|
||||
fprintf(stderr, "GGML is not compiled with AMX support!\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
|
||||
fprintf(stderr, "GGML is not compiled with AMX support!\n");
|
||||
|
||||
GGML_UNUSED(backend_amx);
|
||||
GGML_UNUSED(n_threads);
|
||||
}
|
||||
|
||||
ggml_backend_reg_t ggml_backend_amx_reg(void) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#endif
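Caller-side sketch only (not part of the vendored file): the AMX backend exposes its thread-count setter through the registry's get_proc_address hook, so a front end would normally look the function up by name instead of linking ggml_backend_amx_set_n_threads directly. ggml_backend_reg_get_proc_address() is assumed to come from the public ggml-backend.h API, and the helper name below is hypothetical.

    typedef void (*amx_set_n_threads_fn)(ggml_backend_t backend, int n_threads);

    static void set_amx_backend_threads(ggml_backend_t backend, int n_threads) {
        ggml_backend_reg_t reg = ggml_backend_amx_reg();
        if (reg == NULL) {
            return; // built without AMX support
        }
        amx_set_n_threads_fn set_n_threads =
            (amx_set_n_threads_fn) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
        if (set_n_threads != NULL) {
            set_n_threads(backend, n_threads);
        }
    }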
|
File diff suppressed because it is too large
|
@ -0,0 +1,17 @@
|
|||
#pragma once
|
||||
#include "common.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);
|
||||
|
||||
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
|
||||
void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor * dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-impl.h (vendored, new file, 255 lines)
|
@ -0,0 +1,255 @@
|
|||
#pragma once
|
||||
|
||||
// ggml-backend internal header
|
||||
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_BACKEND_API_VERSION 1
|
||||
|
||||
//
|
||||
// Backend buffer type
|
||||
//
|
||||
|
||||
struct ggml_backend_buffer_type_i {
|
||||
const char * (*get_name) (ggml_backend_buffer_type_t buft);
|
||||
// allocate a buffer of this type
|
||||
ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
|
||||
// tensor alignment
|
||||
size_t (*get_alignment) (ggml_backend_buffer_type_t buft);
|
||||
// (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
|
||||
size_t (*get_max_size) (ggml_backend_buffer_type_t buft);
|
||||
// (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
|
||||
size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
|
||||
// (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
|
||||
bool (*is_host) (ggml_backend_buffer_type_t buft);
|
||||
};
|
||||
|
||||
struct ggml_backend_buffer_type {
|
||||
struct ggml_backend_buffer_type_i iface;
|
||||
ggml_backend_dev_t device;
|
||||
void * context;
|
||||
};
|
||||
|
||||
//
|
||||
// Backend buffer
|
||||
//
|
||||
|
||||
struct ggml_backend_buffer_i {
|
||||
// (optional) free the buffer
|
||||
void (*free_buffer) (ggml_backend_buffer_t buffer);
|
||||
// base address of the buffer
|
||||
void * (*get_base) (ggml_backend_buffer_t buffer);
|
||||
// (optional) initialize a tensor in the buffer (eg. add tensor extras)
|
||||
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||
// tensor data access
|
||||
void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
||||
void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
// (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
|
||||
bool (*cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
// clear the entire buffer
|
||||
void (*clear) (ggml_backend_buffer_t buffer, uint8_t value);
|
||||
// (optional) reset any internal state due to tensor initialization, such as tensor extras
|
||||
void (*reset) (ggml_backend_buffer_t buffer);
|
||||
};
|
||||
|
||||
struct ggml_backend_buffer {
|
||||
struct ggml_backend_buffer_i iface;
|
||||
ggml_backend_buffer_type_t buft;
|
||||
void * context;
|
||||
size_t size;
|
||||
enum ggml_backend_buffer_usage usage;
|
||||
};
|
||||
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
|
||||
ggml_backend_buffer_type_t buft,
|
||||
struct ggml_backend_buffer_i iface,
|
||||
void * context,
|
||||
size_t size);
|
||||
|
||||
// do not use directly, use ggml_backend_tensor_copy instead
|
||||
GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
// multi-buffer
|
||||
// buffer that contains a collection of buffers
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
|
||||
GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
||||
|
||||
//
|
||||
// Backend (stream)
|
||||
//
|
||||
|
||||
struct ggml_backend_i {
|
||||
const char * (*get_name)(ggml_backend_t backend);
|
||||
|
||||
void (*free)(ggml_backend_t backend);
|
||||
|
||||
// (optional) asynchronous tensor data access
|
||||
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
// (optional) complete all pending operations (required if the backend supports async operations)
|
||||
void (*synchronize)(ggml_backend_t backend);
|
||||
|
||||
// (optional) graph plans (not used currently)
|
||||
// compute graph with a plan
|
||||
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
|
||||
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
// update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
|
||||
void (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
|
||||
// compute the graph with the plan
|
||||
enum ggml_status (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
|
||||
// compute graph (always async if supported by the backend)
|
||||
enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
// (optional) event synchronization
|
||||
// record an event on this stream
|
||||
void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
|
||||
// wait for an event on a different stream
|
||||
void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
|
||||
};
|
||||
|
||||
struct ggml_backend {
|
||||
ggml_guid_t guid;
|
||||
struct ggml_backend_i iface;
|
||||
ggml_backend_dev_t device;
|
||||
void * context;
|
||||
};
|
||||
|
||||
struct ggml_backend_event {
|
||||
struct ggml_backend_device * device;
|
||||
void * context;
|
||||
};
|
||||
|
||||
//
|
||||
// Backend device
|
||||
//
|
||||
|
||||
// Note: if additional properties are needed, we should add a struct with all of them
|
||||
// the current functions to obtain the properties can remain, since they are more convenient for often used properties
|
||||
struct ggml_backend_device_i {
|
||||
// device name: short identifier for this device, such as "CPU" or "CUDA0"
|
||||
const char * (*get_name)(ggml_backend_dev_t dev);
|
||||
|
||||
// device description: short informative description of the device, could be the model name
|
||||
const char * (*get_description)(ggml_backend_dev_t dev);
|
||||
|
||||
// device memory in bytes
|
||||
void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);
|
||||
|
||||
// device type
|
||||
enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);
|
||||
|
||||
// device properties
|
||||
void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);
|
||||
|
||||
// backend (stream) initialization
|
||||
ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);
|
||||
|
||||
// preferred buffer type
|
||||
ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);
|
||||
|
||||
// (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
|
||||
ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);
|
||||
|
||||
// (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
|
||||
ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);
|
||||
|
||||
// check if the backend can compute an operation
|
||||
bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
|
||||
|
||||
// check if the backend can use tensors allocated in a buffer type
|
||||
bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
|
||||
|
||||
// (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
|
||||
// these should be expensive operations that may benefit from running on this backend instead of the CPU backend
|
||||
bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
|
||||
|
||||
// (optional) event synchronization
|
||||
ggml_backend_event_t (*event_new) (ggml_backend_dev_t dev);
|
||||
void (*event_free) (ggml_backend_dev_t dev, ggml_backend_event_t event);
|
||||
void (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event);
|
||||
};
|
||||
|
||||
struct ggml_backend_device {
|
||||
struct ggml_backend_device_i iface;
|
||||
ggml_backend_reg_t reg;
|
||||
void * context;
|
||||
};
|
||||
|
||||
//
|
||||
// Backend (reg)
|
||||
//
|
||||
|
||||
struct ggml_backend_reg_i {
|
||||
const char * (*get_name)(ggml_backend_reg_t reg);
|
||||
|
||||
// enumerate available devices
|
||||
size_t (*get_device_count)(ggml_backend_reg_t reg);
|
||||
ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index);
|
||||
|
||||
// (optional) get a pointer to a function in the backend
|
||||
// backends can add custom functions that are not part of the standard ggml-backend interface
|
||||
void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
|
||||
};
|
||||
|
||||
struct ggml_backend_reg {
|
||||
int api_version; // initialize to GGML_BACKEND_API_VERSION
|
||||
struct ggml_backend_reg_i iface;
|
||||
void * context;
|
||||
};
|
||||
|
||||
// Internal backend registry API
|
||||
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
|
||||
|
||||
// Add backend dynamic loading support to the backend
|
||||
|
||||
// Initialize the backend
|
||||
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
|
||||
// Optional: obtain a score for the backend based on the system configuration
|
||||
// Higher scores are preferred, 0 means the backend is not supported in the current system
|
||||
typedef int (*ggml_backend_score_t)(void);
|
||||
|
||||
#ifdef GGML_BACKEND_DL
|
||||
# ifdef __cplusplus
|
||||
# define GGML_BACKEND_DL_IMPL(reg_fn) \
|
||||
extern "C" { \
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
|
||||
} \
|
||||
ggml_backend_reg_t ggml_backend_init(void) { \
|
||||
return reg_fn(); \
|
||||
}
|
||||
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
|
||||
extern "C" { \
|
||||
GGML_BACKEND_API int ggml_backend_score(void); \
|
||||
} \
|
||||
int ggml_backend_score(void) { \
|
||||
return score_fn(); \
|
||||
}
|
||||
# else
|
||||
# define GGML_BACKEND_DL_IMPL(reg_fn) \
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
|
||||
ggml_backend_reg_t ggml_backend_init(void) { \
|
||||
return reg_fn(); \
|
||||
}
|
||||
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
|
||||
GGML_BACKEND_API int ggml_backend_score(void); \
|
||||
int ggml_backend_score(void) { \
|
||||
return score_fn(); \
|
||||
}
|
||||
# endif
|
||||
#else
|
||||
# define GGML_BACKEND_DL_IMPL(reg_fn)
|
||||
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
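Sketch only (not part of the vendored header): a backend built as a loadable module would typically end one of its translation units with the macros above, so the registry can discover it through ggml_backend_init and, optionally, rank it through ggml_backend_score. The "foo" names below are hypothetical placeholders.

    // hypothetical backend "foo"; ggml_backend_foo_reg() is assumed to be defined elsewhere
    ggml_backend_reg_t ggml_backend_foo_reg(void);

    static int ggml_backend_foo_score(void) {
        return 1; // 0 would mean: not usable on this system
    }

    GGML_BACKEND_DL_IMPL      (ggml_backend_foo_reg)
    GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_foo_score)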
|
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-backend-reg.cpp (vendored, new file, 582 lines)
|
@ -0,0 +1,582 @@
|
|||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
#include <algorithm>
|
||||
#include <codecvt>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <locale>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#ifdef _WIN32
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
# include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
# include <mach-o/dyld.h>
|
||||
# include <dlfcn.h>
|
||||
#else
|
||||
# include <dlfcn.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Backend registry
|
||||
#ifdef GGML_USE_CPU
|
||||
#include "ggml-cpu.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CUDA
|
||||
#include "ggml-cuda.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_METAL
|
||||
#include "ggml-metal.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_SYCL
|
||||
#include "ggml-sycl.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_VULKAN
|
||||
#include "ggml-vulkan.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_OPENCL
|
||||
#include "ggml-opencl.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_BLAS
|
||||
#include "ggml-blas.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_RPC
|
||||
#include "ggml-rpc.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CANN
|
||||
#include "ggml-cann.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_KOMPUTE
|
||||
#include "ggml-kompute.h"
|
||||
#endif
|
||||
|
||||
// disable C++17 deprecation warning for std::codecvt_utf8
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
|
||||
static std::wstring utf8_to_utf16(const std::string & str) {
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
||||
return converter.from_bytes(str);
|
||||
}
|
||||
|
||||
static std::string utf16_to_utf8(const std::wstring & str) {
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
||||
return converter.to_bytes(str);
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
using dl_handle = std::remove_pointer_t<HMODULE>;
|
||||
|
||||
struct dl_handle_deleter {
|
||||
void operator()(HMODULE handle) {
|
||||
FreeLibrary(handle);
|
||||
}
|
||||
};
|
||||
|
||||
static dl_handle * dl_load_library(const std::wstring & path) {
|
||||
// suppress error dialogs for missing DLLs
|
||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||
|
||||
HMODULE handle = LoadLibraryW(path.c_str());
|
||||
|
||||
SetErrorMode(old_mode);
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||
|
||||
void * p = (void *) GetProcAddress(handle, name);
|
||||
|
||||
SetErrorMode(old_mode);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
using dl_handle = void;
|
||||
|
||||
struct dl_handle_deleter {
|
||||
void operator()(void * handle) {
|
||||
dlclose(handle);
|
||||
}
|
||||
};
|
||||
|
||||
static void * dl_load_library(const std::wstring & path) {
|
||||
dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||
return dlsym(handle, name);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
||||
|
||||
struct ggml_backend_reg_entry {
|
||||
ggml_backend_reg_t reg;
|
||||
dl_handle_ptr handle;
|
||||
};
|
||||
|
||||
struct ggml_backend_registry {
|
||||
std::vector<ggml_backend_reg_entry> backends;
|
||||
std::vector<ggml_backend_dev_t> devices;
|
||||
|
||||
ggml_backend_registry() {
|
||||
#ifdef GGML_USE_CUDA
|
||||
register_backend(ggml_backend_cuda_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_METAL
|
||||
register_backend(ggml_backend_metal_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_SYCL
|
||||
register_backend(ggml_backend_sycl_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_VULKAN
|
||||
register_backend(ggml_backend_vk_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_OPENCL
|
||||
register_backend(ggml_backend_opencl_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_CANN
|
||||
register_backend(ggml_backend_cann_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_BLAS
|
||||
register_backend(ggml_backend_blas_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_RPC
|
||||
register_backend(ggml_backend_rpc_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_KOMPUTE
|
||||
register_backend(ggml_backend_kompute_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_CPU
|
||||
register_backend(ggml_backend_cpu_reg());
|
||||
#endif
|
||||
}
|
||||
|
||||
~ggml_backend_registry() {
|
||||
// FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
|
||||
// since backend threads may still be running and accessing resources from the dynamic library
|
||||
for (auto & entry : backends) {
|
||||
if (entry.handle) {
|
||||
entry.handle.release(); // NOLINT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
||||
if (!reg) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
||||
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
||||
#endif
|
||||
backends.push_back({ reg, std::move(handle) });
|
||||
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
||||
register_device(ggml_backend_reg_dev_get(reg, i));
|
||||
}
|
||||
}
|
||||
|
||||
void register_device(ggml_backend_dev_t device) {
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
|
||||
#endif
|
||||
devices.push_back(device);
|
||||
}
|
||||
|
||||
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
||||
dl_handle_ptr handle { dl_load_library(path) };
|
||||
if (!handle) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (score_fn && score_fn() == 0) {
|
||||
if (!silent) {
|
||||
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
||||
if (!backend_init_fn) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ggml_backend_reg_t reg = backend_init_fn();
|
||||
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
||||
if (!silent) {
|
||||
if (!reg) {
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
|
||||
} else {
|
||||
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
||||
__func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
||||
|
||||
register_backend(reg, std::move(handle));
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
void unload_backend(ggml_backend_reg_t reg, bool silent) {
|
||||
auto it = std::find_if(backends.begin(), backends.end(),
|
||||
[reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
|
||||
|
||||
if (it == backends.end()) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: backend not found\n", __func__);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!silent) {
|
||||
GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
|
||||
}
|
||||
|
||||
// remove devices
|
||||
devices.erase(
|
||||
std::remove_if(devices.begin(), devices.end(),
|
||||
[reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
|
||||
devices.end());
|
||||
|
||||
// remove backend
|
||||
backends.erase(it);
|
||||
}
|
||||
};
|
||||
|
||||
static ggml_backend_registry & get_reg() {
|
||||
static ggml_backend_registry reg;
|
||||
return reg;
|
||||
}
|
||||
|
||||
// Internal API
|
||||
void ggml_backend_register(ggml_backend_reg_t reg) {
|
||||
get_reg().register_backend(reg);
|
||||
}
|
||||
|
||||
void ggml_backend_device_register(ggml_backend_dev_t device) {
|
||||
get_reg().register_device(device);
|
||||
}
|
||||
|
||||
// Backend (reg) enumeration
|
||||
static bool striequals(const char * a, const char * b) {
|
||||
for (; *a && *b; a++, b++) {
|
||||
if (std::tolower(*a) != std::tolower(*b)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return *a == *b;
|
||||
}
|
||||
|
||||
size_t ggml_backend_reg_count() {
|
||||
return get_reg().backends.size();
|
||||
}
|
||||
|
||||
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
|
||||
GGML_ASSERT(index < ggml_backend_reg_count());
|
||||
return get_reg().backends[index].reg;
|
||||
}
|
||||
|
||||
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
|
||||
for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
|
||||
ggml_backend_reg_t reg = ggml_backend_reg_get(i);
|
||||
if (striequals(ggml_backend_reg_name(reg), name)) {
|
||||
return reg;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Device enumeration
|
||||
size_t ggml_backend_dev_count() {
|
||||
return get_reg().devices.size();
|
||||
}
|
||||
|
||||
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
|
||||
GGML_ASSERT(index < ggml_backend_dev_count());
|
||||
return get_reg().devices[index];
|
||||
}
|
||||
|
||||
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
||||
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
||||
if (striequals(ggml_backend_dev_name(dev), name)) {
|
||||
return dev;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
||||
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
||||
if (ggml_backend_dev_type(dev) == type) {
|
||||
return dev;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Convenience functions
|
||||
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
|
||||
ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
|
||||
if (!dev) {
|
||||
return nullptr;
|
||||
}
|
||||
return ggml_backend_dev_init(dev, params);
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
|
||||
ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
|
||||
if (!dev) {
|
||||
return nullptr;
|
||||
}
|
||||
return ggml_backend_dev_init(dev, params);
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_init_best(void) {
|
||||
ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
|
||||
if (!dev) {
|
||||
dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
||||
}
|
||||
if (!dev) {
|
||||
return nullptr;
|
||||
}
|
||||
return ggml_backend_dev_init(dev, nullptr);
|
||||
}
|
||||
|
||||
// Dynamic loading
|
||||
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
||||
return get_reg().load_backend(utf8_to_utf16(path), false);
|
||||
}
|
||||
|
||||
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
||||
get_reg().unload_backend(reg, true);
|
||||
}
|
||||
|
||||
static std::wstring get_executable_path() {
|
||||
#if defined(__APPLE__)
|
||||
// get executable path
|
||||
std::vector<char> path;
|
||||
uint32_t size;
|
||||
while (true) {
|
||||
size = path.size();
|
||||
if (_NSGetExecutablePath(path.data(), &size) == 0) {
|
||||
break;
|
||||
}
|
||||
path.resize(size);
|
||||
}
|
||||
std::string base_path(path.data(), size);
|
||||
// remove executable name
|
||||
auto last_slash = base_path.find_last_of('/');
|
||||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
return utf8_to_utf16(base_path + "/");
|
||||
#elif defined(__linux__) || defined(__FreeBSD__)
|
||||
std::string base_path = ".";
|
||||
std::vector<char> path(1024);
|
||||
while (true) {
|
||||
// get executable path
|
||||
# if defined(__linux__)
|
||||
ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
|
||||
# elif defined(__FreeBSD__)
|
||||
ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
|
||||
# endif
|
||||
if (len == -1) {
|
||||
break;
|
||||
}
|
||||
if (len < (ssize_t) path.size()) {
|
||||
base_path = std::string(path.data(), len);
|
||||
// remove executable name
|
||||
auto last_slash = base_path.find_last_of('/');
|
||||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
break;
|
||||
}
|
||||
path.resize(path.size() * 2);
|
||||
}
|
||||
|
||||
return utf8_to_utf16(base_path + "/");
|
||||
#elif defined(_WIN32)
|
||||
std::vector<wchar_t> path(MAX_PATH);
|
||||
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
||||
if (len == 0) {
|
||||
return {};
|
||||
}
|
||||
std::wstring base_path(path.data(), len);
|
||||
// remove executable name
|
||||
auto last_slash = base_path.find_last_of('\\');
|
||||
if (last_slash != std::string::npos) {
|
||||
base_path = base_path.substr(0, last_slash);
|
||||
}
|
||||
return base_path + L"\\";
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::wstring backend_filename_prefix() {
|
||||
#ifdef _WIN32
|
||||
return L"ggml-";
|
||||
#else
|
||||
return L"libggml-";
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::wstring backend_filename_suffix() {
|
||||
#ifdef _WIN32
|
||||
return L".dll";
|
||||
#else
|
||||
return L".so";
|
||||
#endif
|
||||
}
|
||||
|
||||
static std::wstring path_separator() {
|
||||
#ifdef _WIN32
|
||||
return L"\\";
|
||||
#else
|
||||
return L"/";
|
||||
#endif
|
||||
}
|
||||
|
||||
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
||||
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
||||
// TODO: search system paths
|
||||
std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
|
||||
std::vector<std::wstring> search_paths;
|
||||
if (user_search_path == nullptr) {
|
||||
search_paths.push_back(L"." + path_separator());
|
||||
search_paths.push_back(get_executable_path());
|
||||
} else {
|
||||
search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
|
||||
}
|
||||
|
||||
int best_score = 0;
|
||||
std::wstring best_path;
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
for (const auto & search_path : search_paths) {
|
||||
if (!fs::exists(search_path)) {
|
||||
continue;
|
||||
}
|
||||
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
||||
for (const auto & entry : dir_it) {
|
||||
if (entry.is_regular_file()) {
|
||||
std::wstring filename = entry.path().filename().wstring();
|
||||
std::wstring ext = entry.path().extension().wstring();
|
||||
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
||||
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
|
||||
if (!handle && !silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||
}
|
||||
if (handle) {
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
if (score_fn) {
|
||||
int s = score_fn();
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
|
||||
#endif
|
||||
if (s > best_score) {
|
||||
best_score = s;
|
||||
best_path = entry.path().wstring();
|
||||
}
|
||||
} else {
|
||||
if (!silent) {
|
||||
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best_score == 0) {
|
||||
// try to load the base backend
|
||||
for (const auto & search_path : search_paths) {
|
||||
std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
|
||||
if (fs::exists(path)) {
|
||||
return get_reg().load_backend(path, silent);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return get_reg().load_backend(best_path, silent);
|
||||
}
|
||||
|
||||
void ggml_backend_load_all() {
|
||||
ggml_backend_load_all_from_path(nullptr);
|
||||
}
|
||||
|
||||
void ggml_backend_load_all_from_path(const char * dir_path) {
|
||||
#ifdef NDEBUG
|
||||
bool silent = true;
|
||||
#else
|
||||
bool silent = false;
|
||||
#endif
|
||||
|
||||
ggml_backend_load_best("blas", silent, dir_path);
|
||||
ggml_backend_load_best("cann", silent, dir_path);
|
||||
ggml_backend_load_best("cuda", silent, dir_path);
|
||||
ggml_backend_load_best("hip", silent, dir_path);
|
||||
ggml_backend_load_best("kompute", silent, dir_path);
|
||||
ggml_backend_load_best("metal", silent, dir_path);
|
||||
ggml_backend_load_best("rpc", silent, dir_path);
|
||||
ggml_backend_load_best("sycl", silent, dir_path);
|
||||
ggml_backend_load_best("vulkan", silent, dir_path);
|
||||
ggml_backend_load_best("opencl", silent, dir_path);
|
||||
ggml_backend_load_best("musa", silent, dir_path);
|
||||
ggml_backend_load_best("cpu", silent, dir_path);
|
||||
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
|
||||
const char * backend_path = std::getenv("GGML_BACKEND_PATH");
|
||||
if (backend_path) {
|
||||
ggml_backend_load(backend_path);
|
||||
}
|
||||
}
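A minimal usage sketch of the registry API defined above (assuming only the public ggml-backend.h header from this vendored tree; model and graph construction are omitted, so this is an illustration rather than part of the commit):

```cpp
// Sketch only: enumerate registered devices and pick the best backend.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    // Load all dynamically built backends ([lib]ggml-<name>-*.so/.dll) from
    // the executable directory and, if set, GGML_BACKEND_PATH.
    ggml_backend_load_all();

    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        printf("device %zu: %s (%s)\n", i,
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
    }

    // Prefer a GPU device, fall back to CPU (see ggml_backend_init_best above).
    ggml_backend_t backend = ggml_backend_init_best();
    if (!backend) {
        fprintf(stderr, "no usable backend found\n");
        return 1;
    }

    // ... build and compute a ggml graph with this backend ...

    ggml_backend_free(backend);
    return 0;
}
```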
|
File diff suppressed because it is too large
87  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/CMakeLists.txt  vendored  Normal file
@@ -0,0 +1,87 @@
if (GGML_STATIC)
|
||||
set(BLA_STATIC ON)
|
||||
endif()
|
||||
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
|
||||
# set(BLA_SIZEOF_INTEGER 8)
|
||||
#endif()
|
||||
|
||||
set(BLA_VENDOR ${GGML_BLAS_VENDOR})
|
||||
find_package(BLAS)
|
||||
|
||||
if (BLAS_FOUND)
|
||||
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
|
||||
|
||||
ggml_add_backend_library(ggml-blas
|
||||
ggml-blas.cpp
|
||||
)
|
||||
|
||||
if (${GGML_BLAS_VENDOR} MATCHES "Apple")
|
||||
add_compile_definitions(ACCELERATE_NEW_LAPACK)
|
||||
add_compile_definitions(ACCELERATE_LAPACK_ILP64)
|
||||
add_compile_definitions(GGML_BLAS_USE_ACCELERATE)
|
||||
elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
|
||||
# BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
|
||||
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
|
||||
find_package(PkgConfig REQUIRED)
|
||||
if (${GGML_BLAS_VENDOR} MATCHES "Generic")
|
||||
pkg_check_modules(DepBLAS blas)
|
||||
elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
|
||||
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
|
||||
pkg_check_modules(DepBLAS openblas64)
|
||||
if (NOT DepBLAS_FOUND)
|
||||
pkg_check_modules(DepBLAS openblas)
|
||||
endif()
|
||||
elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
|
||||
add_compile_definitions(GGML_BLAS_USE_BLIS)
|
||||
pkg_check_modules(DepBLAS blis)
|
||||
elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
|
||||
pkg_check_modules(DepBLAS blas-atlas)
|
||||
elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
|
||||
pkg_check_modules(DepBLAS flexiblas_api)
|
||||
elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
|
||||
add_compile_definitions(GGML_BLAS_USE_MKL)
|
||||
# all Intel* libraries share the same include path
|
||||
pkg_check_modules(DepBLAS mkl-sdl)
|
||||
elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
|
||||
# this doesn't provide pkg-config
|
||||
# suggest to assign BLAS_INCLUDE_DIRS on your own
|
||||
if ("${NVHPC_VERSION}" STREQUAL "")
|
||||
message(WARNING "Better to set NVHPC_VERSION")
|
||||
else()
|
||||
set(DepBLAS_FOUND ON)
|
||||
set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
|
||||
endif()
|
||||
endif()
|
||||
if (DepBLAS_FOUND)
|
||||
set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
|
||||
else()
|
||||
message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
|
||||
" detected by pkgconfig, trying to find cblas.h from possible paths...")
|
||||
find_path(BLAS_INCLUDE_DIRS
|
||||
NAMES cblas.h
|
||||
HINTS
|
||||
/usr/include
|
||||
/usr/local/include
|
||||
/usr/include/openblas
|
||||
/opt/homebrew/opt/openblas/include
|
||||
/usr/local/opt/openblas/include
|
||||
/usr/include/x86_64-linux-gnu/openblas/include
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
||||
|
||||
target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
|
||||
|
||||
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
|
||||
add_compile_definitions(GGML_BLAS_USE_MKL)
|
||||
endif()
|
||||
|
||||
target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
|
||||
target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
|
||||
else()
|
||||
message(ERROR "BLAS not found, please refer to "
|
||||
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
||||
" to set correct GGML_BLAS_VENDOR")
|
||||
endif()
|
517  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-blas/ggml-blas.cpp  vendored  Normal file
@@ -0,0 +1,517 @@
#include "ggml-impl.h"
|
||||
#include "ggml-blas.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
#include <future>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(GGML_BLAS_USE_ACCELERATE)
|
||||
# include <Accelerate/Accelerate.h>
|
||||
#elif defined(GGML_BLAS_USE_MKL)
|
||||
# include <mkl.h>
|
||||
#elif defined(GGML_BLAS_USE_BLIS)
|
||||
# include <blis.h>
|
||||
#elif defined(GGML_BLAS_USE_NVPL)
|
||||
# include <nvpl_blas.h>
|
||||
#else
|
||||
# include <cblas.h>
|
||||
#endif
|
||||
|
||||
struct ggml_backend_blas_context {
|
||||
int n_threads = GGML_DEFAULT_N_THREADS;
|
||||
std::unique_ptr<char[]> work_data;
|
||||
size_t work_size = 0;
|
||||
#ifndef GGML_USE_OPENMP
|
||||
std::vector<std::future<void>> tasks;
|
||||
#endif
|
||||
};
|
||||
|
||||
static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const enum ggml_type type = src0->type;
|
||||
|
||||
GGML_ASSERT(ne0 == ne01);
|
||||
GGML_ASSERT(ne1 == ne11);
|
||||
GGML_ASSERT(ne2 == ne12);
|
||||
GGML_ASSERT(ne3 == ne13);
|
||||
|
||||
// we don't support permuted src0 or src1
|
||||
GGML_ASSERT(nb00 == ggml_type_size(type));
|
||||
GGML_ASSERT(nb10 == ggml_type_size(src1->type));
|
||||
|
||||
// dst cannot be transposed or permuted
|
||||
GGML_ASSERT(nb0 == sizeof(float));
|
||||
GGML_ASSERT(nb0 <= nb1);
|
||||
GGML_ASSERT(nb1 <= nb2);
|
||||
GGML_ASSERT(nb2 <= nb3);
|
||||
|
||||
// broadcast factors
|
||||
const int64_t r2 = ne12/ne02;
|
||||
const int64_t r3 = ne13/ne03;
|
||||
|
||||
const int64_t ne_plane = ne01*ne00;
|
||||
const size_t desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float);
|
||||
|
||||
if (ctx->work_size < desired_wsize) {
|
||||
ctx->work_data.reset(new char[desired_wsize]);
|
||||
ctx->work_size = desired_wsize;
|
||||
}
|
||||
void * wdata = ctx->work_data.get();
|
||||
|
||||
// convert src0 to float
|
||||
if (type != GGML_TYPE_F32) {
|
||||
const auto * type_traits = ggml_get_type_traits(type);
|
||||
ggml_to_float_t const to_float = type_traits->to_float;
|
||||
|
||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
||||
const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
|
||||
float * const wplane = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
|
||||
|
||||
const int min_cols_per_thread = 4096;
|
||||
const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1);
|
||||
const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1);
|
||||
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#pragma omp parallel for num_threads(n_threads)
|
||||
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
||||
to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
|
||||
}
|
||||
#else
|
||||
for (int i = 1; i < n_threads; i++) {
|
||||
const int64_t start = i*ne01/n_threads;
|
||||
const int64_t end = (i + 1)*ne01/n_threads;
|
||||
if (start < end) {
|
||||
ctx->tasks.push_back(std::async(std::launch::async, [=]() {
|
||||
for (int64_t i01 = start; i01 < end; i01++) {
|
||||
to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
{
|
||||
// reuse the current thread for the first task
|
||||
const int64_t start = 0;
|
||||
const int64_t end = ne01/n_threads;
|
||||
for (int64_t i01 = start; i01 < end; i01++) {
|
||||
to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef GGML_USE_OPENMP
|
||||
// wait for all tasks to finish
|
||||
for (auto & task : ctx->tasks) {
|
||||
task.get();
|
||||
}
|
||||
ctx->tasks.clear();
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(OPENBLAS_VERSION)
|
||||
openblas_set_num_threads(ctx->n_threads);
|
||||
#endif
|
||||
|
||||
#if defined(GGML_BLAS_USE_BLIS)
|
||||
bli_thread_set_num_threads(ctx->n_threads);
|
||||
#endif
|
||||
|
||||
#if defined(GGML_BLAS_USE_NVPL)
|
||||
nvpl_blas_set_num_threads(ctx->n_threads);
|
||||
#endif
|
||||
|
||||
for (int64_t i13 = 0; i13 < ne13; i13++) {
|
||||
for (int64_t i12 = 0; i12 < ne12; i12++) {
|
||||
const int64_t i03 = i13/r3;
|
||||
const int64_t i02 = i12/r2;
|
||||
|
||||
const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
|
||||
const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
|
||||
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||
|
||||
if (type != GGML_TYPE_F32) {
|
||||
x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
|
||||
}
|
||||
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne1, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne00,
|
||||
0.0f, d, ne01);
|
||||
}
|
||||
}
|
||||
}
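For reference, the cblas_sgemm call above computes each output plane as src1 · src0ᵀ in row-major layout. A small standalone sketch of that mapping, assuming any CBLAS implementation (e.g. OpenBLAS) is linked; the sizes and data are illustrative only:

```cpp
// Standalone sketch: reproduce the row-major sgemm mapping used above,
// d[ne1 x ne01] = y[ne1 x ne10] * x[ne01 x ne00]^T with ne10 == ne00.
#include <cblas.h>
#include <cstdio>

int main() {
    const int ne01 = 2, ne00 = 3, ne1 = 2, ne10 = 3; // src0: 2x3, src1: 2x3
    float x[ne01 * ne00] = {1, 2, 3, 4, 5, 6};       // src0 rows
    float y[ne1  * ne10] = {1, 0, 1, 0, 1, 0};       // src1 rows
    float d[ne1  * ne01] = {0};

    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                ne1, ne01, ne10,
                1.0f, y, ne10,
                x, ne00,
                0.0f, d, ne01);

    // d[i][j] = dot(src1 row i, src0 row j): expect 4 10 2 5
    for (int i = 0; i < ne1 * ne01; i++) {
        printf("%g ", d[i]);
    }
    printf("\n");
    return 0;
}
```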
|
||||
|
||||
static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(ne0 == ne00);
|
||||
GGML_ASSERT(ne1 == ne10);
|
||||
GGML_ASSERT(ne2 == ne02);
|
||||
GGML_ASSERT(ne02 == ne12);
|
||||
GGML_ASSERT(ne3 == ne13);
|
||||
GGML_ASSERT(ne03 == ne13);
|
||||
|
||||
// we don't support permuted src0 or src1
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
|
||||
// dst cannot be transposed or permuted
|
||||
GGML_ASSERT(nb0 == sizeof(float));
|
||||
// GGML_ASSERT(nb0 <= nb1);
|
||||
// GGML_ASSERT(nb1 <= nb2);
|
||||
// GGML_ASSERT(nb2 <= nb3);
|
||||
|
||||
// Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
|
||||
// src0: (k,n)
|
||||
// src1: (k,m)
|
||||
// dst: (m,n)
|
||||
//
|
||||
// Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
|
||||
// Also expressed as (major,minor)
|
||||
// a: (m,k): so src1 transposed
|
||||
// b: (k,n): so src0
|
||||
// c: (m,n)
|
||||
//
|
||||
// However, if ggml_is_transposed(src1) is true, then
|
||||
// src1->data already contains a transposed version, so sgemm mustn't
|
||||
// transpose it further.
|
||||
|
||||
int n = src0->ne[0];
|
||||
int k = src0->ne[1];
|
||||
int m = src1->ne[0];
|
||||
|
||||
CBLAS_TRANSPOSE transposeA;
|
||||
int lda;
|
||||
|
||||
if (!ggml_is_transposed(src1)) {
|
||||
transposeA = CblasTrans;
|
||||
lda = m;
|
||||
} else {
|
||||
transposeA = CblasNoTrans;
|
||||
lda = k;
|
||||
}
|
||||
|
||||
float * a = (float *) ((char *) src1->data);
|
||||
float * b = (float *) ((char *) src0->data);
|
||||
float * c = (float *) ((char *) dst->data);
|
||||
|
||||
cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
|
||||
|
||||
GGML_UNUSED(ctx);
|
||||
}
|
||||
|
||||
// backend interface
|
||||
|
||||
static const char * ggml_backend_blas_get_name(ggml_backend_t backend) {
|
||||
return "BLAS";
|
||||
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_blas_free(ggml_backend_t backend) {
|
||||
ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;
|
||||
delete ctx;
|
||||
delete backend;
|
||||
}
|
||||
|
||||
static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend->context;
|
||||
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = cgraph->nodes[i];
|
||||
|
||||
switch (node->op) {
|
||||
case GGML_OP_MUL_MAT:
|
||||
ggml_backend_blas_mul_mat(ctx, node);
|
||||
break;
|
||||
|
||||
case GGML_OP_OUT_PROD:
|
||||
ggml_backend_blas_out_prod(ctx, node);
|
||||
break;
|
||||
|
||||
case GGML_OP_NONE:
|
||||
case GGML_OP_RESHAPE:
|
||||
case GGML_OP_VIEW:
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
break;
|
||||
|
||||
default:
|
||||
GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
|
||||
}
|
||||
}
|
||||
|
||||
return GGML_STATUS_SUCCESS;
|
||||
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
static struct ggml_backend_i blas_backend_i = {
|
||||
/* .get_name = */ ggml_backend_blas_get_name,
|
||||
/* .free = */ ggml_backend_blas_free,
|
||||
/* .set_tensor_async = */ NULL,
|
||||
/* .get_tensor_async = */ NULL,
|
||||
/* .cpy_tensor_async = */ NULL,
|
||||
/* .synchronize = */ NULL,
|
||||
/* .graph_plan_create = */ NULL,
|
||||
/* .graph_plan_free = */ NULL,
|
||||
/* .graph_plan_update = */ NULL,
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_blas_graph_compute,
|
||||
/* .event_record = */ NULL,
|
||||
/* .event_wait = */ NULL,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_blas_guid(void) {
|
||||
static ggml_guid guid = { 0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97, 0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d };
|
||||
return &guid;
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_blas_init(void) {
|
||||
ggml_backend_blas_context * ctx = new ggml_backend_blas_context;
|
||||
|
||||
ggml_backend_t backend = new ggml_backend {
|
||||
/* .guid = */ ggml_backend_blas_guid(),
|
||||
/* .interface = */ blas_backend_i,
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
|
||||
/* .context = */ ctx,
|
||||
};
|
||||
|
||||
#if defined(OPENBLAS_VERSION) && defined(GGML_USE_OPENMP)
|
||||
if (openblas_get_parallel() != OPENBLAS_OPENMP) {
|
||||
GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
|
||||
GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
|
||||
#endif
|
||||
|
||||
return backend;
|
||||
}
|
||||
|
||||
bool ggml_backend_is_blas(ggml_backend_t backend) {
|
||||
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_blas_guid());
|
||||
}
|
||||
|
||||
void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) {
|
||||
GGML_ASSERT(ggml_backend_is_blas(backend_blas));
|
||||
|
||||
ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context;
|
||||
ctx->n_threads = n_threads;
|
||||
}
|
||||
|
||||
// device interface
|
||||
|
||||
static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) {
|
||||
return "BLAS";
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
|
||||
#if defined(GGML_BLAS_USE_ACCELERATE)
|
||||
return "Accelerate";
|
||||
#elif defined(GGML_BLAS_USE_MKL)
|
||||
return "MKL";
|
||||
#elif defined(GGML_BLAS_USE_BLIS)
|
||||
return "BLIS";
|
||||
#elif defined(GGML_BLAS_USE_NVPL)
|
||||
return "NVPL";
|
||||
#elif defined(OPENBLAS_VERSION)
|
||||
return "OpenBLAS";
|
||||
#else
|
||||
return "BLAS";
|
||||
#endif
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||
// TODO
|
||||
*free = 0;
|
||||
*total = 0;
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
|
||||
return GGML_BACKEND_DEVICE_TYPE_ACCEL;
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
|
||||
props->name = ggml_backend_blas_device_get_name(dev);
|
||||
props->description = ggml_backend_blas_device_get_description(dev);
|
||||
props->type = ggml_backend_blas_device_get_type(dev);
|
||||
ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||
props->caps = {
|
||||
/* .async = */ false,
|
||||
/* .host_buffer = */ false,
|
||||
/* .buffer_from_host_ptr = */ true,
|
||||
/* .events = */ false,
|
||||
};
|
||||
}
|
||||
|
||||
static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) {
|
||||
return ggml_backend_blas_init();
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
GGML_UNUSED(params);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
|
||||
return ggml_backend_cpu_buffer_type();
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
|
||||
return ggml_backend_cpu_buffer_from_ptr(ptr, size);
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
GGML_UNUSED(max_tensor_size);
|
||||
}
|
||||
|
||||
static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|
||||
const struct ggml_tensor * src0 = op->src[0];
|
||||
const struct ggml_tensor * src1 = op->src[1];
|
||||
|
||||
switch (op->op) {
|
||||
case GGML_OP_NONE:
|
||||
case GGML_OP_RESHAPE:
|
||||
case GGML_OP_VIEW:
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
return true;
|
||||
|
||||
case GGML_OP_MUL_MAT:
|
||||
{
|
||||
// BLAS usually is only faster for large matrices
|
||||
const struct ggml_tensor * src0 = op->src[0];
|
||||
const struct ggml_tensor * src1 = op->src[1];
|
||||
|
||||
const int64_t ne10 = src1->ne[0];
|
||||
|
||||
const int64_t ne0 = op->ne[0];
|
||||
const int64_t ne1 = op->ne[1];
|
||||
|
||||
// TODO: find the optimal value
|
||||
const int64_t min_batch = 32;
|
||||
|
||||
return ggml_is_contiguous(src0) &&
|
||||
ggml_is_contiguous(src1) &&
|
||||
src1->type == GGML_TYPE_F32 &&
|
||||
(ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
|
||||
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
||||
}
|
||||
|
||||
case GGML_OP_OUT_PROD:
|
||||
return op->src[0]->type == GGML_TYPE_F32 &&
|
||||
op->src[1]->type == GGML_TYPE_F32 &&
|
||||
ggml_is_matrix(src0) &&
|
||||
ggml_is_matrix(src1) &&
|
||||
ggml_is_contiguous(src0) &&
|
||||
(ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
|
||||
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
||||
|
||||
default:
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
||||
return ggml_backend_buft_is_host(buft);
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
|
||||
/* .get_name = */ ggml_backend_blas_device_get_name,
|
||||
/* .get_description = */ ggml_backend_blas_device_get_description,
|
||||
/* .get_memory = */ ggml_backend_blas_device_get_memory,
|
||||
/* .get_type = */ ggml_backend_blas_device_get_type,
|
||||
/* .get_props = */ ggml_backend_blas_device_get_props,
|
||||
/* .init_backend = */ ggml_backend_blas_device_init_backend,
|
||||
/* .get_buffer_type = */ ggml_backend_blas_device_get_buffer_type,
|
||||
/* .get_host_buffer_type = */ NULL,
|
||||
/* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr,
|
||||
/* .supports_op = */ ggml_backend_blas_device_supports_op,
|
||||
/* .supports_buft = */ ggml_backend_blas_device_supports_buft,
|
||||
/* .offload_op = */ NULL,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_synchronize = */ NULL,
|
||||
};
|
||||
|
||||
// backend reg interface
|
||||
|
||||
static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) {
|
||||
return "BLAS";
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
}
|
||||
|
||||
static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) {
|
||||
return 1;
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
}
|
||||
|
||||
static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
||||
GGML_ASSERT(index == 0);
|
||||
|
||||
static ggml_backend_device ggml_backend_blas_device = {
|
||||
/* .iface = */ ggml_backend_blas_device_i,
|
||||
/* .reg = */ reg,
|
||||
/* .context = */ nullptr,
|
||||
};
|
||||
|
||||
return &ggml_backend_blas_device;
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
GGML_UNUSED(index);
|
||||
}
|
||||
|
||||
static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
||||
if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) {
|
||||
return (void *)ggml_backend_blas_set_n_threads;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
GGML_UNUSED(reg);
|
||||
GGML_UNUSED(name);
|
||||
}
|
||||
|
||||
static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
|
||||
/* .get_name = */ ggml_backend_blas_reg_get_name,
|
||||
/* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
|
||||
/* .get_device = */ ggml_backend_blas_reg_get_device,
|
||||
/* .get_proc_address = */ ggml_backend_blas_get_proc_address,
|
||||
};
|
||||
|
||||
ggml_backend_reg_t ggml_backend_blas_reg(void) {
|
||||
static struct ggml_backend_reg ggml_backend_blas_reg = {
|
||||
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
||||
/* .iface = */ ggml_backend_blas_reg_i,
|
||||
/* .context = */ NULL,
|
||||
};
|
||||
|
||||
return &ggml_backend_blas_reg;
|
||||
}
|
||||
|
||||
GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
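A minimal sketch of using the BLAS backend entry points defined above (assuming the vendored ggml-blas.h header and a build with the BLAS backend enabled; the thread count of 4 is arbitrary):

```cpp
// Sketch only: initialize the BLAS backend and configure its thread count.
#include "ggml-blas.h"

void configure_blas_backend() {
    ggml_backend_t blas = ggml_backend_blas_init();
    if (blas && ggml_backend_is_blas(blas)) {
        // Same setter that get_proc_address exposes as "ggml_backend_set_n_threads".
        ggml_backend_blas_set_n_threads(blas, 4);
    }
    // ... offload large GGML_OP_MUL_MAT / GGML_OP_OUT_PROD nodes to it ...
    if (blas) {
        ggml_backend_free(blas);
    }
}
```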
|
76  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/CMakeLists.txt  vendored  Normal file
@@ -0,0 +1,76 @@
if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
|
||||
set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
|
||||
message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
|
||||
endif()
|
||||
|
||||
# Auto-detect the SoC type and SoC version; if detection fails, the build will abort
|
||||
set(SOC_VERSION "")
|
||||
function(detect_ascend_soc_type SOC_VERSION)
|
||||
execute_process(
|
||||
COMMAND bash -c "npu-smi info|awk -F' ' 'NF > 0 && NR==7 {print $3}'"
|
||||
OUTPUT_VARIABLE npu_info
|
||||
RESULT_VARIABLE npu_result
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
if("${npu_info}" STREQUAL "" OR ${npu_result})
|
||||
message(FATAL_ERROR "Auto-detech ascend soc type failed, please specify manually or check ascend device working normally.")
|
||||
endif()
|
||||
set(${SOC_VERSION} "Ascend${npu_info}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
if(NOT SOC_TYPE)
|
||||
detect_ascend_soc_type(SOC_VERSION)
|
||||
set(SOC_TYPE "${SOC_VERSION}")
|
||||
message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
|
||||
endif()
|
||||
|
||||
string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower
|
||||
|
||||
# Construct the SoC-specific compile option: ASCEND_<SoC_Major_SN>, e.g. ASCEND_910B, ASCEND_310P.
|
||||
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
|
||||
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
|
||||
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)
|
||||
|
||||
if (CANN_INSTALL_DIR)
|
||||
# Only Support Linux.
|
||||
if (NOT UNIX)
|
||||
message(FATAL_ERROR "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}")
|
||||
endif()
|
||||
|
||||
# Supported platforms: x86-64, arm64
|
||||
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
||||
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
|
||||
else()
|
||||
message(FATAL_ERROR "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
|
||||
# Set header and libs
|
||||
set(CANN_INCLUDE_DIRS
|
||||
${CANN_INSTALL_DIR}/include
|
||||
${CANN_INSTALL_DIR}/include/aclnn
|
||||
${CANN_INSTALL_DIR}/acllib/include
|
||||
)
|
||||
|
||||
add_subdirectory(kernels)
|
||||
list(APPEND CANN_LIBRARIES
|
||||
ascendcl
|
||||
nnopbase
|
||||
opapi
|
||||
acl_op_compiler
|
||||
ascendc_kernels
|
||||
)
|
||||
|
||||
file(GLOB GGML_SOURCES_CANN "*.cpp")
|
||||
|
||||
ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
|
||||
target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
|
||||
target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
|
||||
target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)
|
||||
|
||||
target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
|
||||
|
||||
message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
|
||||
message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
|
||||
else()
|
||||
message(FATAL_ERROR "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_var.sh?")
|
||||
endif()
|
2579  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/Doxyfile  vendored  Normal file
File diff suppressed because it is too large
175  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.cpp  vendored  Normal file
@@ -0,0 +1,175 @@
/*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "acl_tensor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
aclDataType ggml_cann_type_mapping(ggml_type type) {
|
||||
switch (type) {
|
||||
case GGML_TYPE_F32:
|
||||
return ACL_FLOAT;
|
||||
case GGML_TYPE_F16:
|
||||
return ACL_FLOAT16;
|
||||
case GGML_TYPE_I8:
|
||||
return ACL_INT8;
|
||||
case GGML_TYPE_I16:
|
||||
return ACL_INT16;
|
||||
case GGML_TYPE_I32:
|
||||
return ACL_INT32;
|
||||
case GGML_TYPE_Q4_0:
|
||||
return ACL_INT4;
|
||||
case GGML_TYPE_Q8_0:
|
||||
return ACL_INT8;
|
||||
default:
|
||||
return ACL_DT_UNDEFINED;
|
||||
}
|
||||
return ACL_DT_UNDEFINED;
|
||||
}
|
||||
|
||||
aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
|
||||
size_t* nb, int64_t dims, aclFormat format,
|
||||
size_t offset) {
|
||||
// If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
|
||||
// added.
|
||||
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
|
||||
|
||||
int64_t acl_storage_len = 0;
|
||||
if (ne == nullptr) {
|
||||
acl_storage_len = ggml_nbytes(tensor);
|
||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
acl_ne[i] = tensor->ne[i];
|
||||
// The step size of acl is in elements.
|
||||
acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
|
||||
}
|
||||
} else {
|
||||
// With bcast
|
||||
for (int i = 0; i < dims; i++) {
|
||||
acl_storage_len += (ne[i] - 1) * nb[i];
|
||||
acl_ne[i] = ne[i];
|
||||
acl_stride[i] = nb[i] / ggml_element_size(tensor);
|
||||
}
|
||||
}
|
||||
|
||||
// Reverse ne and stride.
|
||||
int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
|
||||
std::reverse(acl_ne, acl_ne + final_dims);
|
||||
std::reverse(acl_stride, acl_stride + final_dims);
|
||||
|
||||
aclTensor* acl_tensor = aclCreateTensor(
|
||||
acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
|
||||
offset / ggml_element_size(tensor), format, &acl_storage_len, 1,
|
||||
tensor->data);
|
||||
|
||||
return acl_tensor;
|
||||
}
|
||||
|
||||
bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
|
||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
|
||||
const ggml_tensor* src1,
|
||||
int64_t* bcast_src0_ne,
|
||||
int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
|
||||
size_t* bcast_src1_nb) {
|
||||
GGML_ASSERT(ggml_can_repeat(src1, src0));
|
||||
int bcast_dim_cnt = 0;
|
||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
int64_t nr = src0->ne[i] / src1->ne[i];
|
||||
bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
|
||||
bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
|
||||
bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
|
||||
bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
|
||||
bcast_dim_cnt++;
|
||||
if (nr != 1) {
|
||||
// Need to add an extra dim.
|
||||
bcast_src0_ne[bcast_dim_cnt] = nr;
|
||||
bcast_src1_ne[bcast_dim_cnt] = 1;
|
||||
bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
|
||||
bcast_src0_ne[bcast_dim_cnt - 1];
|
||||
bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
|
||||
bcast_src1_ne[bcast_dim_cnt - 1];
|
||||
bcast_dim_cnt++;
|
||||
}
|
||||
}
|
||||
return bcast_dim_cnt;
|
||||
}
|
||||
|
||||
int64_t ggml_cann_get_mulmat_bcast_shape(
|
||||
const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
|
||||
const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
|
||||
int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
|
||||
size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
|
||||
// input and dst should have the same shape, except for the first two dims.
|
||||
GGML_ASSERT(input_ne[2] == dst_ne[2]);
|
||||
GGML_ASSERT(input_ne[3] == dst_ne[3]);
|
||||
|
||||
int bcast_dim_cnt = 0;
|
||||
|
||||
// For mul_mat, a dimension needs to be added before the dimension that
|
||||
// weight needs to be expanded to satisfy the bcast rule of matrix
|
||||
// multiplication.
|
||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
int64_t nr = input_ne[i] / weight_ne[i];
|
||||
// Do not use bcast in the first two dimensions because we only support
|
||||
// the bcast batch dimension. Just copy them.
|
||||
if (i < 2 || nr == 1) {
|
||||
bcast_input_ne[bcast_dim_cnt] = input_ne[i];
|
||||
bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
|
||||
bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];
|
||||
|
||||
bcast_input_nb[bcast_dim_cnt] = input_nb[i];
|
||||
bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
|
||||
bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
|
||||
bcast_dim_cnt++;
|
||||
} else {
|
||||
// Need to add an extra dim.
|
||||
bcast_input_ne[bcast_dim_cnt] = nr;
|
||||
bcast_dst_ne[bcast_dim_cnt] = nr;
|
||||
bcast_weight_ne[bcast_dim_cnt] = 1;
|
||||
bcast_input_nb[bcast_dim_cnt] = input_nb[i];
|
||||
bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
|
||||
bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
|
||||
bcast_dim_cnt++;
|
||||
|
||||
bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
|
||||
bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
|
||||
bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
|
||||
bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
|
||||
bcast_input_ne[bcast_dim_cnt - 1];
|
||||
bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
|
||||
bcast_dst_ne[bcast_dim_cnt - 1];
|
||||
bcast_weight_nb[bcast_dim_cnt] =
|
||||
bcast_weight_nb[bcast_dim_cnt - 1] *
|
||||
bcast_weight_ne[bcast_dim_cnt - 1];
|
||||
bcast_dim_cnt++;
|
||||
}
|
||||
}
|
||||
return bcast_dim_cnt;
|
||||
}
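A standalone sketch of the dimension-expansion rule implemented above (plain C++, no CANN dependency, with illustrative shapes): whenever src0 and src1 differ in a dimension, an extra dimension holding the repeat count is inserted for src0 and a size-1 dimension for src1:

```cpp
// Illustration of the bcast loop: mismatching dims get split into
// (1, nr) for src0 and (1, 1) for src1 so CANN kernels can broadcast them.
#include <cstdint>
#include <cstdio>

int main() {
    const int MAX_DIMS = 4;
    int64_t src0_ne[MAX_DIMS] = {4, 3, 2, 2};   // repeated tensor
    int64_t src1_ne[MAX_DIMS] = {4, 3, 1, 1};   // tensor to broadcast

    int64_t bcast0[MAX_DIMS * 2], bcast1[MAX_DIMS * 2];
    int cnt = 0;
    for (int i = 0; i < MAX_DIMS; i++) {
        int64_t nr = src0_ne[i] / src1_ne[i];
        bcast0[cnt] = src0_ne[i] / nr;
        bcast1[cnt] = src1_ne[i];
        cnt++;
        if (nr != 1) {           // mismatching dim: insert the repeat count
            bcast0[cnt] = nr;
            bcast1[cnt] = 1;
            cnt++;
        }
    }

    // Expected: src0 -> (4,3,1,2,1,2), src1 -> (4,3,1,1,1,1), 6 dims total.
    for (int i = 0; i < cnt; i++) {
        printf("dim %d: src0=%lld src1=%lld\n", i,
               (long long) bcast0[i], (long long) bcast1[i]);
    }
    return 0;
}
```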
|
258  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/acl_tensor.h  vendored  Normal file
@@ -0,0 +1,258 @@
/*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CANN_ACL_TENSOR_H
|
||||
#define CANN_ACL_TENSOR_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include <aclnn/aclnn_base.h>
|
||||
#include "common.h"
|
||||
|
||||
/**
|
||||
* @brief Maps a ggml_type to its corresponding aclDataType.
|
||||
*
|
||||
* @details This function takes a ggml_type as input and returns the corresponding
|
||||
* aclDataType. It supports mapping for various ggml_types. If the input type
|
||||
* does not match any of the predefined ggml_types, the function returns
|
||||
* ACL_DT_UNDEFINED.
|
||||
*
|
||||
* @param type The ggml_type to be mapped.
|
||||
* @return The corresponding aclDataType. If the input type is not recognized,
|
||||
* ACL_DT_UNDEFINED is returned.
|
||||
*/
|
||||
aclDataType ggml_cann_type_mapping(ggml_type type);
|
||||
|
||||
/**
|
||||
* @brief Creates an ACL tensor from a ggml_tensor with optional shape.
|
||||
*
|
||||
* @details This function creates an ACL tensor based on the properties of the
|
||||
* provided ggml_tensor. It supports custom shapes by adjusting dimensions
|
||||
* and strides accordingly. If a custom shape is applied, additional
|
||||
* dimensions and strides are calculated based on the provided parameters.
|
||||
*
|
||||
* @param tensor Pointer to the ggml_tensor to be converted to ACL tensor.
|
||||
* @param ne Pointer to an array containing dimensions. Defaults to nullptr
|
||||
* if no custom shape is applied.
|
||||
* @param nb Pointer to an array containing strides. Defaults to nullptr
|
||||
* if no custom shape is applied.
|
||||
* @param dims Number of dimensions in the tensor. Defaults to 0 if no custom
|
||||
* shape is applied.
|
||||
* @param format ACL tensor format. Defaults to ACL_FORMAT_ND.
|
||||
* @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
|
||||
* @return Pointer to the created ACL tensor.
|
||||
*/
|
||||
aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
|
||||
size_t* nb = nullptr, int64_t dims = 0,
|
||||
aclFormat format = ACL_FORMAT_ND,
|
||||
size_t offset = 0);
|
||||
|
||||
/**
|
||||
* @brief Template for creating an ACL tensor from provided parameters. typename TYPE
|
||||
* should be size_t or float.
|
||||
*
|
||||
* @details This function creates an ACL tensor using the provided data pointer,
|
||||
* data type, dimensions, strides, format, offset, and additional parameters.
|
||||
* It calculates necessary dimensions and strides based on the provided ne and nb
|
||||
* arrays, adjusting them for the ACL tensor creation. The ACL storage length
|
||||
* is also calculated based on the provided dimensions and strides.
|
||||
*
|
||||
* @param data_ptr Pointer to the data buffer for the ACL tensor.
|
||||
* @param dtype ACL data type of the tensor.
|
||||
* @param type_size Size of each element in the tensor data buffer.
|
||||
* @param ne Pointer to an array containing tensor dimensions.
|
||||
* @param nb Pointer to an array containing tensor strides.
|
||||
* @param dims Number of dimensions of the tensor.
|
||||
* @param format ACL tensor format. Defaults to ACL_FORMAT_ND.
|
||||
* @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
|
||||
* @return Pointer to the created ACL tensor.
|
||||
*/
|
||||
template<typename TYPE>
|
||||
aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
|
||||
TYPE type_size, int64_t* ne, TYPE* nb,
|
||||
int64_t dims,
|
||||
aclFormat format = ACL_FORMAT_ND,
|
||||
size_t offset = 0) {
|
||||
int64_t tmp_ne[GGML_MAX_DIMS * 2];
|
||||
int64_t tmp_stride[GGML_MAX_DIMS * 2];
|
||||
|
||||
memcpy(tmp_ne, ne, dims * sizeof(int64_t));
|
||||
for (int i = 0; i < dims; i++) {
|
||||
tmp_stride[i] = nb[i] / type_size;
|
||||
}
|
||||
|
||||
std::reverse(tmp_ne, tmp_ne + dims);
|
||||
std::reverse(tmp_stride, tmp_stride + dims);
|
||||
|
||||
int64_t acl_storage_len = 0;
|
||||
for (int i = 0; i < dims; i++) {
|
||||
acl_storage_len += (ne[i] - 1) * nb[i];
|
||||
}
|
||||
|
||||
aclTensor* acl_tensor =
|
||||
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
|
||||
format, &acl_storage_len, 1, data_ptr);
|
||||
|
||||
return acl_tensor;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks if tensors require broadcasting based on their shapes.
|
||||
*
|
||||
* @details This function determines if two ggml_tensors need to be broadcasted for
|
||||
* element-wise operations. Broadcasting is necessary if the shapes of the
|
||||
* tensors are not identical and no dimension in either tensor equals 1.
|
||||
*
|
||||
* @param t0 Pointer to the first ggml_tensor.
|
||||
* @param t1 Pointer to the second ggml_tensor.
|
||||
* @return True if broadcasting is needed, False otherwise.
|
||||
*
|
||||
* @remarks This function iterates over the dimensions of t0 and t1. It checks if each
|
||||
* dimension in t1 differs from t0's corresponding dimension and is not equal
|
||||
* to 1. If such a dimension is found, broadcasting is required to align t1
|
||||
* with t0 for element-wise operations.
|
||||
*/
|
||||
bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
|
||||
|
||||
/**
|
||||
* @brief Computes broadcast shapes and strides for two ggml_tensors.
|
||||
*
|
||||
* @details This function calculates the broadcast shapes and strides for two ggml_tensors,
|
||||
* following the broadcasting rules similar to numpy. It adjusts dimensions and
|
||||
* strides to ensure compatibility for element-wise operations where one tensor
|
||||
* can be broadcasted to match the shape of another tensor.
|
||||
*
|
||||
* @param src0 Pointer to the first ggml_tensor.
|
||||
* @param src1 Pointer to the second ggml_tensor.
|
||||
* @param bcast_ne_src0 Output array to store broadcasted dimensions for src0.
|
||||
* @param bcast_ne_src1 Output array to store broadcasted dimensions for src1.
|
||||
* @param bcast_nb_src0 Output array to store broadcasted strides for src0.
|
||||
* @param bcast_nb_src1 Output array to store broadcasted strides for src1.
|
||||
* @return Number of dimensions in the broadcasted shape.
|
||||
*
|
||||
* @pre ggml_can_repeat(src1, src0) must return true, indicating src1 can be broadcasted
|
||||
* to match src0.
|
||||
*
|
||||
* @remarks This function iterates over the dimensions of src0 and src1, calculating the
|
||||
* necessary broadcast dimensions and strides. If a dimension requires broadcasting
|
||||
* (i.e., its size in src1 is smaller than in src0), an additional dimension is
|
||||
* added with size calculated to match src0's dimension. This adjustment ensures
|
||||
* that src1 can be element-wise broadcasted to src0's shape.
|
||||
*
|
||||
* How it works:
|
||||
*
|
||||
* if dim0 has padding.
|
||||
* a -> (2, 2) padding = 2
|
||||
* a: [[1, 2, *, *]
|
||||
* [2, 3, *, *]]
|
||||
* nb = (8, 4, 2)
|
||||
*
|
||||
* if a should bcast with b -> (2, 4)
|
||||
* b' -> (2, 2, 2)
|
||||
* b : [[1, 2, 3, 4, *, *]
|
||||
* [5, 6, 7, 8, *, *]]
|
||||
* nb = (12, 6, 1)
|
||||
*
|
||||
* after bcast:
|
||||
* a' -> (2, 1, 2)
|
||||
* a': [[[1, 2], *, *]
|
||||
* [[2, 3], *, *]]
|
||||
* nb = (8, 4, 2, 1)
|
||||
*
|
||||
* b' : [[[1, 2], [3, 4], *, *]
|
||||
* [[5, 6], [7, 8], *, *]]
|
||||
* nb = (12, 6, 2, 1)
|
||||
* \endcode
|
||||
*
|
||||
* dim1 in a inserted dim, should add nb for dim1,
|
||||
* and all other nb moves to next in order.
|
||||
*/
|
||||
int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
|
||||
int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
|
||||
size_t* bcast_nb_src0, size_t* bcast_nb_src1);
|
||||
|
||||
// Bcast macro to avoid duplicate code.
|
||||
#define BCAST_SHAPE(src0, src1) \
|
||||
int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2]; \
|
||||
int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2]; \
|
||||
size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2]; \
|
||||
size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2]; \
|
||||
int64_t bcast_dims = ggml_cann_get_bcast_shape( \
|
||||
src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \
|
||||
bcast_##src1##_nb);
|
||||
|
||||
#define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
|
||||
|
||||
/**
|
||||
* @brief Calculates broadcast shapes for matrix multiplication.
|
||||
*
|
||||
* @details This function computes the broadcast shapes required for matrix multiplication
|
||||
* based on the input, weight, and destination tensor shapes. It ensures that the
|
||||
* dimensions of weight tensors are expanded appropriately to satisfy matrix
|
||||
* multiplication broadcast rules.
|
||||
*
|
||||
* @param input_ne Array containing the dimensions of the input tensor.
|
||||
* @param weight_ne Array containing the dimensions of the weight tensor.
|
||||
* @param dst_ne Array containing the dimensions of the destination tensor.
|
||||
* @param input_nb Array containing the strides of the input tensor.
|
||||
* @param weight_nb Array containing the strides of the weight tensor.
|
||||
* @param dst_nb Array containing the strides of the destination tensor.
|
||||
* @param bcast_input_ne Output array for broadcasted input tensor dimensions.
|
||||
* @param bcast_weight_ne Output array for broadcasted weight tensor dimensions.
|
||||
* @param bcast_dst_ne Output array for broadcasted destination tensor dimensions.
|
||||
* @param bcast_input_nb Output array for broadcasted input tensor strides.
|
||||
* @param bcast_weight_nb Output array for broadcasted weight tensor strides.
|
||||
* @param bcast_dst_nb Output array for broadcasted destination tensor strides.
|
||||
* @return The number of dimensions in the broadcasted tensors.
|
||||
*
|
||||
* @remarks This function iterates over the tensor dimensions and calculates the broadcast
|
||||
* shapes needed for matrix multiplication. It ensures that dimensions where
|
||||
* weight tensor requires expansion are appropriately handled to conform with
|
||||
* broadcasting rules.
|
||||
* @note Compared with ggml_cann_get_bcast_shape, mul_mat broadcasting needs to add this new dim
* before the bcast dim.
|
||||
* @sa ggml_cann_get_bcast_shape
|
||||
*/
|
||||
int64_t ggml_cann_get_mulmat_bcast_shape(
|
||||
const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
|
||||
const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
|
||||
int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
|
||||
size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb);
|
||||
|
||||
// Bcast macro to avoid duplicate code.
|
||||
#define BCAST_MUL_MAT_SHAPE(input, weight, dst) \
|
||||
int64_t bcast_##input##_ne[GGML_MAX_DIMS * 2]; \
|
||||
int64_t bcast_##weight##_ne[GGML_MAX_DIMS * 2]; \
|
||||
int64_t bcast_##dst##_ne[GGML_MAX_DIMS * 2]; \
|
||||
size_t bcast_##input##_nb[GGML_MAX_DIMS * 2]; \
|
||||
size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2]; \
|
||||
size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2]; \
|
||||
int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape( \
|
||||
input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \
|
||||
bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne, \
|
||||
bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
|
||||
|
||||
#define BCAST_MUL_MAT_PARAM(tensor) \
|
||||
bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
|
||||
|
||||
#endif // CANN_ACL_TENSOR_H
|
3427  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.cpp  vendored  Normal file
File diff suppressed because it is too large
592  packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/aclnn_ops.h  vendored  Normal file
@@ -0,0 +1,592 @@
#ifndef CANN_ACLNN_OPS
|
||||
#define CANN_ACLNN_OPS
|
||||
|
||||
/**
|
||||
* @file acl_tensor
|
||||
* @brief This file contains related functions of ggml_tensor and acl_tensor.
|
||||
* Contains conversion from ggml_tensor to acl_tensor, broadcast and other
|
||||
* functions.
|
||||
* @author hipudding <huafengchun@gmail.com>
|
||||
* @author wangshuai09 <391746016@qq.com>
|
||||
* @date July 15, 2024
|
||||
*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <aclnnop/aclnn_add.h>
|
||||
#include <aclnnop/aclnn_arange.h>
|
||||
#include <aclnnop/aclnn_argsort.h>
|
||||
#include <aclnnop/aclnn_cat.h>
|
||||
#include <aclnnop/aclnn_clamp.h>
|
||||
#include <aclnnop/aclnn_div.h>
|
||||
#include <aclnnop/aclnn_gelu.h>
|
||||
#include <aclnnop/aclnn_hardsigmoid.h>
|
||||
#include <aclnnop/aclnn_hardswish.h>
|
||||
#include <aclnnop/aclnn_leaky_relu.h>
|
||||
#include <aclnnop/aclnn_mul.h>
|
||||
#include <aclnnop/aclnn_relu.h>
|
||||
#include <aclnnop/aclnn_silu.h>
|
||||
#include <aclnnop/aclnn_tanh.h>
|
||||
#include "acl_tensor.h"
|
||||
#include "common.h"
|
||||
|
||||
/**
|
||||
* @brief Repeats a ggml tensor along each dimension to match the dimensions
|
||||
* of another tensor.
|
||||
*
|
||||
* @details This function repeats the elements of a source ggml tensor along
|
||||
* each dimension to create a destination tensor with the specified
|
||||
* dimensions. The operation is performed using the ACL backend and
|
||||
* executed asynchronously on the device.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
 * @param dst The ggml tensor representing the destination, whose op is
 *            GGML_OP_REPEAT and which specifies the desired dimensions.
|
||||
*/
|
||||
void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Adds two ggml tensors using the CANN backend.
|
||||
*
|
||||
* @details This function performs an element-wise addition of two tensors. In
|
||||
* case the tensors do not have the same shape, one or both tensors
|
||||
 * will be broadcast to match the shape of the other before the
 * addition is performed. The formula for the operation is given by:
|
||||
* \f[
|
||||
* \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1}
|
||||
* \f]
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The ggml tensor representing the destination, result of the
|
||||
* addition is stored at dst->data, and dst->op is `GGML_OP_ADD`
|
||||
*/
|
||||
void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Applies the Leaky ReLU activation function to a tensor using the CANN
|
||||
* backend.
|
||||
*
|
||||
* @details This function computes the Leaky ReLU activation for each element of
|
||||
* the input tensor. The Leaky ReLU function allows a small gradient
|
||||
* when the unit is not active (i.e., when the input is negative). The
|
||||
* Leaky ReLU function is defined as:
|
||||
* \f[
|
||||
* \text{dst} = \max(0, src) + \text{negativeSlope} \cdot \min(0,
|
||||
* src)
|
||||
* \f]
|
||||
* `negativeSlope` is in dst->params.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the result of the Leaky ReLU
|
||||
* activation is stored, which op is `GGML_OP_LEAKY_RELU`
|
||||
*/
|
||||
void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Concatenates multiple tensors along a specified dimension using the
|
||||
* CANN backend.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param tensorList A pointer to the list of tensors to be concatenated.
|
||||
* @param dst The destination tensor where the result of the
|
||||
* concatenation is stored. dst->op is `GGML_OP_CONCAT`.
|
||||
* @param concat_dim The dimension along which the tensors are concatenated.
|
||||
*
|
||||
 * @attention tensorList length should be 2, and the dimension used for concat
 *            defaults to 1.
|
||||
*/
|
||||
void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Generates a sequence of evenly spaced values within a specified
|
||||
* interval for a ggml tensor using the CANN backend.
|
||||
*
|
||||
 * @details This function creates a sequence of numbers over a specified
 *          interval, starting from `start`, ending before `stop`, and
|
||||
* incrementing by `step`. The sequence is stored in the destination
|
||||
* tensor `dst`.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the generated sequence will be stored.
|
||||
 * `start`, `stop` and `step` are in dst->op_params and dst->op is
|
||||
* `GGML_OP_ARANGE`.
|
||||
*/
|
||||
void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the square of the elements of a ggml tensor using the CANN
|
||||
* backend.
|
||||
* @details The function sets the second source tensor of the destination
|
||||
* tensor `dst` to be equal to the first source tensor. This is
|
||||
* effectively squaring the elements since the multiplication becomes
|
||||
* `element * element`.
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the squared values will be stored,
|
||||
* which dst->op is `GGML_OP_SQR`.
|
||||
*/
|
||||
void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Applies a clamp operation to the elements of a ggml tensor using the
|
||||
* CANN backend.
|
||||
*
|
||||
* @details This function clamps the elements of the input tensor `src` to a
|
||||
* specified range defined by `min` and `max` values. The result is
|
||||
* stored in the destination tensor `dst`. The operation is defined as:
|
||||
* \f[
|
||||
* y = \max(\min(x, max\_value), min\_value)
|
||||
* \f]
|
||||
* where `x` is an element of the input tensor, and `y` is the
|
||||
* corresponding element in the output tensor.
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the clamped values will be stored.
|
||||
* dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
|
||||
*/
|
||||
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Scales the elements of a ggml tensor by a constant factor using the
|
||||
* CANN backend.
|
||||
*
|
||||
* @details This function multiplies each element of the input tensor `src` by
|
||||
* a scaling factor `scale`, storing the result in the destination
|
||||
* tensor `dst`. The operation is defined as:
|
||||
* \f[
|
||||
* dst = src \times scale
|
||||
* \f]
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the scaled values will be stored.
|
||||
* dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
|
||||
*/
|
||||
void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Sorts the elements of a ggml tensor and returns the indices that
|
||||
* would sort the tensor using the CANN backend.
|
||||
*
|
||||
* @details This function performs an argsort operation on the input tensor
|
||||
* `src`. It sorts the elements of `src` in either ascending or
|
||||
 * descending order, depending on whether `GGML_SORT_ORDER_DESC` is set,
 * and returns the indices that would sort the original tensor.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the sorted indices will be stored.
|
||||
* dst->op is `GGML_OP_ARGSORT`.
|
||||
*/
|
||||
void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the Layer Normalization for a ggml tensor using the CANN
|
||||
* backend.
|
||||
*
|
||||
* @details This function applies the Layer Normalization operation on the
|
||||
* input tensor `src` and stores the result in the destination tensor
|
||||
* `dst`. Layer Normalization normalizes the features at each sample in
|
||||
* a mini-batch independently. It is commonly used in neural networks
|
||||
* to normalize the activations of a layer by adjusting and scaling
|
||||
* the outputs.
|
||||
* The operation is defined as:
|
||||
* \f[
|
||||
* \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
|
||||
* \f]
|
||||
 * `Var` defaults to dst->ne[0]. `eps` is in dst->params.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the normalized values will be stored.
|
||||
* @attention `Var` defaults to dst->ne[0].
|
||||
*/
|
||||
void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the Group Normalization for a ggml tensor using the CANN
|
||||
* backend.
|
||||
*
|
||||
 * @details This function applies the Group Normalization operation on the input
|
||||
* tensor `src` and stores the result in the destination tensor `dst`.
|
||||
* Group Normalization divides the channels into groups and normalizes
|
||||
* the features within each group across spatial locations.
|
||||
* It is commonly used in convolutional neural networks to improve
|
||||
* training stability and performance.
|
||||
* The operation is defined as:
|
||||
* \f[
|
||||
* \text { out }=\frac{x-\mathrm{E}[x]}{\sqrt{\text{Var}[x]+eps}}
|
||||
* \f]
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the normalized values will be stored.
|
||||
* `n_groups` is in dst->params, which split C channel to `n_groups`.
|
||||
* dst->op is `GGML_OP_GROUP_NORM`.
|
||||
*
|
||||
* @attention eps defaults to 1e-6f.
|
||||
*/
|
||||
void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the accumulation of tensors using the CANN backend.
|
||||
*
|
||||
* @details This function performs an accumulation operation on two tensors.
|
||||
* Depending on the `inplace` flag, it either updates the destination
|
||||
* tensor `dst` in place by adding `alpha * src1` to it, or it creates
|
||||
* a new tensor as the result of `src0 + alpha * src1` and stores it in
|
||||
* `dst`.
|
||||
* The operation is defined as:
|
||||
* \f[
|
||||
* dst = src0 + alpha \times src1
|
||||
* \f]
|
||||
 * If `inplace` is `true`, `src0` is equal to `dst`.
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the accumulated values will be stored.
|
||||
* `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
|
||||
*/
|
||||
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the sum of elements along the last dimension of a ggml tensor
|
||||
* using the CANN backend.
|
||||
*
|
||||
* @details This function performs a reduction sum operation along the last
|
||||
* dimension of the input tensor `src`. The result of the sum is stored
|
||||
* in the destination tensor `dst`.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
 * @param dst The destination tensor where the reduced values will be stored.
|
||||
* dst->op is `GGML_OP_SUM_ROWS`.
|
||||
*
|
||||
* @attention `reduce_dims` defaults to 3, which means the last dimension.
|
||||
*/
|
||||
void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Upsamples a ggml tensor using nearest neighbor interpolation using
|
||||
* the CANN backend.
|
||||
*
|
||||
* @details This function performs upsampling of the input tensor `src` using
|
||||
* nearest neighbor interpolation. The upsampling is applied to the
|
||||
* height and width dimensions (last two dimensions) of the tensor. The
|
||||
* result is stored in the destination tensor `dst`, which must have
|
||||
* the appropriate dimensions for the upsampled output.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the upsampled values will be stored.
|
||||
* dst->op is `GGML_OP_UPSCALE`.
|
||||
*/
|
||||
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
|
||||
ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Pads a ggml tensor to match the dimensions of the destination tensor
|
||||
* using the CANN backend.
|
||||
*
|
||||
* @details This function pads the input tensor `src` so that it matches the
|
||||
* dimensions of the destination tensor `dst`. The amount of padding
|
||||
* is calculated based on the difference in sizes between `src` and
|
||||
* `dst` along each dimension. The padded tensor is stored in `dst`.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor, which specifies the target dimensions for
|
||||
* padding. dst->op is `GGML_OP_PAD`.
|
||||
*/
|
||||
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Executes a 2D pooling operation on a ggml tensor using the CANN
|
||||
* backend.
|
||||
*
|
||||
* @details This function dispatches the execution of a 2D pooling operation on
|
||||
* the input tensor `dst`. The type of pooling (average or max) is
|
||||
* determined by the `op` parameter, which is read from the operation
|
||||
* parameters of `dst`. The function supports average pooling
|
||||
* (`GGML_OP_POOL_AVG`) and max pooling (`GGML_OP_POOL_MAX`). If an
|
||||
* invalid operation is encountered, the function asserts a failure.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor on which the pooling operation is to be
|
||||
* performed. dst->op is `GGML_OP_POOL_2D`.
|
||||
*/
|
||||
void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Duplicates a ggml tensor using the CANN backend.
|
||||
*
|
||||
* @details This function duplicates the contents of the source tensor `src` to
|
||||
* the destination tensor `dst`. The function supports various tensor
|
||||
* types and configurations, including handling of extra data, type
|
||||
* conversions, and special cases for contiguous and non-contiguous
|
||||
* tensors.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the duplicated data will be stored.
|
||||
* dst->op is `GGML_OP_DUP`
|
||||
*
|
||||
 * @attention Only FP16/FP32 are supported. Not supported when src and dst have
 *            different shapes and dst is non-contiguous.
 * @note This function needs to be simplified.
|
||||
*/
|
||||
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
|
||||
* using the CANN backend.
|
||||
*
|
||||
* @details This function applies RMS normalization to the input tensor `src`
|
||||
* and stores the result in the destination tensor `dst`. RMS
|
||||
* normalization involves computing the root mean square of the input
|
||||
* tensor along a specified dimension and then dividing each element of
|
||||
* the tensor by this value, adjusted by a small epsilon value to
|
||||
* prevent division by zero.
|
||||
* The operation is defined as:
|
||||
* \f[
|
||||
* \text{RmsNorm}\left(x_i\right)=\frac{x_i}{\text{Rms}(\mathbf{x})} g_i,
|
||||
* \quad \text { where } \text{Rms}(\mathbf{x})=\sqrt{\frac{1}{n} \sum_{i=1}^n x_i^2+e p s}
|
||||
* \f]
|
||||
* `eps` is in dst->op_params.
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the normalized values will be stored.
|
||||
* dst->op is `GGML_OP_RMS_NORM`.
|
||||
*/
|
||||
void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Applies a diagonal mask to the tensor with a specified value.
|
||||
*
|
||||
* @details This function creates a mask tensor filled with ones, then applies
|
||||
* an upper triangular and lower triangular operation to it based on
|
||||
* the number of past elements specified. Afterward, it adds the masked
|
||||
* tensor to the destination tensor in-place.
|
||||
*
|
||||
* @param ctx The backend CANN context used for operations.
|
||||
* @param dst The destination tensor where the result will be stored. dst->op is
|
||||
* `GGML_OP_DIAG_MASK`
|
||||
* @param value The value to use for masking.
|
||||
*/
|
||||
void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
|
||||
|
||||
/**
|
||||
* @brief Performs an image-to-column transformation on the input tensor.
|
||||
*
|
||||
* @details This function takes an input tensor and applies an image-to-column
|
||||
* operation, converting spatial dimensions into column-like
|
||||
* structures suitable for convolutional operations. It supports both
|
||||
* half-precision (F16) and single-precision (F32) floating-point data
|
||||
* types.
|
||||
*
|
||||
* @param ctx The backend CANN context for executing operations.
|
||||
* @param dst The destination tensor that stores the result of the operation.
|
||||
* dst->op is `GGML_OP_IM2COL`.
|
||||
*/
|
||||
void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes time step embeddings using sine and cosine functions.
|
||||
*
|
||||
* @details This function calculates time step embeddings by applying sine and
|
||||
* cosine transformations to a given input tensor, which is typically
|
||||
* used in temporal models like diffusion models or transformers to
|
||||
* encode time information effectively.
|
||||
*
|
||||
* @param ctx The backend CANN context for executing operations.
|
||||
* @param dst The destination tensor where the result of the embedding operation
|
||||
* will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
|
||||
*/
|
||||
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
// @see ggml_cann_dup.
|
||||
void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the softmax activation with optional masking.
|
||||
*
|
||||
* @details This function computes the softmax activation over the input tensor,
|
||||
* optionally applying a mask and scaling factor. It supports both FP16
|
||||
* and FP32 data types and can handle masking by broadcasting the mask
|
||||
* across rows if necessary.
|
||||
* The function performs the following steps:
|
||||
* 1. Multiplies the input tensor by a scale factor.
|
||||
* 2. Optionally casts the mask tensor to FP32 if it is in FP16 format.
|
||||
* 3. Broadcasts the mask tensor if its dimensions do not match the
|
||||
* input tensor's dimensions.
|
||||
* 4. Adds the mask to the scaled input tensor.
|
||||
* 5. Applies the softmax activation function along the specified
|
||||
* dimension.
|
||||
*
|
||||
* @param ctx The backend CANN context for executing operations.
|
||||
* @param dst The destination tensor where the result will be stored. dst->op is
|
||||
* `GGML_OP_SOFTMAX`.
|
||||
*/
|
||||
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Extracts specific rows from a tensor based on indices.
|
||||
*
|
||||
* @details This function retrieves rows from a source tensor src0 according to
|
||||
* the indices provided in another tensor src1 and stores the result in
|
||||
* a destination tensor (\p dst). It supports different data types
|
||||
* including F32, F16, Q4_0, and Q8_0.
|
||||
*
|
||||
* @param ctx The backend CANN context for executing operations.
|
||||
* @param dst The destination tensor where the extracted rows will be stored.
|
||||
* dst->op is `GGML_OP_GET_ROWS`.
|
||||
*/
|
||||
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Executes matrix multiplication for the given tensor.
|
||||
*
|
||||
* @details This function performs matrix multiplication on the source tensors
|
||||
* associated with the destination tensor. It supports matrix
|
||||
* multiplication F32, F16, and Q8_0.
|
||||
*
|
||||
* @param ctx The backend CANN context for executing operations.
|
||||
* @param dst The destination tensor for storing the result of the matrix
|
||||
* multiplication. dst->op is `GGML_OP_MUL_MAT`.
|
||||
*/
|
||||
void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
/**
|
||||
* @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
|
||||
*
|
||||
* @details This function implements the RoPE mechanism, which is a method to
|
||||
* encode positional information into sequence data, particularly
|
||||
* useful in transformer models. It supports both F32 and F16 data
|
||||
* types.
|
||||
*
|
||||
* @param ctx The backend CANN context for executing operations.
|
||||
* @param dst The destination tensor where the RoPE-transformed data will be
|
||||
* stored. dst->op is `GGML_OP_ROPE`.
|
||||
*
|
||||
 * @note The function currently does not support cases where n_dims is less
 *       than the input tensor's first dimension.
 * @note The function currently does not support cases where freq_factors is
 *       not NULL.
 * @note The function currently does not support cases where ext_factor is
 *       not equal to 0.
 * @note The function currently does not support cases where freq_scale is
 *       not equal to 1.
|
||||
*/
|
||||
void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||
|
||||
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
|
||||
aclTensor*, uint64_t*, aclOpExecutor**),
|
||||
aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
|
||||
void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
ggml_tensor* src0 = dst->src[0];
|
||||
ggml_tensor* src1 = dst->src[1];
|
||||
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
||||
|
||||
aclTensor* acl_src0;
|
||||
aclTensor* acl_src1;
|
||||
aclTensor* acl_dst;
|
||||
|
||||
// Need bcast
|
||||
if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
|
||||
BCAST_SHAPE(src0, src1)
|
||||
acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
|
||||
acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
|
||||
acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
|
||||
} else {
|
||||
acl_src0 = ggml_cann_create_tensor(src0);
|
||||
acl_src1 = ggml_cann_create_tensor(src1);
|
||||
acl_dst = ggml_cann_create_tensor(dst);
|
||||
}
|
||||
|
||||
uint64_t workspaceSize = 0;
|
||||
aclOpExecutor* executor;
|
||||
void* workspaceAddr = nullptr;
|
||||
|
||||
ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
|
||||
&executor));
|
||||
if (workspaceSize > 0) {
|
||||
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
||||
workspaceAddr = workspace_allocator.get();
|
||||
}
|
||||
|
||||
aclrtStream main_stream = ctx.stream();
|
||||
ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_src0));
|
||||
ACL_CHECK(aclDestroyTensor(acl_src1));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
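// Illustration (hypothetical helper, not part of the vendored header): the
// template above is instantiated with a matching aclnn workspace/execute pair,
// for example the multiply entry points from <aclnnop/aclnn_mul.h> included
// above.
static inline void example_cann_mul(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_cann_mul_div<aclnnMulGetWorkspaceSize, aclnnMul>(ctx, dst);
}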
|
||||
|
||||
// Activation functions template.
|
||||
template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
|
||||
aclOpExecutor**),
|
||||
aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
|
||||
const aclrtStream)>
|
||||
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
ggml_tensor* src = dst->src[0];
|
||||
|
||||
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
||||
|
||||
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
||||
|
||||
uint64_t workspaceSize = 0;
|
||||
aclOpExecutor* executor;
|
||||
void* workspaceAddr = nullptr;
|
||||
|
||||
ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
|
||||
if (workspaceSize > 0) {
|
||||
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
||||
workspaceAddr = workspace_allocator.get();
|
||||
}
|
||||
|
||||
aclrtStream main_stream = ctx.stream();
|
||||
ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_src));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
|
||||
|
||||
// Activation functions template for const aclTensors.
|
||||
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
|
||||
uint64_t*, aclOpExecutor**),
|
||||
aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
|
||||
const aclrtStream)>
|
||||
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
||||
ggml_tensor* src = dst->src[0];
|
||||
|
||||
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
||||
|
||||
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
||||
|
||||
uint64_t workspaceSize = 0;
|
||||
aclOpExecutor* executor;
|
||||
void* workspaceAddr = nullptr;
|
||||
|
||||
ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
|
||||
if (workspaceSize > 0) {
|
||||
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
|
||||
workspaceAddr = workspace_allocator.get();
|
||||
}
|
||||
|
||||
aclrtStream main_stream = ctx.stream();
|
||||
ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
|
||||
|
||||
ACL_CHECK(aclDestroyTensor(acl_src));
|
||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||
}
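// Illustration (hypothetical helper, not part of the vendored header): a unary
// activation such as ReLU would be dispatched through one of the templates
// above using the matching pair from <aclnnop/aclnn_relu.h>.
static inline void example_cann_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_cann_activation<aclnnReluGetWorkspaceSize, aclnnRelu>(ctx, dst);
}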
|
||||
|
||||
#endif // CANN_ACLNN_OPS
@@ -0,0 +1,286 @@
/*
|
||||
* Copyright (c) 2023-2024 The ggml authors
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CANN_COMMON_H
|
||||
#define CANN_COMMON_H
|
||||
|
||||
#include <acl/acl.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../include/ggml-cann.h"
|
||||
#include "../include/ggml.h"
|
||||
|
||||
#define MATRIX_ROW_PADDING 512
|
||||
#define GGML_CANN_MAX_STREAMS 8
|
||||
|
||||
/**
|
||||
* @brief Handles CANN-related errors by printing an error message and
|
||||
* terminating the program.
|
||||
* @param stmt The statement that caused the error.
|
||||
* @param func The function in which the error occurred.
|
||||
* @param file The file in which the error occurred.
|
||||
* @param line The line number at which the error occurred.
|
||||
* @param msg The error message.
|
||||
*/
|
||||
[[noreturn]] void ggml_cann_error(const char* stmt, const char* func,
|
||||
const char* file, int line, const char* msg);
|
||||
|
||||
/**
|
||||
* @brief Checks the result of a CANN function call and invokes the error
|
||||
* handler if the call fails.
|
||||
* @param stmt The CANN function call to check.
|
||||
* @param success The success code that indicates the call was successful.
|
||||
* @param error_fn The function to call to retrieve the error message.
|
||||
*/
|
||||
#define ACL_CHECK_GEN(stmt, success, error_fn) \
|
||||
do { \
|
||||
int err_code = (stmt); \
|
||||
if (err_code != (success)) { \
|
||||
ggml_cann_error(#stmt, __func__, __FILE__, __LINE__, error_fn()); \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
#define ACL_CHECK(stmt) ACL_CHECK_GEN(stmt, 0, aclGetRecentErrMsg)
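// Usage sketch (illustrative, not part of the vendored header): any call that
// returns an ACL status code can be wrapped; a non-zero result aborts through
// ggml_cann_error() with the message from aclGetRecentErrMsg().
static inline void example_acl_check_usage() {
    aclrtStream stream = nullptr;
    ACL_CHECK(aclrtCreateStream(&stream));
    ACL_CHECK(aclrtDestroyStream(stream));
}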
|
||||
|
||||
/**
|
||||
* @brief Contains information about CANN devices.
|
||||
*/
|
||||
struct ggml_cann_device_info {
|
||||
/**
|
||||
* @brief Number of CANN devices available.
|
||||
*/
|
||||
int32_t device_count;
|
||||
|
||||
/**
|
||||
* @brief Information about a single CANN device.
|
||||
*/
|
||||
struct cann_device_info {
|
||||
int cc; /**< Compute capability. */
|
||||
size_t smpb; /**< Maximum shared memory per block. */
|
||||
bool vmm; /**< Virtual memory support. */
|
||||
size_t vmm_granularity; /**< Granularity of virtual memory. */
|
||||
size_t total_vram; /**< Total video RAM available on the device. */
|
||||
};
|
||||
|
||||
cann_device_info devices[GGML_CANN_MAX_DEVICES] =
|
||||
{}; /**< Array of CANN device information. */
|
||||
};
|
||||
|
||||
const ggml_cann_device_info& ggml_cann_info();
|
||||
|
||||
void ggml_cann_set_device(int32_t device);
|
||||
int32_t ggml_cann_get_device();
|
||||
|
||||
/**
|
||||
* @brief Abstract base class for memory pools used by CANN.
|
||||
*/
|
||||
struct ggml_cann_pool {
|
||||
/**
|
||||
* @brief Virtual destructor for the memory pool.
|
||||
*/
|
||||
virtual ~ggml_cann_pool() = default;
|
||||
|
||||
/**
|
||||
* @brief Allocates memory from the pool.
|
||||
*
|
||||
* @param size The size of the memory block to allocate.
|
||||
* @param actual_size Pointer to a variable where the actual allocated size
|
||||
* will be stored.
|
||||
* @return Pointer to the allocated memory block.
|
||||
*/
|
||||
virtual void* alloc(size_t size, size_t* actual_size) = 0;
|
||||
|
||||
/**
|
||||
* @brief Frees a previously allocated memory block.
|
||||
*
|
||||
* @param ptr Pointer to the memory block to free.
|
||||
* @param size Size of the memory block to free.
|
||||
 * @note Note that all CANN operators run asynchronously. Make sure the memory
 *       is still available until this operator has finished.
|
||||
*/
|
||||
virtual void free(void* ptr, size_t size) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief RAII wrapper for managing memory allocations from a CANN memory pool.
|
||||
*/
|
||||
struct ggml_cann_pool_alloc {
|
||||
ggml_cann_pool* pool = nullptr; /**< Pointer to the memory pool. */
|
||||
void* ptr = nullptr; /**< Pointer to the allocated memory block. */
|
||||
size_t actual_size = 0; /**< Actual size of the allocated memory block. */
|
||||
|
||||
/**
|
||||
* @brief Default constructor.
|
||||
*/
|
||||
ggml_cann_pool_alloc() = default;
|
||||
|
||||
/**
|
||||
* @brief Constructor that initializes the memory pool.
|
||||
* @param pool Reference to the memory pool.
|
||||
*/
|
||||
explicit ggml_cann_pool_alloc(ggml_cann_pool& pool) : pool(&pool) {}
|
||||
|
||||
/**
|
||||
* @brief Constructor that initializes the memory pool and allocates memory.
|
||||
* @param pool Reference to the memory pool.
|
||||
* @param size Size of the memory block to allocate.
|
||||
*/
|
||||
ggml_cann_pool_alloc(ggml_cann_pool& pool, size_t size) : pool(&pool) {
|
||||
alloc(size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destructor that frees the allocated memory block.
|
||||
*/
|
||||
~ggml_cann_pool_alloc() {
|
||||
if (ptr != nullptr) {
|
||||
pool->free(ptr, actual_size);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Allocates memory from the pool.
|
||||
* @param size Size of the memory block to allocate.
|
||||
* @return Pointer to the allocated memory block.
|
||||
*/
|
||||
void* alloc(size_t size) {
|
||||
GGML_ASSERT(pool != nullptr);
|
||||
GGML_ASSERT(ptr == nullptr);
|
||||
ptr = pool->alloc(size, &this->actual_size);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Allocates memory from a specific memory pool.
|
||||
* @param pool Reference to the memory pool.
|
||||
* @param size Size of the memory block to allocate.
|
||||
* @return Pointer to the allocated memory block.
|
||||
*/
|
||||
void* alloc(ggml_cann_pool& pool, size_t size) {
|
||||
this->pool = &pool;
|
||||
return alloc(size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets the pointer to the allocated memory block.
|
||||
* @return Pointer to the allocated memory block.
|
||||
*/
|
||||
void* get() { return ptr; }
|
||||
|
||||
// Deleted copy constructor
|
||||
ggml_cann_pool_alloc(const ggml_cann_pool_alloc&) = delete;
|
||||
|
||||
// Deleted move constructor
|
||||
ggml_cann_pool_alloc(ggml_cann_pool_alloc&&) = delete;
|
||||
|
||||
// Deleted copy assignment operator
|
||||
ggml_cann_pool_alloc& operator=(const ggml_cann_pool_alloc&) = delete;
|
||||
|
||||
// Deleted move assignment operator
|
||||
ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete;
|
||||
};
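// Usage sketch (illustrative, not part of the vendored header): allocate a
// temporary workspace from a pool; the RAII wrapper returns the buffer to the
// pool when it goes out of scope. Because CANN operators run asynchronously,
// the allocation must stay alive until the operator that uses it has finished.
static inline void example_pool_alloc_usage(ggml_cann_pool& pool, uint64_t workspaceSize) {
    void* workspaceAddr = nullptr;
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(pool, workspaceSize);
        workspaceAddr = workspace_allocator.get();
        // ... launch the aclnn operator with workspaceAddr here ...
    }
    (void)workspaceAddr;
}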
|
||||
|
||||
/**
|
||||
* @brief Context for managing CANN backend operations.
|
||||
*/
|
||||
struct ggml_backend_cann_context {
|
||||
int32_t device; /**< Device ID. */
|
||||
std::string name; /**< Name of the device. */
|
||||
std::string description; /**< Description of the device. */
|
||||
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
|
||||
|
||||
aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
|
||||
|
||||
/**
|
||||
* @brief Constructor for initializing the context with a given device.
|
||||
* @param device Device ID.
|
||||
*/
|
||||
explicit ggml_backend_cann_context(int device)
|
||||
: device(device), name("CANN" + std::to_string(device)) {
|
||||
ggml_cann_set_device(device);
|
||||
description = aclrtGetSocName();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destructor for cleaning up resources.
|
||||
*/
|
||||
~ggml_backend_cann_context() {
|
||||
ggml_cann_set_device(device);
|
||||
if (copy_event != nullptr) {
|
||||
ACL_CHECK(aclrtDestroyEvent(copy_event));
|
||||
}
|
||||
for (int i = 0; i < GGML_CANN_MAX_STREAMS; ++i) {
|
||||
if (streams[i] != nullptr) {
|
||||
ACL_CHECK(aclrtDestroyStream(streams[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get or create a stream for a given index.
|
||||
* @param stream Index of the stream.
|
||||
* @return The stream corresponding to the given index.
|
||||
*/
|
||||
aclrtStream stream(int stream) {
|
||||
if (streams[stream] == nullptr) {
|
||||
ggml_cann_set_device(device);
|
||||
ACL_CHECK(aclrtCreateStream(&streams[stream]));
|
||||
}
|
||||
return streams[stream];
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get or create the default stream (index 0).
|
||||
* @return The default stream.
|
||||
*/
|
||||
aclrtStream stream() { return stream(0); }
|
||||
|
||||
// TODO: each stream should have a memory pool.
|
||||
std::unique_ptr<ggml_cann_pool>
|
||||
mem_pool; /**< Memory pool for the device. */
|
||||
|
||||
/**
|
||||
* @brief Create a new memory pool for a given device.
|
||||
* @param device Device ID.
|
||||
* @return A unique pointer to the new memory pool.
|
||||
*/
|
||||
static std::unique_ptr<ggml_cann_pool> new_pool_for_device(int device);
|
||||
|
||||
/**
|
||||
* @brief Get or create the memory pool for the context.
|
||||
* @return Reference to the memory pool.
|
||||
*/
|
||||
ggml_cann_pool& pool() {
|
||||
if (mem_pool == nullptr) {
|
||||
mem_pool = new_pool_for_device(device);
|
||||
}
|
||||
return *mem_pool;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // CANN_COMMON_H
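As a brief, hedged illustration of the context declared above (the wrapper function is hypothetical): constructing the context selects the device, while streams and the memory pool are created lazily on first use.

static void example_context_usage() {
    ggml_backend_cann_context ctx(/*device=*/0);  // selects device 0 and queries the SoC name
    aclrtStream s = ctx.stream();                 // lazily creates stream 0
    ggml_cann_pool& pool = ctx.pool();            // lazily creates the device memory pool
    (void)s; (void)pool;
}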
|
2188
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/ggml-cann.cpp
vendored
Normal file
File diff suppressed because it is too large
30
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,30 @@
file(GLOB SRC_FILES
|
||||
get_row_f32.cpp
|
||||
get_row_f16.cpp
|
||||
get_row_q4_0.cpp
|
||||
get_row_q8_0.cpp
|
||||
quantize_f32_q8_0.cpp
|
||||
quantize_f16_q8_0.cpp
|
||||
quantize_float_to_q4_0.cpp
|
||||
dup.cpp
|
||||
)
|
||||
|
||||
set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
|
||||
set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")
|
||||
|
||||
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
|
||||
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
|
||||
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
|
||||
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
|
||||
else()
|
||||
message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.")
|
||||
endif()
|
||||
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
|
||||
|
||||
ascendc_library(ascendc_kernels STATIC
|
||||
${SRC_FILES}
|
||||
)
|
||||
|
||||
message(STATUS "CANN: compile ascend kernels with SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
|
||||
ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
|
||||
# ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
|
19
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h
vendored
Normal file
@@ -0,0 +1,19 @@
#ifndef ASCENDC_KERNELS_H
|
||||
#define ASCENDC_KERNELS_H
|
||||
|
||||
#include "aclrtlaunch_ascendc_get_row_f32.h"
|
||||
#include "aclrtlaunch_ascendc_get_row_f16.h"
|
||||
#include "aclrtlaunch_ascendc_get_row_q8_0.h"
|
||||
#include "aclrtlaunch_ascendc_get_row_q4_0.h"
|
||||
|
||||
#include "aclrtlaunch_ascendc_quantize_f32_q8_0.h"
|
||||
#include "aclrtlaunch_ascendc_quantize_f16_q8_0.h"
|
||||
#include "aclrtlaunch_ascendc_quantize_f16_to_q4_0.h"
|
||||
#include "aclrtlaunch_ascendc_quantize_f32_to_q4_0.h"
|
||||
|
||||
#include "aclrtlaunch_ascendc_dup_by_rows_fp16.h"
|
||||
#include "aclrtlaunch_ascendc_dup_by_rows_fp32.h"
|
||||
#include "aclrtlaunch_ascendc_dup_by_rows_fp32_to_fp16.h"
|
||||
#include "aclrtlaunch_ascendc_dup_by_rows_fp16_to_fp32.h"
|
||||
|
||||
#endif // ASCENDC_KERNELS_H
|
236
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/dup.cpp
vendored
Normal file
@@ -0,0 +1,236 @@
#include "kernel_operator.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace AscendC;
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
const int64_t SUPPORTED_MAX_DIM = 65535; // currently the limit of max block dim supported by the dup kernel is 65535
|
||||
|
||||
template <typename SRC_T, typename DST_T>
|
||||
class DupByRows {
|
||||
public:
|
||||
__aicore__ inline DupByRows() {}
|
||||
__aicore__ inline void init(GM_ADDR src, GM_ADDR dst, int64_t *input_ne_ub,
|
||||
size_t *input_nb_ub) {
|
||||
        /* Dup by rows when src is contiguous on the first dimension and dst is
           contiguous; each kernel processes one row.
         */
|
||||
|
||||
// Input has four dims.
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
int64_t op_block_idx = GetBlockIdx();
|
||||
|
||||
// param
|
||||
num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3];
|
||||
num_elem = input_ne_ub[0];
|
||||
|
||||
// index for (ne[1], ne[2], ne[3]): (idx_ne1, idx_ne2, idx_ne3)
|
||||
idx_ne3 = op_block_idx / (input_ne_ub[1] * input_ne_ub[2]);
|
||||
idx_ne2 = (op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2]))
|
||||
/ (input_ne_ub[1]);
|
||||
idx_ne1 = op_block_idx - idx_ne3 * (input_ne_ub[1] * input_ne_ub[2])
|
||||
- idx_ne2 * input_ne_ub[1];
|
||||
|
||||
        // src may not be contiguous in dims [1,2,3], so the stride is decided by ne & nb
|
||||
src_stride = input_nb_ub[3] * idx_ne3 + input_nb_ub[2] * idx_ne2
|
||||
+ input_nb_ub[1] * idx_ne1;
|
||||
|
||||
// dst is contiguous
|
||||
dst_stride = op_block_idx * (input_ne_ub[0] * sizeof(DST_T));
|
||||
|
||||
src_gm.SetGlobalBuffer(reinterpret_cast<__gm__ SRC_T *>(src +
|
||||
src_stride));
|
||||
dst_gm.SetGlobalBuffer(reinterpret_cast<__gm__ DST_T *>(dst +
|
||||
dst_stride));
|
||||
|
||||
pipe.InitBuffer(src_queue, BUFFER_NUM, (sizeof(SRC_T) * num_elem +
|
||||
32 - 1) / 32 * 32);
|
||||
pipe.InitBuffer(dst_queue, BUFFER_NUM, (sizeof(DST_T) * num_elem +
|
||||
32 - 1) / 32 * 32);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in() {
|
||||
LocalTensor<SRC_T> src_local = src_queue.AllocTensor<SRC_T>();
|
||||
const size_t elem_per_block = 32 / sizeof(SRC_T);
|
||||
size_t tail = num_elem % elem_per_block;
|
||||
size_t cpy_elements_len = tail > 0 ? num_elem + 1 : num_elem;
|
||||
DataCopy(src_local, src_gm, cpy_elements_len);
|
||||
src_queue.EnQue(src_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out() {
|
||||
LocalTensor<DST_T> dst_local = dst_queue.DeQue<DST_T>();
|
||||
#ifdef ASCEND_310P
|
||||
const size_t elem_per_block = 32 / sizeof(DST_T);
|
||||
size_t tail = num_elem % elem_per_block;
|
||||
size_t len = num_elem & ~(elem_per_block - 1);
|
||||
if (len > 0) {
|
||||
DataCopy(dst_gm, dst_local, len);
|
||||
}
|
||||
if(tail != 0) {
|
||||
for (size_t i = tail; i < elem_per_block; i++) {
|
||||
dst_local[len + i].SetValue(0, 0);
|
||||
}
|
||||
SetAtomicAdd<float>();
|
||||
DataCopy(dst_gm[len], dst_local[len], elem_per_block);
|
||||
SetAtomicNone();
|
||||
}
|
||||
#else
|
||||
DataCopyExtParams dataCopyParams;
|
||||
dataCopyParams.blockCount = 1;
|
||||
dataCopyParams.blockLen = num_elem * sizeof(DST_T);
|
||||
DataCopyPad(dst_gm, dst_local, dataCopyParams);
|
||||
#endif
|
||||
dst_queue.FreeTensor(dst_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void dup() {
|
||||
// main process, copy one row data from src to dst.
|
||||
copy_in();
|
||||
|
||||
LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
|
||||
LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();
|
||||
|
||||
int32_t BLOCK_NUM = 32 / sizeof(DST_T);
|
||||
DataCopy(dst_local, src_local, (num_elem + BLOCK_NUM - 1)
|
||||
/ BLOCK_NUM * BLOCK_NUM);
|
||||
dst_queue.EnQue<DST_T>(dst_local);
|
||||
|
||||
src_queue.FreeTensor(src_local);
|
||||
copy_out();
|
||||
}
|
||||
|
||||
__aicore__ inline void dup_with_cast() {
|
||||
// main process, copy one row data from src to dst.
|
||||
// cast dtype from src to dst.
|
||||
copy_in();
|
||||
|
||||
LocalTensor<SRC_T> src_local = src_queue.DeQue<SRC_T>();
|
||||
LocalTensor<DST_T> dst_local = dst_queue.AllocTensor<DST_T>();
|
||||
|
||||
Cast(dst_local, src_local, RoundMode::CAST_NONE, num_elem);
|
||||
dst_queue.EnQue<DST_T>(dst_local);
|
||||
|
||||
src_queue.FreeTensor(src_local);
|
||||
copy_out();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<SRC_T> src_gm;
|
||||
GlobalTensor<DST_T> dst_gm;
|
||||
|
||||
int64_t num_rows;
|
||||
int64_t num_elem;
|
||||
int64_t idx_ne3;
|
||||
int64_t idx_ne2;
|
||||
int64_t idx_ne1;
|
||||
int64_t src_stride;
|
||||
int64_t dst_stride;
|
||||
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> src_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> dst_queue;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16(
|
||||
GM_ADDR src_gm,
|
||||
GM_ADDR dst_gm,
|
||||
GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm,
|
||||
GM_ADDR output_ne_gm,
|
||||
GM_ADDR output_nb_gm) {
|
||||
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
DupByRows<half, half> op;
|
||||
op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
|
||||
op.dup();
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32(
|
||||
GM_ADDR src_gm,
|
||||
GM_ADDR dst_gm,
|
||||
GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm,
|
||||
GM_ADDR output_ne_gm,
|
||||
GM_ADDR output_nb_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
DupByRows<float_t, float_t> op;
|
||||
op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
|
||||
op.dup();
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp32_to_fp16(
|
||||
GM_ADDR src_gm,
|
||||
GM_ADDR dst_gm,
|
||||
GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm,
|
||||
GM_ADDR output_ne_gm,
|
||||
GM_ADDR output_nb_gm) {
|
||||
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
DupByRows<float_t, half> op;
|
||||
op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
|
||||
op.dup_with_cast();
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_dup_by_rows_fp16_to_fp32(
|
||||
GM_ADDR src_gm,
|
||||
GM_ADDR dst_gm,
|
||||
GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm,
|
||||
GM_ADDR output_ne_gm,
|
||||
GM_ADDR output_nb_gm) {
|
||||
|
||||
// copy params from gm to ub.
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
DupByRows<half, float_t> op;
|
||||
op.init(src_gm, dst_gm, input_ne_ub, input_nb_ub);
|
||||
op.dup_with_cast();
|
||||
}
|
197
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp
vendored
Normal file
@@ -0,0 +1,197 @@
#include "kernel_operator.h"
|
||||
|
||||
// optimize me. Use a template to avoid duplicated code.
|
||||
using namespace AscendC;
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
|
||||
class GET_ROW_F16 {
|
||||
public:
|
||||
__aicore__ inline GET_ROW_F16() {}
|
||||
__aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
|
||||
int64_t *input_ne_ub, size_t *input_nb_ub,
|
||||
int64_t *indices_ne_ub, size_t *indices_nb_ub,
|
||||
int64_t *output_ne_ub, size_t *output_nb_ub) {
|
||||
// TODO, use template for F16/f32
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
op_block_idx = GetBlockIdx();
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
input_ne[i] = input_ne_ub[i];
|
||||
input_stride[i] = input_nb_ub[i] / input_nb_ub[0];
|
||||
|
||||
indices_ne[i] = indices_ne_ub[i];
|
||||
indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
|
||||
|
||||
output_ne[i] = output_ne_ub[i];
|
||||
output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
|
||||
}
|
||||
|
||||
        // Indices has two dims. n_elements = total number of rows to fetch.
        // dr = number of rows this thread should fetch.
|
||||
uint64_t n_elements =
|
||||
indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
|
||||
dr = n_elements / op_block_num;
|
||||
|
||||
uint64_t tails = n_elements % op_block_num;
|
||||
if (op_block_idx < tails) {
|
||||
dr += 1;
|
||||
ir = dr * op_block_idx;
|
||||
} else {
|
||||
ir = dr * op_block_idx + tails;
|
||||
}
|
||||
|
||||
input_gm.SetGlobalBuffer((__gm__ half *)input);
|
||||
indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
|
||||
output_gm.SetGlobalBuffer((__gm__ float *)output);
|
||||
|
||||
uint64_t input_local_buffer_size = ((input_ne[0] * sizeof(half) + 31)
|
||||
& ~31);
|
||||
uint64_t output_local_buffer_size = ((input_ne[0] * sizeof(float) + 31)
|
||||
& ~31);
|
||||
|
||||
local_buffer_elems = input_local_buffer_size / sizeof(half);
|
||||
|
||||
        // TODO: consider long rows that can't fit in the UB.
        // All data should be aligned to 32. It's ok because all data is aligned to 32.
|
||||
pipe.InitBuffer(input_queue, BUFFER_NUM, input_local_buffer_size);
|
||||
pipe.InitBuffer(output_queue, BUFFER_NUM, output_local_buffer_size);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in(uint32_t offset, size_t len) {
|
||||
size_t origin_len = len;
|
||||
LocalTensor<half> input_local = input_queue.AllocTensor<half>();
|
||||
const size_t elem_per_block = 32 / sizeof(half);
|
||||
size_t tail = len % elem_per_block;
|
||||
len = len & ~(elem_per_block - 1);
|
||||
if(tail != 0) {
|
||||
len += elem_per_block;
|
||||
}
|
||||
DataCopy(input_local, input_gm[offset], len);
|
||||
input_queue.EnQue(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out(uint32_t offset, size_t len) {
|
||||
LocalTensor<float> output_local = output_queue.DeQue<float>();
|
||||
const size_t elem_per_block = 32 / sizeof(float);
|
||||
size_t tail = len % elem_per_block;
|
||||
len = len & ~(elem_per_block - 1);
|
||||
if (len > 0) {
|
||||
DataCopy(output_gm[offset], output_local, len);
|
||||
}
|
||||
|
||||
if(tail != 0) {
|
||||
#ifdef ASCEND_310P
|
||||
for (size_t i = tail; i < elem_per_block; i++) {
|
||||
output_local[len + i].SetValue(0, 0);
|
||||
}
|
||||
SetAtomicAdd<float>();
|
||||
DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
|
||||
SetAtomicNone();
|
||||
#else
|
||||
DataCopyExtParams dataCopyParams;
|
||||
dataCopyParams.blockCount = 1;
|
||||
dataCopyParams.blockLen = tail * sizeof(float);
|
||||
DataCopyPad(output_gm[offset + len], output_local[len],
|
||||
dataCopyParams);
|
||||
#endif
|
||||
}
|
||||
output_queue.FreeTensor(output_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate_row(int64_t idx) {
|
||||
const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
|
||||
const int64_t indices_ne1_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
|
||||
indices_ne[0];
|
||||
const int64_t indices_ne0_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
|
||||
indices_ne1_idx * indices_ne[0]);
|
||||
|
||||
const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
|
||||
indices_ne1_idx * indices_stride[1] +
|
||||
indices_ne2_idx * indices_stride[2];
|
||||
const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);
|
||||
|
||||
const int64_t input_offset = selected_row_idx * input_stride[1] +
|
||||
indices_ne1_idx * input_stride[2] +
|
||||
indices_ne2_idx * input_stride[3];
|
||||
|
||||
const int64_t output_offset = indices_ne0_idx * output_stride[1] +
|
||||
indices_ne1_idx * output_stride[2] +
|
||||
indices_ne2_idx * output_stride[3];
|
||||
|
||||
copy_in(input_offset, input_ne[0]);
|
||||
LocalTensor<half> input_local = input_queue.DeQue<half>();
|
||||
LocalTensor<float> output_local = output_queue.AllocTensor<float>();
|
||||
|
||||
Cast(output_local, input_local, RoundMode::CAST_NONE,
|
||||
local_buffer_elems);
|
||||
output_queue.EnQue(output_local);
|
||||
copy_out(output_offset, input_ne[0]);
|
||||
|
||||
input_queue.FreeTensor(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate() {
|
||||
for (int64_t i = ir; i < ir + dr; i++) {
|
||||
calculate_row(i);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t input_ne[4];
|
||||
size_t input_stride[4];
|
||||
|
||||
int64_t indices_ne[4];
|
||||
size_t indices_stride[4];
|
||||
|
||||
int64_t output_ne[4];
|
||||
size_t output_stride[4];
|
||||
|
||||
size_t local_buffer_elems;
|
||||
|
||||
int64_t ir;
|
||||
int64_t dr;
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<half> input_gm;
|
||||
GlobalTensor<int32_t> indices_gm;
|
||||
GlobalTensor<float> output_gm;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
|
||||
int64_t op_block_idx;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_get_row_f16(
|
||||
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
|
||||
GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm,
|
||||
GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t indices_ne_ub[4];
|
||||
size_t indices_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
|
||||
copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
GET_ROW_F16 op;
|
||||
op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub,
|
||||
indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub);
|
||||
op.calculate();
|
||||
}
|
190
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp
vendored
Normal file
@@ -0,0 +1,190 @@
#include "kernel_operator.h"
|
||||
|
||||
// optimize me. Use a template to avoid duplicated code.
|
||||
using namespace AscendC;
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
|
||||
class GET_ROW_F32 {
|
||||
public:
|
||||
__aicore__ inline GET_ROW_F32() {}
|
||||
__aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
|
||||
int64_t *input_ne_ub, size_t *input_nb_ub,
|
||||
int64_t *indices_ne_ub, size_t *indices_nb_ub,
|
||||
int64_t *output_ne_ub, size_t *output_nb_ub) {
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
op_block_idx = GetBlockIdx();
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
input_ne[i] = input_ne_ub[i];
|
||||
input_stride[i] = input_nb_ub[i] / input_nb_ub[0];
|
||||
|
||||
indices_ne[i] = indices_ne_ub[i];
|
||||
indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
|
||||
|
||||
output_ne[i] = output_ne_ub[i];
|
||||
output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
|
||||
}
|
||||
|
||||
        // Indices has two dims. n_elements = total number of rows to fetch.
        // dr = number of rows this thread should fetch.
|
||||
uint64_t n_elements =
|
||||
indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
|
||||
dr = n_elements / op_block_num;
|
||||
|
||||
uint64_t tails = n_elements % op_block_num;
|
||||
if (op_block_idx < tails) {
|
||||
dr += 1;
|
||||
ir = dr * op_block_idx;
|
||||
} else {
|
||||
ir = dr * op_block_idx + tails;
|
||||
}
|
||||
|
||||
input_gm.SetGlobalBuffer((__gm__ float *)input);
|
||||
indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
|
||||
output_gm.SetGlobalBuffer((__gm__ float *)output);
|
||||
|
||||
uint64_t local_buffer_size = ((input_ne[0] * sizeof(float) + 31) & ~31);
|
||||
local_buffer_elems = local_buffer_size / sizeof(float);
|
||||
|
||||
        // TODO: consider long rows that can't fit in the UB.
        // All data should be aligned to 32. It's ok because all data is aligned to 32.
|
||||
pipe.InitBuffer(input_queue, BUFFER_NUM, local_buffer_size);
|
||||
pipe.InitBuffer(output_queue, BUFFER_NUM, local_buffer_size);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in(uint32_t offset, size_t len) {
|
||||
LocalTensor<float> input_local = input_queue.AllocTensor<float>();
|
||||
const size_t elem_per_block = 32 / sizeof(float);
|
||||
size_t tail = len % elem_per_block;
|
||||
len = len & ~(elem_per_block - 1);
|
||||
if(tail != 0) {
|
||||
len += elem_per_block;
|
||||
}
|
||||
DataCopy(input_local, input_gm[offset], len);
|
||||
input_queue.EnQue(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out(uint32_t offset, size_t len) {
|
||||
LocalTensor<float> output_local = output_queue.DeQue<float>();
|
||||
const size_t elem_per_block = 32 / sizeof(float);
|
||||
size_t tail = len % elem_per_block;
|
||||
len = len & ~(elem_per_block - 1);
|
||||
if (len > 0) {
|
||||
DataCopy(output_gm[offset], output_local, len);
|
||||
}
|
||||
|
||||
if(tail != 0) {
|
||||
#ifdef ASCEND_310P
|
||||
for (size_t i = tail; i < elem_per_block; i++) {
|
||||
output_local[len + i].SetValue(0, 0);
|
||||
}
|
||||
SetAtomicAdd<float>();
|
||||
DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
|
||||
SetAtomicNone();
|
||||
#else
|
||||
DataCopyExtParams dataCopyParams;
|
||||
dataCopyParams.blockCount = 1;
|
||||
dataCopyParams.blockLen = tail * sizeof(float);
|
||||
DataCopyPad(output_gm[offset + len], output_local[len],
|
||||
dataCopyParams);
|
||||
#endif
|
||||
}
|
||||
output_queue.FreeTensor(output_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate_row(int64_t idx) {
|
||||
const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
|
||||
const int64_t indices_ne1_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
|
||||
indices_ne[0];
|
||||
const int64_t indices_ne0_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
|
||||
indices_ne1_idx * indices_ne[0]);
|
||||
|
||||
const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
|
||||
indices_ne1_idx * indices_stride[1] +
|
||||
indices_ne2_idx * indices_stride[2];
|
||||
const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);
|
||||
|
||||
const int64_t input_offset = selected_row_idx * input_stride[1] +
|
||||
indices_ne1_idx * input_stride[2] +
|
||||
indices_ne2_idx * input_stride[3];
|
||||
|
||||
const int64_t output_offset = indices_ne0_idx * output_stride[1] +
|
||||
indices_ne1_idx * output_stride[2] +
|
||||
indices_ne2_idx * output_stride[3];
|
||||
|
||||
copy_in(input_offset, input_ne[0]);
|
||||
LocalTensor<float> input_local = input_queue.DeQue<float>();
|
||||
LocalTensor<float> output_local = output_queue.AllocTensor<float>();
|
||||
|
||||
DataCopy(output_local, input_local, local_buffer_elems);
|
||||
output_queue.EnQue(output_local);
|
||||
copy_out(output_offset, input_ne[0]);
|
||||
|
||||
input_queue.FreeTensor(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate() {
|
||||
for (int64_t i = ir; i < ir + dr; i++) {
|
||||
calculate_row(i);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t input_ne[4];
|
||||
size_t input_stride[4];
|
||||
|
||||
int64_t indices_ne[4];
|
||||
size_t indices_stride[4];
|
||||
|
||||
int64_t output_ne[4];
|
||||
size_t output_stride[4];
|
||||
|
||||
size_t local_buffer_elems;
|
||||
|
||||
int64_t ir;
|
||||
int64_t dr;
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<float> input_gm;
|
||||
GlobalTensor<int32_t> indices_gm;
|
||||
GlobalTensor<float> output_gm;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
|
||||
int64_t op_block_idx;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_get_row_f32(
|
||||
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
|
||||
GM_ADDR input_ne_gm, GM_ADDR input_nb_gm, GM_ADDR indices_ne_gm,
|
||||
GM_ADDR indices_nb_gm, GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t indices_ne_ub[4];
|
||||
size_t indices_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
|
||||
copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
GET_ROW_F32 op;
|
||||
op.init(input_gm, indices_gm, output_gm, input_ne_ub, input_nb_ub,
|
||||
indices_ne_ub, indices_nb_ub, output_ne_ub, output_nb_ub);
|
||||
op.calculate();
|
||||
}
|
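Every kernel in this directory partitions its rows (or flat indices) across cores the same way: an even share of n / block_num per core, with the first n % block_num cores taking one extra element. A standalone C++ sketch of that split, for illustration only (block_idx and block_num stand in for GetBlockIdx() and GetBlockNum()):

#include <cstdint>

// Compute the half-open range [ir, ir + dr) of rows handled by one core,
// mirroring the dr / ir / tails arithmetic in the init() methods above.
void split_rows(int64_t n_rows, int64_t block_num, int64_t block_idx,
                int64_t &ir, int64_t &dr) {
    dr = n_rows / block_num;              // base share per core
    int64_t tails = n_rows % block_num;
    if (block_idx < tails) {
        dr += 1;                          // first `tails` cores take one extra row
        ir = dr * block_idx;
    } else {
        ir = dr * block_idx + tails;      // skip past the enlarged shares
    }
}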
204
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
vendored
Normal file
|
@@ -0,0 +1,204 @@
|
|||
#include "kernel_operator.h"
|
||||
|
||||
// TODO: optimize; use a template to avoid duplicating this code.
|
||||
using namespace AscendC;
|
||||
#ifdef ASCEND_310P // 310P not support 4bit get row
|
||||
extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
|
||||
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
|
||||
GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
|
||||
GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
|
||||
// Let the remaining test cases continue to run; just print an error message here. Of course, the test case that calls this operator fails.
|
||||
printf("Ascend310P not support 4bit get row.\n");
|
||||
}
|
||||
#else
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
|
||||
#define QK4_0 32
|
||||
|
||||
class GET_ROW_Q4_0 {
|
||||
public:
|
||||
__aicore__ inline GET_ROW_Q4_0() {}
|
||||
__aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
|
||||
int64_t *input_ne_ub, int64_t *indices_ne_ub,
|
||||
size_t *indices_nb_ub, int64_t *output_ne_ub,
|
||||
size_t *output_nb_ub) {
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
int64_t op_block_idx = GetBlockIdx();
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
input_ne[i] = input_ne_ub[i];
|
||||
indices_ne[i] = indices_ne_ub[i];
|
||||
indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
|
||||
scale_ne[i] = input_ne_ub[i];
|
||||
output_ne[i] = output_ne_ub[i];
|
||||
output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
|
||||
}
|
||||
|
||||
// one scale for a group.
|
||||
scale_ne[0] /= QK4_0;
|
||||
|
||||
input_stride[0] = 1;
|
||||
scale_stride[0] = 1;
|
||||
output_stride[0] = 1;
|
||||
for (int i = 1; i < 4; i++) {
|
||||
input_stride[i] = input_stride[i - 1] * input_ne[i - 1];
|
||||
scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
|
||||
}
|
||||
|
||||
group_size_in_row = input_ne[0] / QK4_0;
|
||||
int64_t scale_offset = input_ne[0] * input_ne[1] * input_ne[2] *
|
||||
input_ne[3] / 2;
|
||||
|
||||
// Indices has two dims. n_elements = the total number of rows to fetch.
|
||||
// dr = the number of rows this block should fetch.
|
||||
uint64_t n_elements =
|
||||
indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
|
||||
dr = n_elements / op_block_num;
|
||||
|
||||
uint64_t tails = n_elements % op_block_num;
|
||||
if (op_block_idx < tails) {
|
||||
dr += 1;
|
||||
ir = dr * op_block_idx;
|
||||
} else {
|
||||
ir = dr * op_block_idx + tails;
|
||||
}
|
||||
|
||||
input_gm.SetGlobalBuffer((__gm__ int4b_t *)input);
|
||||
scale_gm.SetGlobalBuffer((__gm__ half *)(input + scale_offset));
|
||||
indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
|
||||
output_gm.SetGlobalBuffer((__gm__ float *)output);
|
||||
|
||||
pipe.InitBuffer(input_queue, BUFFER_NUM, QK4_0 * sizeof(int4b_t));
|
||||
pipe.InitBuffer(cast_queue, BUFFER_NUM, QK4_0 * sizeof(half));
|
||||
pipe.InitBuffer(output_queue, BUFFER_NUM, QK4_0 * sizeof(float));
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in(uint32_t offset) {
|
||||
LocalTensor<int4b_t> input_local = input_queue.AllocTensor<int4b_t>();
|
||||
// 32 * sizeof(int4b_t) = 16 bytes, which is not aligned to 32; why is there no error?
|
||||
DataCopy(input_local, input_gm[offset], QK4_0);
|
||||
input_queue.EnQue(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out(uint32_t offset) {
|
||||
LocalTensor<float> output_local = output_queue.DeQue<float>();
|
||||
DataCopy(output_gm[offset], output_local, QK4_0);
|
||||
output_queue.FreeTensor(output_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate_group(int64_t idx, int64_t group) {
|
||||
const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
|
||||
const int64_t indices_ne1_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
|
||||
indices_ne[0];
|
||||
const int64_t indices_ne0_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
|
||||
indices_ne1_idx * indices_ne[0]);
|
||||
|
||||
const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
|
||||
indices_ne1_idx * indices_stride[1] +
|
||||
indices_ne2_idx * indices_stride[2];
|
||||
const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);
|
||||
|
||||
const int64_t input_offset = selected_row_idx * input_stride[1] +
|
||||
indices_ne1_idx * input_stride[2] +
|
||||
indices_ne2_idx * input_stride[3] +
|
||||
group * QK4_0;
|
||||
const int64_t scale_offset = selected_row_idx * scale_stride[1] +
|
||||
indices_ne1_idx * scale_stride[2] +
|
||||
indices_ne2_idx * scale_stride[3] + group;
|
||||
const int64_t output_offset = indices_ne0_idx * output_stride[1] +
|
||||
indices_ne1_idx * output_stride[2] +
|
||||
indices_ne2_idx * output_stride[3] +
|
||||
group * QK4_0;
|
||||
|
||||
copy_in(input_offset);
|
||||
LocalTensor<int4b_t> input_local = input_queue.DeQue<int4b_t>();
|
||||
LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();
|
||||
LocalTensor<float> output_local = output_queue.AllocTensor<float>();
|
||||
|
||||
// TODO: cast more data to speed up.
|
||||
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
|
||||
Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
|
||||
|
||||
// Only the multiplication needs to be computed per group.
|
||||
half scale = scale_gm.GetValue(scale_offset);
|
||||
|
||||
Muls(output_local, output_local, (float)scale, QK4_0);
|
||||
|
||||
input_queue.FreeTensor(input_local);
|
||||
cast_queue.FreeTensor(cast_local);
|
||||
output_queue.EnQue(output_local);
|
||||
|
||||
copy_out(output_offset);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate() {
|
||||
for (int64_t i = ir; i < ir + dr; i++) {
|
||||
for (int64_t j = 0; j < group_size_in_row; j++) {
|
||||
calculate_group(i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t input_ne[4];
|
||||
size_t input_stride[4];
|
||||
|
||||
int64_t scale_ne[4];
|
||||
size_t scale_stride[4];
|
||||
|
||||
int64_t indices_ne[4];
|
||||
size_t indices_stride[4];
|
||||
|
||||
int64_t output_ne[4];
|
||||
size_t output_stride[4];
|
||||
|
||||
int64_t ir;
|
||||
int64_t dr;
|
||||
|
||||
int64_t group_size_in_row;
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<int4b_t> input_gm;
|
||||
GlobalTensor<half> scale_gm;
|
||||
GlobalTensor<int32_t> indices_gm;
|
||||
GlobalTensor<float> output_gm;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> cast_queue;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
|
||||
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
|
||||
GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
|
||||
GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
int64_t indices_ne_ub[4];
|
||||
size_t indices_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
|
||||
copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
GET_ROW_Q4_0 op;
|
||||
op.init(input_gm, indices_gm, output_gm, input_ne_ub, indices_ne_ub,
|
||||
indices_nb_ub, output_ne_ub, output_nb_ub);
|
||||
op.calculate();
|
||||
}
|
||||
|
||||
#endif // #ifdef ASCEND_310P
|
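In this q4_0 path the 4-bit payload is stored first (half a byte per element) and the per-group half-precision scales are appended directly after it, one scale per QK4_0 = 32 values; dequantization is then simply value * scale. An illustrative C++ helper for the scale offset computed in init() (not part of the vendored code):

#include <cstddef>
#include <cstdint>

// Byte offset of the first half-precision scale in a q4_0 buffer laid out as
// [4-bit data ... | scales ...], matching `scale_offset` in GET_ROW_Q4_0::init().
size_t q4_0_scale_byte_offset(const int64_t ne[4]) {
    int64_t n_elems = ne[0] * ne[1] * ne[2] * ne[3];
    return (size_t)(n_elems / 2);  // each 4-bit element occupies half a byte
}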
191
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp
vendored
Normal file
|
@@ -0,0 +1,191 @@
|
|||
#include "kernel_operator.h"
|
||||
|
||||
// TODO: optimize; use a template to avoid duplicating this code.
|
||||
using namespace AscendC;
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
|
||||
#define QK8_0 32
|
||||
|
||||
class GET_ROW_Q8_0 {
|
||||
public:
|
||||
__aicore__ inline GET_ROW_Q8_0() {}
|
||||
__aicore__ inline void init(GM_ADDR input, GM_ADDR indices, GM_ADDR output,
|
||||
int64_t *input_ne_ub, int64_t *indices_ne_ub,
|
||||
size_t *indices_nb_ub, int64_t *output_ne_ub,
|
||||
size_t *output_nb_ub) {
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
int64_t op_block_idx = GetBlockIdx();
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
input_ne[i] = input_ne_ub[i];
|
||||
indices_ne[i] = indices_ne_ub[i];
|
||||
indices_stride[i] = indices_nb_ub[i] / indices_nb_ub[0];
|
||||
scale_ne[i] = input_ne_ub[i];
|
||||
output_ne[i] = output_ne_ub[i];
|
||||
output_stride[i] = output_nb_ub[i] / output_nb_ub[0];
|
||||
}
|
||||
|
||||
// one scale for a group.
|
||||
scale_ne[0] /= QK8_0;
|
||||
|
||||
input_stride[0] = 1;
|
||||
scale_stride[0] = 1;
|
||||
output_stride[0] = 1;
|
||||
for (int i = 1; i < 4; i++) {
|
||||
input_stride[i] = input_stride[i - 1] * input_ne[i - 1];
|
||||
scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
|
||||
}
|
||||
|
||||
group_size_in_row = input_ne[0] / QK8_0;
|
||||
int64_t scale_offset = input_ne[0] * input_ne[1] * input_ne[2] *
|
||||
input_ne[3] * sizeof(int8_t);
|
||||
|
||||
// Indices has two dims. n_elements = the total number of rows to fetch.
|
||||
// dr = the number of rows this block should fetch.
|
||||
uint64_t n_elements =
|
||||
indices_ne[0] * indices_ne[1] * indices_ne[2] * indices_ne[3];
|
||||
dr = n_elements / op_block_num;
|
||||
|
||||
uint64_t tails = n_elements % op_block_num;
|
||||
if (op_block_idx < tails) {
|
||||
dr += 1;
|
||||
ir = dr * op_block_idx;
|
||||
} else {
|
||||
ir = dr * op_block_idx + tails;
|
||||
}
|
||||
|
||||
input_gm.SetGlobalBuffer((__gm__ int8_t *)input);
|
||||
scale_gm.SetGlobalBuffer((__gm__ half *)(input + scale_offset));
|
||||
indices_gm.SetGlobalBuffer((__gm__ int32_t *)indices);
|
||||
output_gm.SetGlobalBuffer((__gm__ float *)output);
|
||||
|
||||
pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
|
||||
pipe.InitBuffer(cast_queue, BUFFER_NUM, QK8_0 * sizeof(half));
|
||||
pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(float));
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in(uint32_t offset) {
|
||||
LocalTensor<int8_t> input_local = input_queue.AllocTensor<int8_t>();
|
||||
DataCopy(input_local, input_gm[offset], QK8_0);
|
||||
input_queue.EnQue(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out(uint32_t offset) {
|
||||
LocalTensor<float> output_local = output_queue.DeQue<float>();
|
||||
DataCopy(output_gm[offset], output_local, QK8_0);
|
||||
output_queue.FreeTensor(output_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate_group(int64_t idx, int64_t group) {
|
||||
const int64_t indices_ne2_idx = idx / (indices_ne[0] * indices_ne[1]);
|
||||
const int64_t indices_ne1_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1]) /
|
||||
indices_ne[0];
|
||||
const int64_t indices_ne0_idx =
|
||||
(idx - indices_ne2_idx * indices_ne[0] * indices_ne[1] -
|
||||
indices_ne1_idx * indices_ne[0]);
|
||||
|
||||
const int64_t indices_offset = indices_ne0_idx * indices_stride[0] +
|
||||
indices_ne1_idx * indices_stride[1] +
|
||||
indices_ne2_idx * indices_stride[2];
|
||||
const int32_t selected_row_idx = indices_gm.GetValue(indices_offset);
|
||||
|
||||
const int64_t input_offset = selected_row_idx * input_stride[1] +
|
||||
indices_ne1_idx * input_stride[2] +
|
||||
indices_ne2_idx * input_stride[3] +
|
||||
group * QK8_0;
|
||||
const int64_t scale_offset = selected_row_idx * scale_stride[1] +
|
||||
indices_ne1_idx * scale_stride[2] +
|
||||
indices_ne2_idx * scale_stride[3] + group;
|
||||
const int64_t output_offset = indices_ne0_idx * output_stride[1] +
|
||||
indices_ne1_idx * output_stride[2] +
|
||||
indices_ne2_idx * output_stride[3] +
|
||||
group * QK8_0;
|
||||
|
||||
copy_in(input_offset);
|
||||
LocalTensor<int8_t> input_local = input_queue.DeQue<int8_t>();
|
||||
LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();
|
||||
LocalTensor<float> output_local = output_queue.AllocTensor<float>();
|
||||
|
||||
// TODO: cast more data to speed up.
|
||||
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK8_0);
|
||||
Cast(output_local, cast_local, RoundMode::CAST_NONE, QK8_0);
|
||||
|
||||
// Only the multiplication needs to be computed per group.
|
||||
half scale = scale_gm.GetValue(scale_offset);
|
||||
Muls(output_local, output_local, (float)scale, QK8_0);
|
||||
|
||||
input_queue.FreeTensor(input_local);
|
||||
cast_queue.FreeTensor(cast_local);
|
||||
output_queue.EnQue(output_local);
|
||||
|
||||
copy_out(output_offset);
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate() {
|
||||
for (int64_t i = ir; i < ir + dr; i++) {
|
||||
for (int64_t j = 0; j < group_size_in_row; j++) {
|
||||
calculate_group(i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t input_ne[4];
|
||||
size_t input_stride[4];
|
||||
|
||||
int64_t scale_ne[4];
|
||||
size_t scale_stride[4];
|
||||
|
||||
int64_t indices_ne[4];
|
||||
size_t indices_stride[4];
|
||||
|
||||
int64_t output_ne[4];
|
||||
size_t output_stride[4];
|
||||
|
||||
int64_t ir;
|
||||
int64_t dr;
|
||||
|
||||
int64_t group_size_in_row;
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<int8_t> input_gm;
|
||||
GlobalTensor<half> scale_gm;
|
||||
GlobalTensor<int32_t> indices_gm;
|
||||
GlobalTensor<float> output_gm;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> cast_queue;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_get_row_q8_0(
|
||||
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
|
||||
GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
|
||||
GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
int64_t indices_ne_ub[4];
|
||||
size_t indices_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
size_t output_nb_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(indices_ne_gm, indices_ne_ub, 32);
|
||||
copy_to_ub(indices_nb_gm, indices_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
copy_to_ub(output_nb_gm, output_nb_ub, 32);
|
||||
|
||||
GET_ROW_Q8_0 op;
|
||||
op.init(input_gm, indices_gm, output_gm, input_ne_ub, indices_ne_ub,
|
||||
indices_nb_ub, output_ne_ub, output_nb_ub);
|
||||
op.calculate();
|
||||
}
|
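The calculate_row() / calculate_group() methods all start by decoding a flat element index of the indices tensor back into (i0, i1, i2) coordinates before building the input, scale and output offsets. The same arithmetic as a standalone sketch (hypothetical helper, illustration only):

#include <cstdint>

struct Coord3 { int64_t i0, i1, i2; };

// Decompose a flat index over an indices tensor with extents ne[0..2],
// matching the divisions and remainders used in the kernels above.
Coord3 unflatten(int64_t idx, const int64_t ne[3]) {
    Coord3 c;
    c.i2 = idx / (ne[0] * ne[1]);
    c.i1 = (idx - c.i2 * ne[0] * ne[1]) / ne[0];
    c.i0 = idx - c.i2 * ne[0] * ne[1] - c.i1 * ne[0];
    return c;
}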
218
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp
vendored
Normal file
|
@@ -0,0 +1,218 @@
|
|||
#include "kernel_operator.h"
|
||||
|
||||
using namespace AscendC;
|
||||
#ifdef ASCEND_310P
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
// Let the remaining test cases continue to run; just print an error message here. Of course, the test case that calls this operator fails.
|
||||
printf("Ascend310P not support f16->8bit quantization.\n");
|
||||
}
|
||||
#else
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
#define QK8_0 32
|
||||
|
||||
class QUANTIZE_F16_Q8_0 {
|
||||
public:
|
||||
__aicore__ inline QUANTIZE_F16_Q8_0() {}
|
||||
__aicore__ inline void init(GM_ADDR input, GM_ADDR output,
|
||||
int64_t *input_ne_ub, size_t *input_nb_ub,
|
||||
int64_t *output_ne_ub) {
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
int64_t op_block_idx = GetBlockIdx();
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
input_ne[i] = input_ne_ub[i];
|
||||
input_stride[i] = input_nb_ub[i] / input_nb_ub[0];
|
||||
|
||||
output_ne[i] = output_ne_ub[i];
|
||||
}
|
||||
|
||||
output_stride[0] = 1;
|
||||
for (int i = 1; i < 4; i++) {
|
||||
output_stride[i] = output_stride[i - 1] * output_ne[i - 1];
|
||||
}
|
||||
|
||||
scale_ne = input_ne;
|
||||
scale_stride[0] = 1;
|
||||
scale_stride[1] = input_ne[0] / QK8_0;
|
||||
for (int i = 2; i < 4; i++) {
|
||||
scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
|
||||
}
|
||||
|
||||
// split input tensor by rows.
|
||||
uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3];
|
||||
dr = nr / op_block_num;
|
||||
|
||||
uint64_t tails = nr % op_block_num;
|
||||
if (op_block_idx < tails) {
|
||||
dr += 1;
|
||||
ir = dr * op_block_idx;
|
||||
} else {
|
||||
ir = dr * op_block_idx + tails;
|
||||
}
|
||||
|
||||
group_size_in_row = scale_stride[1];
|
||||
int64_t output_size = output_ne[0] * output_ne[1] * output_ne[2] *
|
||||
output_ne[3] * sizeof(uint8_t);
|
||||
|
||||
input_gm.SetGlobalBuffer((__gm__ half *)input);
|
||||
output_gm.SetGlobalBuffer((__gm__ int8_t *)output);
|
||||
scale_gm.SetGlobalBuffer((__gm__ half *)(output + output_size + ir *
|
||||
group_size_in_row *
|
||||
sizeof(half)));
|
||||
|
||||
pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(half));
|
||||
pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
|
||||
pipe.InitBuffer(work_queue, 1, 32);
|
||||
pipe.InitBuffer(max_queue, 1, 32);
|
||||
pipe.InitBuffer(abs_queue, 1, QK8_0 * sizeof(float));
|
||||
pipe.InitBuffer(scale_queue, 1, 32);
|
||||
pipe.InitBuffer(cast_queue ,1 ,QK8_0 * sizeof(float));
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in(uint32_t offset) {
|
||||
LocalTensor<half> input_local = input_queue.AllocTensor<half>();
|
||||
DataCopy(input_local, input_gm[offset], QK8_0);
|
||||
input_queue.EnQue(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out(uint32_t offset) {
|
||||
LocalTensor<int8_t> output_local = output_queue.DeQue<int8_t>();
|
||||
DataCopy(output_gm[offset], output_local, QK8_0);
|
||||
output_queue.FreeTensor(output_local);
|
||||
}
|
||||
|
||||
__aicore__ inline half calculate_group(int64_t row, int64_t group) {
|
||||
const int64_t i3 = row / (input_ne[1] * input_ne[2]);
|
||||
const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1];
|
||||
const int64_t i1 =
|
||||
row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1];
|
||||
|
||||
const int64_t input_offset = i1 * input_stride[1] +
|
||||
i2 * input_stride[2] +
|
||||
i3 * input_stride[3] + QK8_0 * group;
|
||||
|
||||
const int64_t output_offset = i1 * output_stride[1] +
|
||||
i2 * output_stride[2] +
|
||||
i3 * output_stride[3] + QK8_0 * group;
|
||||
|
||||
copy_in(input_offset);
|
||||
LocalTensor<half> input_local = input_queue.DeQue<half>();
|
||||
LocalTensor<int8_t> output_local = output_queue.AllocTensor<int8_t>();
|
||||
LocalTensor<float> work_local = work_queue.AllocTensor<float>();
|
||||
LocalTensor<float> abs_local = abs_queue.AllocTensor<float>();
|
||||
LocalTensor<float> max_local = max_queue.AllocTensor<float>();
|
||||
LocalTensor<float> cast_local = cast_queue.AllocTensor<float>();
|
||||
|
||||
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK8_0);
|
||||
Abs(abs_local, cast_local, QK8_0);
|
||||
ReduceMax(max_local, abs_local, work_local, QK8_0);
|
||||
|
||||
pipe_barrier(PIPE_ALL);
|
||||
float d = max_local.GetValue(0);
|
||||
d = d / ((1 << 7) - 1);
|
||||
if (d != 0) {
|
||||
Muls(cast_local, cast_local, 1.0f / d, QK8_0);
|
||||
}
|
||||
|
||||
Cast(cast_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
|
||||
Cast(input_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
|
||||
Cast(output_local, input_local, RoundMode::CAST_ROUND, QK8_0);
|
||||
output_queue.EnQue(output_local);
|
||||
copy_out(output_offset);
|
||||
|
||||
input_queue.FreeTensor(input_local);
|
||||
work_queue.FreeTensor(work_local);
|
||||
abs_queue.FreeTensor(abs_local);
|
||||
max_queue.FreeTensor(max_local);
|
||||
cast_queue.FreeTensor(cast_local);
|
||||
return (half)d;
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate() {
|
||||
LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
|
||||
uint32_t scale_local_offset = 0;
|
||||
uint32_t scale_global_offset = 0;
|
||||
for (int64_t i = ir; i < ir + dr; i++) {
|
||||
for (int64_t j = 0; j < group_size_in_row; j++) {
|
||||
half scale = calculate_group(i, j);
|
||||
scale_local.SetValue(scale_local_offset++, scale);
|
||||
if (scale_local_offset == 16) {
|
||||
scale_local_offset = 0;
|
||||
// TODO: OPTIMIZE ME
|
||||
pipe_barrier(PIPE_ALL);
|
||||
DataCopy(scale_gm[scale_global_offset], scale_local, 16);
|
||||
pipe_barrier(PIPE_ALL);
|
||||
scale_global_offset += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (scale_local_offset != 0) {
|
||||
pipe_barrier(PIPE_ALL);
|
||||
DataCopyExtParams dataCopyParams;
|
||||
dataCopyParams.blockCount = 1;
|
||||
dataCopyParams.blockLen = scale_local_offset * sizeof(half);
|
||||
DataCopyPad(scale_gm[scale_global_offset], scale_local,
|
||||
dataCopyParams);
|
||||
pipe_barrier(PIPE_ALL);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t input_ne[4];
|
||||
size_t input_stride[4];
|
||||
|
||||
int64_t *scale_ne;
|
||||
size_t scale_stride[4];
|
||||
|
||||
int64_t output_ne[4];
|
||||
size_t output_stride[4];
|
||||
|
||||
int64_t group_size_in_row;
|
||||
|
||||
int64_t ir;
|
||||
int64_t dr;
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<half> input_gm;
|
||||
GlobalTensor<half> scale_gm;
|
||||
GlobalTensor<int8_t> output_gm;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
|
||||
TQue<QuePosition::VECIN, 1> work_queue;
|
||||
TQue<QuePosition::VECOUT, 1> max_queue;
|
||||
TQue<QuePosition::VECIN, 1> abs_queue;
|
||||
TQue<QuePosition::VECOUT, 1> scale_queue;
|
||||
TQue<QuePosition::VECOUT, 1> cast_queue;
|
||||
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
|
||||
QUANTIZE_F16_Q8_0 op;
|
||||
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
|
||||
op.calculate();
|
||||
}
|
||||
|
||||
#endif // #ifdef ASCEND_310P
|
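Per 32-element group, this quantizer takes d = max|x| / 127 and stores round(x / d) as int8, returning d so the per-group scales can be appended (as half) after the int8 payload. A scalar C++ reference of that per-group math (an illustrative sketch, not the kernel itself):

#include <cstdint>
#include <cmath>
#include <algorithm>

// Scalar reference for one QK8_0 (= 32) element group, mirroring the
// Abs + ReduceMax + Muls(1/d) + CAST_ROUND sequence in the kernel above.
float quantize_q8_0_group(const float x[32], int8_t q[32]) {
    float amax = 0.0f;
    for (int i = 0; i < 32; ++i) amax = std::max(amax, std::fabs(x[i]));
    const float d = amax / ((1 << 7) - 1);            // 127
    const float id = d != 0.0f ? 1.0f / d : 0.0f;
    for (int i = 0; i < 32; ++i) q[i] = (int8_t)std::lroundf(x[i] * id);
    return d;                                         // stored as half after the data
}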
216
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp
vendored
Normal file
|
@@ -0,0 +1,216 @@
|
|||
#include "kernel_operator.h"
|
||||
|
||||
using namespace AscendC;
|
||||
#ifdef ASCEND_310P // 310P not support f32->8bit quantization
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
// Let the remaining test cases continue to run; just print an error message here. Of course, the test case that calls this operator fails.
|
||||
printf("Ascend310P not support f32->8bit quantization.\n");
|
||||
}
|
||||
#else
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
#define QK8_0 32
|
||||
|
||||
class QUANTIZE_F32_Q8_0 {
|
||||
public:
|
||||
__aicore__ inline QUANTIZE_F32_Q8_0() {}
|
||||
__aicore__ inline void init(GM_ADDR input, GM_ADDR output,
|
||||
int64_t *input_ne_ub, size_t *input_nb_ub,
|
||||
int64_t *output_ne_ub) {
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
int64_t op_block_idx = GetBlockIdx();
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
input_ne[i] = input_ne_ub[i];
|
||||
input_stride[i] = input_nb_ub[i] / input_nb_ub[0];
|
||||
|
||||
output_ne[i] = output_ne_ub[i];
|
||||
}
|
||||
|
||||
output_stride[0] = 1;
|
||||
for (int i = 1; i < 4; i++) {
|
||||
output_stride[i] = output_stride[i - 1] * output_ne[i - 1];
|
||||
}
|
||||
|
||||
scale_ne = input_ne;
|
||||
scale_stride[0] = 1;
|
||||
scale_stride[1] = input_ne[0] / QK8_0;
|
||||
for (int i = 2; i < 4; i++) {
|
||||
scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
|
||||
}
|
||||
|
||||
// split input tensor by rows.
|
||||
uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3];
|
||||
dr = nr / op_block_num;
|
||||
|
||||
uint64_t tails = nr % op_block_num;
|
||||
if (op_block_idx < tails) {
|
||||
dr += 1;
|
||||
ir = dr * op_block_idx;
|
||||
} else {
|
||||
ir = dr * op_block_idx + tails;
|
||||
}
|
||||
|
||||
group_size_in_row = scale_stride[1];
|
||||
int64_t output_size = output_ne[0] * output_ne[1] * output_ne[2] *
|
||||
output_ne[3] * sizeof(uint8_t);
|
||||
|
||||
input_gm.SetGlobalBuffer((__gm__ float *)input);
|
||||
output_gm.SetGlobalBuffer((__gm__ int8_t *)output);
|
||||
scale_gm.SetGlobalBuffer((__gm__ half *)(output + output_size +
|
||||
ir * group_size_in_row *
|
||||
sizeof(half)));
|
||||
|
||||
pipe.InitBuffer(input_queue, BUFFER_NUM, QK8_0 * sizeof(float));
|
||||
pipe.InitBuffer(output_queue, BUFFER_NUM, QK8_0 * sizeof(int8_t));
|
||||
pipe.InitBuffer(work_queue, 1, 32);
|
||||
pipe.InitBuffer(max_queue, 1, 32);
|
||||
pipe.InitBuffer(abs_queue, 1, QK8_0 * sizeof(float));
|
||||
pipe.InitBuffer(cast_queue, 1, QK8_0 * sizeof(half));
|
||||
pipe.InitBuffer(scale_queue, 1, 32);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in(uint32_t offset) {
|
||||
LocalTensor<float> input_local = input_queue.AllocTensor<float>();
|
||||
DataCopy(input_local, input_gm[offset], QK8_0);
|
||||
input_queue.EnQue(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out(uint32_t offset) {
|
||||
LocalTensor<int8_t> output_local = output_queue.DeQue<int8_t>();
|
||||
DataCopy(output_gm[offset], output_local, QK8_0);
|
||||
output_queue.FreeTensor(output_local);
|
||||
}
|
||||
|
||||
__aicore__ inline half calculate_group(int64_t row, int64_t group) {
|
||||
const int64_t i3 = row / (input_ne[1] * input_ne[2]);
|
||||
const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1];
|
||||
const int64_t i1 =
|
||||
row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1];
|
||||
|
||||
const int64_t input_offset = i1 * input_stride[1] +
|
||||
i2 * input_stride[2] +
|
||||
i3 * input_stride[3] + QK8_0 * group;
|
||||
|
||||
const int64_t output_offset = i1 * output_stride[1] +
|
||||
i2 * output_stride[2] +
|
||||
i3 * output_stride[3] + QK8_0 * group;
|
||||
|
||||
copy_in(input_offset);
|
||||
LocalTensor<float> input_local = input_queue.DeQue<float>();
|
||||
LocalTensor<int8_t> output_local = output_queue.AllocTensor<int8_t>();
|
||||
LocalTensor<float> work_local = work_queue.AllocTensor<float>();
|
||||
LocalTensor<float> abs_local = abs_queue.AllocTensor<float>();
|
||||
LocalTensor<float> max_local = max_queue.AllocTensor<float>();
|
||||
LocalTensor<half> cast_local = cast_queue.AllocTensor<half>();
|
||||
|
||||
Abs(abs_local, input_local, QK8_0);
|
||||
ReduceMax(max_local, abs_local, work_local, QK8_0);
|
||||
pipe_barrier(PIPE_ALL);
|
||||
float d = max_local.GetValue(0);
|
||||
d = d / ((1 << 7) - 1);
|
||||
if (d != 0) {
|
||||
Muls(input_local, input_local, 1.0f / d, QK8_0);
|
||||
}
|
||||
|
||||
Cast(input_local, input_local, RoundMode::CAST_ROUND, QK8_0);
|
||||
Cast(cast_local, input_local, RoundMode::CAST_ROUND, QK8_0);
|
||||
Cast(output_local, cast_local, RoundMode::CAST_ROUND, QK8_0);
|
||||
output_queue.EnQue(output_local);
|
||||
copy_out(output_offset);
|
||||
|
||||
input_queue.FreeTensor(input_local);
|
||||
work_queue.FreeTensor(work_local);
|
||||
abs_queue.FreeTensor(abs_local);
|
||||
max_queue.FreeTensor(max_local);
|
||||
cast_queue.FreeTensor(cast_local);
|
||||
|
||||
return (half)d;
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate() {
|
||||
LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
|
||||
uint32_t scale_local_offset = 0;
|
||||
uint32_t scale_global_offset = 0;
|
||||
for (int64_t i = ir; i < ir + dr; i++) {
|
||||
for (int64_t j = 0; j < group_size_in_row; j++) {
|
||||
half scale = calculate_group(i, j);
|
||||
scale_local.SetValue(scale_local_offset++, scale);
|
||||
if (scale_local_offset == 16) {
|
||||
scale_local_offset = 0;
|
||||
// TODO: OPTIMIZE ME
|
||||
pipe_barrier(PIPE_ALL);
|
||||
DataCopy(scale_gm[scale_global_offset], scale_local, 16);
|
||||
pipe_barrier(PIPE_ALL);
|
||||
scale_global_offset += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (scale_local_offset != 0) {
|
||||
pipe_barrier(PIPE_ALL);
|
||||
DataCopyExtParams dataCopyParams;
|
||||
dataCopyParams.blockCount = 1;
|
||||
dataCopyParams.blockLen = scale_local_offset * sizeof(half);
|
||||
DataCopyPad(scale_gm[scale_global_offset], scale_local,
|
||||
dataCopyParams);
|
||||
pipe_barrier(PIPE_ALL);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t input_ne[4];
|
||||
size_t input_stride[4];
|
||||
|
||||
int64_t *scale_ne;
|
||||
size_t scale_stride[4];
|
||||
|
||||
int64_t output_ne[4];
|
||||
size_t output_stride[4];
|
||||
|
||||
int64_t group_size_in_row;
|
||||
|
||||
int64_t ir;
|
||||
int64_t dr;
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<float> input_gm;
|
||||
GlobalTensor<half> scale_gm;
|
||||
GlobalTensor<int8_t> output_gm;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
|
||||
TQue<QuePosition::VECIN, 1> work_queue;
|
||||
TQue<QuePosition::VECOUT, 1> max_queue;
|
||||
TQue<QuePosition::VECIN, 1> abs_queue;
|
||||
TQue<QuePosition::VECIN, 1> cast_queue;
|
||||
TQue<QuePosition::VECOUT, 1> scale_queue;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
|
||||
QUANTIZE_F32_Q8_0 op;
|
||||
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
|
||||
op.calculate();
|
||||
}
|
||||
|
||||
#endif // #ifdef ASCEND_310P
|
295
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp
vendored
Normal file
|
@@ -0,0 +1,295 @@
|
|||
#include "kernel_operator.h"
|
||||
|
||||
using namespace AscendC;
|
||||
#ifdef ASCEND_310P // 310P not support float->4bit quantization
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
// Let the remaining test cases continue to run; just print an error message here. Of course, the test case that calls this operator fails.
|
||||
printf("Ascend310P not support f32->4bit quantization.\n");
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
// Let the remaining test cases continue to run; just print an error message here. Of course, the test case that calls this operator fails.
|
||||
printf("Ascend310P not support f16->4bit quantization.\n");
|
||||
}
|
||||
#else
|
||||
|
||||
#define BUFFER_NUM 2
|
||||
#define Group_Size 32
|
||||
|
||||
template <typename SRC_T>
|
||||
class QUANTIZE_FLOAT_TO_Q4_0 {
|
||||
public:
|
||||
__aicore__ inline QUANTIZE_FLOAT_TO_Q4_0() {}
|
||||
__aicore__ inline void init(GM_ADDR input, GM_ADDR output,
|
||||
int64_t *input_ne_ub, size_t *input_nb_ub,
|
||||
int64_t *output_ne_ub) {
|
||||
// TODO: fix test_case CPY(type_src=f16,type_dst=q4_0,ne=[256,4,4,4],
|
||||
// permute=[0,0,0,0]):
|
||||
// [CPY] NMSE = 0.000008343 > 0.000001000 FAIL
|
||||
int64_t op_block_num = GetBlockNum();
|
||||
int64_t op_block_idx = GetBlockIdx();
|
||||
|
||||
// input stride of data elements
|
||||
for (int i = 0; i < 4; i++) {
|
||||
input_ne[i] = input_ne_ub[i];
|
||||
input_stride[i] = input_nb_ub[i] / input_nb_ub[0];
|
||||
output_ne[i] = output_ne_ub[i];
|
||||
}
|
||||
|
||||
// output stride of data elements
|
||||
output_stride[0] = 1;
|
||||
for (int i = 1; i < 4; i++) {
|
||||
output_stride[i] = output_stride[i - 1] * output_ne[i - 1];
|
||||
}
|
||||
|
||||
// Scales are stored one by one after the data: [group1_scale, group2_scale, ...]
|
||||
scale_ne = input_ne;
|
||||
scale_stride[0] = 1;
|
||||
scale_stride[1] = input_ne[0] / Group_Size;
|
||||
for (int i = 2; i < 4; i++) {
|
||||
scale_stride[i] = scale_stride[i - 1] * scale_ne[i - 1];
|
||||
}
|
||||
|
||||
// split input tensor by rows.
|
||||
uint64_t nr = input_ne[1] * input_ne[2] * input_ne[3];
|
||||
dr = nr / op_block_num;
|
||||
|
||||
uint64_t tails = nr % op_block_num;
|
||||
if (op_block_idx < tails) {
|
||||
dr += 1;
|
||||
ir = dr * op_block_idx;
|
||||
} else {
|
||||
ir = dr * op_block_idx + tails;
|
||||
}
|
||||
|
||||
group_size_in_row = scale_stride[1];
|
||||
int64_t scale_offset = output_ne[0] * output_ne[1] * output_ne[2] *
|
||||
output_ne[3] * sizeof(uint8_t) / 2;
|
||||
|
||||
input_gm.SetGlobalBuffer((__gm__ SRC_T *)input);
|
||||
output_gm.SetGlobalBuffer((__gm__ int8_t *)output);
|
||||
scale_gm.SetGlobalBuffer((__gm__ half *)(output + scale_offset + ir *
|
||||
group_size_in_row *
|
||||
sizeof(half)));
|
||||
|
||||
pipe.InitBuffer(input_queue, BUFFER_NUM, Group_Size * sizeof(SRC_T));
|
||||
pipe.InitBuffer(output_queue, BUFFER_NUM,
|
||||
Group_Size * sizeof(int8_t) / 2);
|
||||
pipe.InitBuffer(cast_queue , 1, Group_Size * sizeof(float));
|
||||
pipe.InitBuffer(work_queue, 1, Group_Size * sizeof(float));
|
||||
pipe.InitBuffer(max_queue, 1, Group_Size * sizeof(float));
|
||||
pipe.InitBuffer(min_queue, 1, Group_Size * sizeof(float));
|
||||
pipe.InitBuffer(scale_queue, 1, Group_Size / 2 * sizeof(half));
|
||||
pipe.InitBuffer(int8_queue, 1, Group_Size * sizeof(int8_t));
|
||||
pipe.InitBuffer(half_queue, 1, Group_Size * sizeof(half));
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_in(uint32_t offset) {
|
||||
LocalTensor<SRC_T> input_local = input_queue.AllocTensor<SRC_T>();
|
||||
DataCopy(input_local, input_gm[offset], Group_Size);
|
||||
input_queue.EnQue(input_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void copy_out(uint32_t offset) {
|
||||
// Reinterpret Group_Size (32) int4b_t values as Group_Size / 2 int8_t values,
|
||||
// and use DataCopyPad to avoid the 32-byte alignment requirement.
|
||||
LocalTensor<int4b_t> output_local = output_queue.DeQue<int4b_t>();
|
||||
LocalTensor<int8_t> output_int8_local =
|
||||
output_local.ReinterpretCast<int8_t>();
|
||||
|
||||
DataCopyExtParams dataCopyParams;
|
||||
dataCopyParams.blockCount = 1;
|
||||
dataCopyParams.blockLen = Group_Size / 2 * sizeof(int8_t);
|
||||
DataCopyPad(output_gm[offset], output_int8_local, dataCopyParams);
|
||||
|
||||
output_queue.FreeTensor(output_local);
|
||||
}
|
||||
|
||||
__aicore__ inline void input_to_cast(LocalTensor<float> cast_local,
|
||||
LocalTensor<float> input_local) {
|
||||
DataCopy(cast_local, input_local, Group_Size);
|
||||
}
|
||||
|
||||
__aicore__ inline void input_to_cast(LocalTensor<float> cast_local,
|
||||
LocalTensor<half> input_local) {
|
||||
Cast(cast_local, input_local, RoundMode::CAST_NONE, Group_Size);
|
||||
}
|
||||
|
||||
__aicore__ inline half calculate_group(int64_t row, int64_t group) {
|
||||
const int64_t i3 = row / (input_ne[1] * input_ne[2]);
|
||||
const int64_t i2 = (row - i3 * input_ne[1] * input_ne[2]) / input_ne[1];
|
||||
const int64_t i1 =
|
||||
row - i3 * input_ne[1] * input_ne[2] - i2 * input_ne[1];
|
||||
|
||||
const int64_t input_offset = i1 * input_stride[1] +
|
||||
i2 * input_stride[2] +
|
||||
i3 * input_stride[3] + Group_Size * group;
|
||||
|
||||
// output_offset is stride for output_gm which datatype is int8_t and
|
||||
// divided by 2 is needed for int4b_t.
|
||||
const int64_t output_offset = (i1 * output_stride[1] +
|
||||
i2 * output_stride[2] +
|
||||
i3 * output_stride[3] +
|
||||
Group_Size * group) / 2;
|
||||
copy_in(input_offset);
|
||||
|
||||
LocalTensor<SRC_T> input_local = input_queue.DeQue<SRC_T>();
|
||||
LocalTensor<int4b_t> output_local = output_queue.AllocTensor<int4b_t>();
|
||||
LocalTensor<float> cast_local = cast_queue.AllocTensor<float>();
|
||||
LocalTensor<float> work_local = work_queue.AllocTensor<float>();
|
||||
LocalTensor<float> max_local = max_queue.AllocTensor<float>();
|
||||
LocalTensor<float> min_local = min_queue.AllocTensor<float>();
|
||||
LocalTensor<int8_t> int8_local = int8_queue.AllocTensor<int8_t>();
|
||||
LocalTensor<half> half_local = half_queue.AllocTensor<half>();
|
||||
|
||||
input_to_cast(cast_local, input_local);
|
||||
|
||||
ReduceMax(max_local, cast_local, work_local, Group_Size);
|
||||
ReduceMin(min_local, cast_local, work_local, Group_Size);
|
||||
const float max_value = max_local.GetValue(0);
|
||||
const float min_value = min_local.GetValue(0);
|
||||
float d = max_value;
|
||||
if (min_value < 0 && (-1 * min_value) > max_value) {
|
||||
d = min_value;
|
||||
}
|
||||
|
||||
d = d / (-8);
|
||||
if (d != 0) {
|
||||
Muls(cast_local, cast_local, 1.0f / d, Group_Size);
|
||||
}
|
||||
|
||||
// range: [-8,8] -> [0.5,16.5] -> [0,16] -> [0,15] -> [-8,7]
|
||||
float scalar = 8.5f;
|
||||
Adds(cast_local, cast_local, scalar, Group_Size);
|
||||
Cast(cast_local, cast_local, RoundMode::CAST_FLOOR, Group_Size);
|
||||
scalar = 15.0f;
|
||||
Mins(cast_local, cast_local, scalar, Group_Size);
|
||||
scalar = -8.0f;
|
||||
Adds(cast_local, cast_local, scalar, Group_Size);
|
||||
|
||||
// float->half->int4b
|
||||
Cast(half_local, cast_local, RoundMode::CAST_NONE, Group_Size);
|
||||
Cast(output_local, half_local, RoundMode::CAST_NONE, Group_Size);
|
||||
|
||||
output_queue.EnQue(output_local);
|
||||
copy_out(output_offset);
|
||||
|
||||
input_queue.FreeTensor(input_local);
|
||||
work_queue.FreeTensor(work_local);
|
||||
max_queue.FreeTensor(max_local);
|
||||
min_queue.FreeTensor(min_local);
|
||||
int8_queue.FreeTensor(int8_local);
|
||||
half_queue.FreeTensor(half_local);
|
||||
cast_queue.FreeTensor(cast_local);
|
||||
return (half)d;
|
||||
}
|
||||
|
||||
__aicore__ inline void calculate() {
|
||||
LocalTensor<half> scale_local = scale_queue.AllocTensor<half>();
|
||||
uint32_t scale_local_offset = 0;
|
||||
uint32_t scale_global_offset = 0;
|
||||
for (int64_t i = ir; i < ir + dr; i++) {
|
||||
for (int64_t j = 0; j < group_size_in_row; j++) {
|
||||
half scale = calculate_group(i, j);
|
||||
scale_local.SetValue(scale_local_offset++, scale);
|
||||
// Copy Group_Size/2 length data each time.
|
||||
if (scale_local_offset == Group_Size / 2) {
|
||||
scale_local_offset = 0;
|
||||
// TODO: OPTIMIZE ME
|
||||
pipe_barrier(PIPE_ALL);
|
||||
DataCopy(scale_gm[scale_global_offset], scale_local,
|
||||
Group_Size / 2);
|
||||
pipe_barrier(PIPE_ALL);
|
||||
scale_global_offset += Group_Size / 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (scale_local_offset != 0) {
|
||||
pipe_barrier(PIPE_ALL);
|
||||
DataCopyExtParams dataCopyParams;
|
||||
dataCopyParams.blockCount = 1;
|
||||
dataCopyParams.blockLen = scale_local_offset * sizeof(half);
|
||||
DataCopyPad(scale_gm[scale_global_offset], scale_local,
|
||||
dataCopyParams);
|
||||
pipe_barrier(PIPE_ALL);
|
||||
}
|
||||
scale_queue.FreeTensor(scale_local);
|
||||
}
|
||||
|
||||
private:
|
||||
int64_t input_ne[4];
|
||||
size_t input_stride[4];
|
||||
|
||||
int64_t *scale_ne;
|
||||
size_t scale_stride[4];
|
||||
|
||||
int64_t output_ne[4];
|
||||
size_t output_stride[4];
|
||||
|
||||
int64_t group_size_in_row;
|
||||
|
||||
int64_t ir;
|
||||
int64_t dr;
|
||||
|
||||
TPipe pipe;
|
||||
GlobalTensor<SRC_T> input_gm;
|
||||
GlobalTensor<half> scale_gm;
|
||||
GlobalTensor<int8_t> output_gm;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
|
||||
TQue<QuePosition::VECIN, BUFFER_NUM> work_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> max_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> min_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> scale_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> cast_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> int8_queue;
|
||||
TQue<QuePosition::VECOUT, BUFFER_NUM> half_queue;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline void copy_to_ub(GM_ADDR gm, T *ub, size_t size) {
|
||||
auto gm_ptr = (__gm__ uint8_t *)gm;
|
||||
auto ub_ptr = (uint8_t *)(ub);
|
||||
for (int32_t i = 0; i < size; ++i, ++ub_ptr, ++gm_ptr) {
|
||||
*ub_ptr = *gm_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
|
||||
QUANTIZE_FLOAT_TO_Q4_0<half> op;
|
||||
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
|
||||
op.calculate();
|
||||
}
|
||||
|
||||
extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
|
||||
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
|
||||
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
|
||||
int64_t input_ne_ub[4];
|
||||
size_t input_nb_ub[4];
|
||||
int64_t output_ne_ub[4];
|
||||
|
||||
copy_to_ub(input_ne_gm, input_ne_ub, 32);
|
||||
copy_to_ub(input_nb_gm, input_nb_ub, 32);
|
||||
copy_to_ub(output_ne_gm, output_ne_ub, 32);
|
||||
|
||||
QUANTIZE_FLOAT_TO_Q4_0<float> op;
|
||||
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
|
||||
op.calculate();
|
||||
}
|
||||
|
||||
#endif // #ifdef ASCEND_310P
|
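As the range comment above spells out ([-8,8] -> [0.5,16.5] -> [0,16] -> [0,15] -> [-8,7]), the q4_0 quantizer scales by the signed extremum divided by -8, shifts by 8.5, floors, clamps to 15 and shifts back by -8. A scalar C++ reference of that mapping (illustrative only, not the vendored kernel):

#include <cstdint>
#include <cmath>
#include <algorithm>

// Scalar reference for one Group_Size (= 32) element group, following the
// Muls(1/d) + Adds(8.5) + CAST_FLOOR + Mins(15) + Adds(-8) sequence above.
float quantize_q4_0_group(const float x[32], int8_t q[32]) {
    float maxv = x[0], minv = x[0];
    for (int i = 1; i < 32; ++i) {
        maxv = std::max(maxv, x[i]);
        minv = std::min(minv, x[i]);
    }
    float d = (minv < 0.0f && -minv > maxv) ? minv : maxv;  // signed extremum
    d /= -8.0f;
    const float id = d != 0.0f ? 1.0f / d : 0.0f;
    for (int i = 0; i < 32; ++i) {
        float v = std::floor(x[i] * id + 8.5f);             // [-8,8] -> [0,16]
        q[i] = (int8_t)(std::min(v, 15.0f) - 8.0f);         // [0,15] -> [-8,7]
    }
    return d;                                               // group scale (half)
}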
File diff suppressed because it is too large
342
packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/CMakeLists.txt
vendored
Normal file
|
@@ -0,0 +1,342 @@
|
|||
function(ggml_add_cpu_backend_variant_impl tag_name)
|
||||
if (tag_name)
|
||||
set(GGML_CPU_NAME ggml-cpu-${tag_name})
|
||||
else()
|
||||
set(GGML_CPU_NAME ggml-cpu)
|
||||
endif()
|
||||
|
||||
ggml_add_backend_library(${GGML_CPU_NAME})
|
||||
|
||||
list (APPEND GGML_CPU_SOURCES
|
||||
ggml-cpu/ggml-cpu.c
|
||||
ggml-cpu/ggml-cpu.cpp
|
||||
ggml-cpu/ggml-cpu-aarch64.cpp
|
||||
ggml-cpu/ggml-cpu-aarch64.h
|
||||
ggml-cpu/ggml-cpu-hbm.cpp
|
||||
ggml-cpu/ggml-cpu-hbm.h
|
||||
ggml-cpu/ggml-cpu-quants.c
|
||||
ggml-cpu/ggml-cpu-quants.h
|
||||
ggml-cpu/ggml-cpu-traits.cpp
|
||||
ggml-cpu/ggml-cpu-traits.h
|
||||
ggml-cpu/amx/amx.cpp
|
||||
ggml-cpu/amx/amx.h
|
||||
ggml-cpu/amx/mmq.cpp
|
||||
ggml-cpu/amx/mmq.h
|
||||
ggml-cpu/ggml-cpu-impl.h
|
||||
)
|
||||
|
||||
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
|
||||
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
|
||||
|
||||
if (APPLE AND GGML_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
||||
if (ACCELERATE_FRAMEWORK)
|
||||
message(STATUS "Accelerate framework found")
|
||||
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
|
||||
|
||||
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
|
||||
else()
|
||||
message(WARNING "Accelerate framework not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (GGML_OPENMP)
|
||||
find_package(OpenMP)
|
||||
if (OpenMP_FOUND)
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
|
||||
|
||||
target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
|
||||
else()
|
||||
message(WARNING "OpenMP not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (GGML_LLAMAFILE)
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)
|
||||
|
||||
list(APPEND GGML_CPU_SOURCES
|
||||
ggml-cpu/llamafile/sgemm.cpp
|
||||
ggml-cpu/llamafile/sgemm.h)
|
||||
endif()
|
||||
|
||||
if (GGML_CPU_HBM)
|
||||
find_library(memkind memkind REQUIRED)
|
||||
|
||||
message(STATUS "Using memkind for CPU HBM")
|
||||
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
|
||||
|
||||
target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
|
||||
endif()
|
||||
|
||||
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
|
||||
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
||||
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
||||
|
||||
message(STATUS "ARM detected")
|
||||
|
||||
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
|
||||
else()
|
||||
check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
|
||||
if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
|
||||
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
|
||||
endif()
|
||||
|
||||
if (GGML_NATIVE)
|
||||
# -mcpu=native does not always enable all the features in some compilers,
|
||||
# so we check for them manually and enable them if available
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
|
||||
INPUT_FILE "/dev/null"
|
||||
OUTPUT_QUIET
|
||||
ERROR_VARIABLE ARM_MCPU
|
||||
RESULT_VARIABLE ARM_MCPU_RESULT
|
||||
)
|
||||
if (NOT ARM_MCPU_RESULT)
|
||||
string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
|
||||
endif()
|
||||
if ("${ARM_MCPU_FLAG}" STREQUAL "")
|
||||
set(ARM_MCPU_FLAG -mcpu=native)
|
||||
message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
|
||||
endif()
|
||||
|
||||
include(CheckCXXSourceRuns)
|
||||
|
||||
function(check_arm_feature tag code)
|
||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
|
||||
check_cxx_source_runs(
|
||||
"${code}"
|
||||
GGML_MACHINE_SUPPORTS_${tag}
|
||||
)
|
||||
if (GGML_MACHINE_SUPPORTS_${tag})
|
||||
set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
|
||||
else()
|
||||
set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
||||
endfunction()
|
||||
|
||||
check_arm_feature(dotprod "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
|
||||
check_arm_feature(i8mm "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
|
||||
check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
|
||||
|
||||
list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
|
||||
else()
|
||||
if (GGML_CPU_ARM_ARCH)
|
||||
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# show enabled features
|
||||
if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
|
||||
set(FEAT_INPUT_FILE "NUL")
|
||||
else()
|
||||
set(FEAT_INPUT_FILE "/dev/null")
|
||||
endif()
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
|
||||
INPUT_FILE ${FEAT_INPUT_FILE}
|
||||
OUTPUT_VARIABLE ARM_FEATURE
|
||||
RESULT_VARIABLE ARM_FEATURE_RESULT
|
||||
)
|
||||
if (ARM_FEATURE_RESULT)
|
||||
message(WARNING "Failed to get ARM features")
|
||||
else()
|
||||
foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
|
||||
string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
|
||||
if (NOT ${feature_pos} EQUAL -1)
|
||||
message(STATUS "ARM feature ${feature} enabled")
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
endif()
|
||||
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
|
||||
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
|
||||
|
||||
message(STATUS "x86 detected")
|
||||
|
||||
if (MSVC)
|
||||
# instruction set detection for MSVC only
|
||||
if (GGML_NATIVE)
|
||||
include(ggml-cpu/cmake/FindSIMD.cmake)
|
||||
endif ()
|
||||
if (GGML_AVX512)
|
||||
list(APPEND ARCH_FLAGS /arch:AVX512)
|
||||
# /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
|
||||
# MSVC has no compile-time flags enabling specific
|
||||
# AVX512 extensions, nor does it define the
|
||||
# macros corresponding to those extensions.
|
||||
# Do it manually.
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX512)
|
||||
if (GGML_AVX512_VBMI)
|
||||
list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
|
||||
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
||||
endif()
|
||||
endif()
|
||||
if (GGML_AVX512_VNNI)
|
||||
list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
|
||||
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||
list(APPEND ARCH_FLAGS -mavx512vnni)
|
||||
endif()
|
||||
endif()
|
||||
if (GGML_AVX512_BF16)
|
||||
list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
|
||||
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||
list(APPEND ARCH_FLAGS -mavx512bf16)
|
||||
endif()
|
||||
endif()
|
||||
if (GGML_AMX_TILE)
|
||||
list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
|
||||
endif()
|
||||
if (GGML_AMX_INT8)
|
||||
list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
|
||||
endif()
|
||||
if (GGML_AMX_BF16)
|
||||
list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
|
||||
endif()
|
||||
elseif (GGML_AVX2)
|
||||
list(APPEND ARCH_FLAGS /arch:AVX2)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
|
||||
elseif (GGML_AVX)
|
||||
list(APPEND ARCH_FLAGS /arch:AVX)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX)
|
||||
else ()
|
||||
list(APPEND ARCH_FLAGS /arch:SSE4.2)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
||||
endif()
|
||||
if (GGML_AVX_VNNI)
|
||||
list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
|
||||
endif()
|
||||
else ()
|
||||
if (GGML_NATIVE)
|
||||
list(APPEND ARCH_FLAGS -march=native)
|
||||
else ()
|
||||
list(APPEND ARCH_FLAGS -msse4.2)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
|
||||
if (GGML_F16C)
|
||||
list(APPEND ARCH_FLAGS -mf16c)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_F16C)
|
||||
endif()
|
||||
if (GGML_FMA)
|
||||
list(APPEND ARCH_FLAGS -mfma)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_FMA)
|
||||
endif()
|
||||
if (GGML_AVX)
|
||||
list(APPEND ARCH_FLAGS -mavx)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX)
|
||||
endif()
|
||||
if (GGML_AVX2)
|
||||
list(APPEND ARCH_FLAGS -mavx2)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX2)
|
||||
endif()
|
||||
if (GGML_AVX_VNNI)
|
||||
list(APPEND ARCH_FLAGS -mavxvnni)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
|
||||
endif()
|
||||
if (GGML_AVX512)
|
||||
list(APPEND ARCH_FLAGS -mavx512f)
|
||||
list(APPEND ARCH_FLAGS -mavx512cd)
|
||||
list(APPEND ARCH_FLAGS -mavx512vl)
|
||||
list(APPEND ARCH_FLAGS -mavx512dq)
|
||||
list(APPEND ARCH_FLAGS -mavx512bw)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX512)
|
||||
endif()
|
||||
if (GGML_AVX512_VBMI)
|
||||
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
|
||||
endif()
|
||||
if (GGML_AVX512_VNNI)
|
||||
list(APPEND ARCH_FLAGS -mavx512vnni)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
|
||||
endif()
|
||||
if (GGML_AVX512_BF16)
|
||||
list(APPEND ARCH_FLAGS -mavx512bf16)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
|
||||
endif()
|
||||
if (GGML_AMX_TILE)
|
||||
list(APPEND ARCH_FLAGS -mamx-tile)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
|
||||
endif()
|
||||
if (GGML_AMX_INT8)
|
||||
list(APPEND ARCH_FLAGS -mamx-int8)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
|
||||
endif()
|
||||
if (GGML_AMX_BF16)
|
||||
list(APPEND ARCH_FLAGS -mamx-bf16)
|
||||
list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
||||
message(STATUS "PowerPC detected")
|
||||
execute_process(COMMAND bash -c "grep POWER /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER_M)
|
||||
if (${POWER_M} MATCHES "POWER10")
|
||||
list(APPEND ARCH_FLAGS -mcpu=power10)
|
||||
elseif (${POWER_M} MATCHES "POWER9")
|
||||
list(APPEND ARCH_FLAGS -mcpu=power9)
|
||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
||||
list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
|
||||
else()
|
||||
list(APPEND ARCH_FLAGS -mcpu=powerpc64 -mtune=native)
|
||||
endif()
|
||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
||||
message(STATUS "loongarch64 detected")
|
||||
|
||||
list(APPEND ARCH_FLAGS -march=loongarch64)
|
||||
if (GGML_LASX)
|
||||
list(APPEND ARCH_FLAGS -mlasx)
|
||||
endif()
|
||||
if (GGML_LSX)
|
||||
list(APPEND ARCH_FLAGS -mlsx)
|
||||
endif()
|
||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
|
||||
message(STATUS "RISC-V detected")
|
||||
if (GGML_RVV)
|
||||
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Unknown architecture")
|
||||
endif()
|
||||
|
||||
if (GGML_CPU_AARCH64)
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
|
||||
endif()
|
||||
|
||||
message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
|
||||
target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
|
||||
target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
||||
|
||||
if (GGML_BACKEND_DL)
|
||||
if (GGML_NATIVE)
|
||||
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
|
||||
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
|
||||
endif()
|
||||
|
||||
# The feature detection code is compiled as a separate target so that
|
||||
# it can be built without the architecture flags
|
||||
# Since multiple variants of the CPU backend may be included in the same
|
||||
# build, using set_source_files_properties() to set the arch flags is not possible
|
||||
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
|
||||
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
|
||||
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
||||
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
||||
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
||||
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
|
||||
endif()
|
||||
|
||||
if (EMSCRIPTEN)
|
||||
set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
|
||||
endif()
|
||||
endfunction()
|
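Illustrative only, not part of the vendored diff: the GGML_* entries appended to ARCH_DEFINITIONS above end up as preprocessor definitions on the CPU backend sources, so variant-specific code is selected with plain #ifdef guards (the same pattern cpu-feats-x86.cpp below relies on). The helper name compiled_x86_level is hypothetical.

// sketch: report which instruction-set level this translation unit was built for
#include <cstdio>

static const char * compiled_x86_level() {
#if defined(GGML_AVX512)
    return "AVX512";
#elif defined(GGML_AVX2)
    return "AVX2";
#elif defined(GGML_AVX)
    return "AVX";
#elif defined(GGML_SSE42)
    return "SSE4.2";
#else
    return "baseline";
#endif
}

int main() {
    std::printf("compiled for: %s\n", compiled_x86_level());
    return 0;
}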
220 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/amx.cpp vendored Normal file
@@ -0,0 +1,220 @@
#include "amx.h"
#include "common.h"
#include "mmq.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-cpu.h"
#include "ggml-cpu-traits.h"

#if defined(__gnu_linux__)
#include <sys/syscall.h>
#include <unistd.h>
#endif

#include <cstdlib>
#include <cstring>
#include <memory>

#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)

// AMX type_traits
namespace ggml::cpu::amx {
class tensor_traits : public ggml::cpu::tensor_traits {
    bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override {
        size = ggml_backend_amx_desired_wsize(op);
        return true;
    }

    bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) override {
        if (op->op == GGML_OP_MUL_MAT) {
            ggml_backend_amx_mul_mat(params, op);
            return true;
        }
        return false;
    }
};

static ggml::cpu::tensor_traits * get_tensor_traits(ggml_backend_buffer_t, struct ggml_tensor *) {
    static tensor_traits traits;
    return &traits;
}
}  // namespace ggml::cpu::amx

// AMX buffer interface
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    free(buffer->context);
}

static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
    return (void *) (buffer->context);
}

static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);

    GGML_UNUSED(buffer);
}

static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                                   uint8_t value, size_t offset, size_t size) {
    memset((char *) tensor->data + offset, value, size);

    GGML_UNUSED(buffer);
}

static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                                const void * data, size_t offset, size_t size) {
    if (qtype_has_amx_kernels(tensor->type)) {
        GGML_LOG_DEBUG("%s: amx repack tensor %s of type %s\n", __func__, tensor->name, ggml_type_name(tensor->type));
        ggml_backend_amx_convert_weight(tensor, data, offset, size);
    } else {
        memcpy((char *) tensor->data + offset, data, size);
    }

    GGML_UNUSED(buffer);
}

/*
// need to figure out what we need to do with buffer->extra.
static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    GGML_ASSERT(!qtype_has_amx_kernels(tensor->type));
    memcpy(data, (const char *)tensor->data + offset, size);

    GGML_UNUSED(buffer);
}

static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
    if (ggml_backend_buffer_is_host(src->buffer)) {
        if (qtype_has_amx_kernels(src->type)) {
            ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_nbytes(dst));
        } else {
            memcpy(dst->data, src->data, ggml_nbytes(src));
        }
        return true;
    }
    return false;

    GGML_UNUSED(buffer);
}
*/

static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    memset(buffer->context, value, buffer->size);
}

static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
    /* .free_buffer = */ ggml_backend_amx_buffer_free_buffer,
    /* .get_base = */ ggml_backend_amx_buffer_get_base,
    /* .init_tensor = */ ggml_backend_amx_buffer_init_tensor,
    /* .memset_tensor = */ ggml_backend_amx_buffer_memset_tensor,
    /* .set_tensor = */ ggml_backend_amx_buffer_set_tensor,
    /* .get_tensor = */ nullptr,
    /* .cpy_tensor = */ nullptr,
    /* .clear = */ ggml_backend_amx_buffer_clear,
    /* .reset = */ nullptr,
};

static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    return "AMX";

    GGML_UNUSED(buft);
}

static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * data = ggml_aligned_malloc(size);
    if (data == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
        return NULL;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);
}

static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    return TENSOR_ALIGNMENT;

    GGML_UNUSED(buft);
}

namespace ggml::cpu::amx {
class extra_buffer_type : ggml::cpu::extra_buffer_type {
    bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
        // handle only 2d gemm for now
        auto is_contiguous_2d = [](const struct ggml_tensor * t) {
            return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
        };

        if (op->op == GGML_OP_MUL_MAT && is_contiguous_2d(op->src[0]) && // src0 must be contiguous
            is_contiguous_2d(op->src[1]) && // src1 must be contiguous
            op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_amx_buffer_type() &&
            op->ne[0] % (TILE_N * 2) == 0 && // out_features is 32x
            (qtype_has_amx_kernels(op->src[0]->type) || (op->src[0]->type == GGML_TYPE_F16))) {
            // src1 must be host buffer
            if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
                return false;
            }
            // src1 must be float32
            if (op->src[1]->type == GGML_TYPE_F32) {
                return true;
            }
        }
        return false;
    }

    ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {
        if (op->op == GGML_OP_MUL_MAT && op->src[0]->buffer &&
            op->src[0]->buffer->buft == ggml_backend_amx_buffer_type()) {
            return (ggml::cpu::tensor_traits *) op->src[0]->extra;
        }

        return nullptr;
    }
};
} // namespace ggml::cpu::amx

static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
    return ggml_backend_amx_get_alloc_size(tensor);

    GGML_UNUSED(buft);
}

#define ARCH_GET_XCOMP_PERM 0x1022
#define ARCH_REQ_XCOMP_PERM 0x1023
#define XFEATURE_XTILECFG 17
#define XFEATURE_XTILEDATA 18

static bool ggml_amx_init() {
#if defined(__gnu_linux__)
    if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
        fprintf(stderr, "AMX is not ready to be used!\n");
        return false;
    }
    return true;
#elif defined(_WIN32)
    return true;
#endif
}

ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
            /* .get_name = */ ggml_backend_amx_buffer_type_get_name,
            /* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
            /* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
            /* .get_max_size = */ nullptr, // defaults to SIZE_MAX
            /* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
            /* .is_host = */ nullptr,
        },
        /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ new ggml::cpu::amx::extra_buffer_type(),
    };

    if (!ggml_amx_init()) {
        return nullptr;
    }

    return &ggml_backend_buffer_type_amx;
}

#endif // defined(__AMX_INT8__) && defined(__AVX512VNNI__)
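Illustrative only, not part of the vendored diff: a standalone restatement of the Linux permission request that ggml_amx_init() above performs. On Linux, a process must ask the kernel for the XTILEDATA state component before executing AMX instructions; otherwise they fault.

// sketch, assuming an x86-64 Linux system with glibc
#if defined(__gnu_linux__)
#include <sys/syscall.h>
#include <unistd.h>
#include <cstdio>

#define ARCH_REQ_XCOMP_PERM 0x1023
#define XFEATURE_XTILEDATA  18

int main() {
    // request permission to use the AMX tile-data state, as ggml_amx_init() does
    if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA) != 0) {
        std::fprintf(stderr, "AMX is not available on this kernel/CPU\n");
        return 1;
    }
    std::printf("AMX tile data permission granted\n");
    return 0;
}
#else
int main() { return 0; }
#endif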
@@ -0,0 +1,8 @@
#include "ggml-backend.h"
#include "ggml-cpu-impl.h"

// GGML internal header

#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
#endif
91 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/common.h vendored Normal file
@@ -0,0 +1,91 @@
#pragma once

#include "ggml.h"
#include "ggml-cpu-impl.h"

#include <algorithm>
#include <memory>
#include <type_traits>

#if defined(GGML_USE_OPENMP)
#include <omp.h>
#endif

#define TILE_M 16
#define TILE_N 16
#define TILE_K 32
#define VNNI_BLK 4

#define AMX_BLK_SIZE 32

#define TMM0 0
#define TMM1 1
#define TMM2 2
#define TMM3 3
#define TMM4 4
#define TMM5 5
#define TMM6 6
#define TMM7 7

// parallel routines
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T div_up(T x, T y) { return (x + y - 1) / y; }

template <typename T>
inline void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
#if 0
    // onednn partition pattern
    T& n_my = n_end;
    if (nth <= 1 || n == 0) {
        n_start = 0;
        n_my = n;
    } else {
        T n1 = div_up(n, nth);
        T n2 = n1 - 1;
        T T1 = n - n2 * nth;
        n_my = ith < T1 ? n1 : n2;
        n_start = ith <= T1 ? ith*n1 : T1 * n1 + (ith - T1) * n2;
    }
    n_end += n_start;
#else
    // pytorch aten partition pattern
    T n_my = div_up(n, nth);
    n_start = ith * n_my;
    n_end = std::min(n_start + n_my, n);
#endif
}

template <typename func_t>
inline void parallel_for(int n, const func_t& f) {
#if defined(GGML_USE_OPENMP)
#pragma omp parallel
    {
        int nth = omp_get_num_threads();
        int ith = omp_get_thread_num();
        int tbegin, tend;
        balance211(n, nth, ith, tbegin, tend);
        f(tbegin, tend);
    }
#else
    f(0, n);
#endif
}

template <typename func_t>
inline void parallel_for_ggml(const ggml_compute_params * params, int n, const func_t & f) {
    int tbegin, tend;
    balance211(n, params->nth, params->ith, tbegin, tend);
    f(tbegin, tend);
}

// quantized types that have AMX support
inline bool qtype_has_amx_kernels(const enum ggml_type type) {
    // TODO: fix padding for vnni format
    return (type == GGML_TYPE_Q4_0) ||
        (type == GGML_TYPE_Q4_1) ||
        (type == GGML_TYPE_Q8_0) ||
        (type == GGML_TYPE_Q4_K) ||
        (type == GGML_TYPE_Q5_K) ||
        (type == GGML_TYPE_Q6_K) ||
        (type == GGML_TYPE_IQ4_XS);
}
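Illustrative only, not part of the vendored diff: a worked example of the active "pytorch aten partition pattern" in balance211() above. Each thread gets a chunk of div_up(n, nth) items and the last chunk is clamped to n.

// sketch: partition n = 10 work items across nth = 4 threads
#include <algorithm>
#include <cstdio>

int main() {
    int n = 10, nth = 4;
    int n_my = (n + nth - 1) / nth; // div_up(10, 4) == 3
    for (int ith = 0; ith < nth; ++ith) {
        int tbegin = ith * n_my;
        int tend   = std::min(tbegin + n_my, n);
        // prints [0, 3), [3, 6), [6, 9), [9, 10)
        std::printf("thread %d: [%d, %d)\n", ith, tbegin, tend);
    }
    return 0;
}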
2511 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/amx/mmq.cpp vendored Normal file
File diff suppressed because it is too large
@@ -0,0 +1,10 @@
#pragma once
#include "common.h"

size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst);

size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);

void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);

void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst);
100 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake vendored Normal file
@@ -0,0 +1,100 @@
include(CheckCSourceRuns)

set(AVX_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 a;
        a = _mm256_set1_ps(0);
        return 0;
    }
")

set(AVX512_CODE "
    #include <immintrin.h>
    int main()
    {
        __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0);
        __m512i b = a;
        __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
        return 0;
    }
")

set(AVX2_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256i a = {0};
        a = _mm256_abs_epi16(a);
        __m256i x;
        _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
        return 0;
    }
")

set(FMA_CODE "
    #include <immintrin.h>
    int main()
    {
        __m256 acc = _mm256_setzero_ps();
        const __m256 d = _mm256_setzero_ps();
        const __m256 p = _mm256_setzero_ps();
        acc = _mm256_fmadd_ps( d, p, acc );
        return 0;
    }
")

macro(check_sse type flags)
    set(__FLAG_I 1)
    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
    foreach (__FLAG ${flags})
        if (NOT ${type}_FOUND)
            set(CMAKE_REQUIRED_FLAGS ${__FLAG})
            check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
            if (HAS_${type}_${__FLAG_I})
                set(${type}_FOUND TRUE CACHE BOOL "${type} support")
                set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
            endif()
            math(EXPR __FLAG_I "${__FLAG_I}+1")
        endif()
    endforeach()
    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

    if (NOT ${type}_FOUND)
        set(${type}_FOUND FALSE CACHE BOOL "${type} support")
        set(${type}_FLAGS "" CACHE STRING "${type} flags")
    endif()

    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
endmacro()

# flags are for MSVC only!
check_sse("AVX" " ;/arch:AVX")
if (NOT ${AVX_FOUND})
    set(GGML_AVX OFF)
else()
    set(GGML_AVX ON)
endif()

check_sse("AVX2" " ;/arch:AVX2")
check_sse("FMA" " ;/arch:AVX2")
if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
    set(GGML_AVX2 OFF)
else()
    set(GGML_AVX2 ON)
endif()

check_sse("AVX512" " ;/arch:AVX512")
if (NOT ${AVX512_FOUND})
    set(GGML_AVX512 OFF)
else()
    set(GGML_AVX512 ON)
endif()
323 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp vendored Normal file
@@ -0,0 +1,323 @@
#include "ggml-backend-impl.h"

#if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))

#ifdef _MSC_VER
#include <intrin.h>
#endif

#include <cstring>
#include <vector>
#include <bitset>
#include <array>
#include <string>

// ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf
struct cpuid_x86 {
    bool SSE3(void) { return f_1_ecx[0]; }
    bool PCLMULQDQ(void) { return f_1_ecx[1]; }
    bool MONITOR(void) { return f_1_ecx[3]; }
    bool SSSE3(void) { return f_1_ecx[9]; }
    bool FMA(void) { return f_1_ecx[12]; }
    bool CMPXCHG16B(void) { return f_1_ecx[13]; }
    bool SSE41(void) { return f_1_ecx[19]; }
    bool SSE42(void) { return f_1_ecx[20]; }
    bool MOVBE(void) { return f_1_ecx[22]; }
    bool POPCNT(void) { return f_1_ecx[23]; }
    bool AES(void) { return f_1_ecx[25]; }
    bool XSAVE(void) { return f_1_ecx[26]; }
    bool OSXSAVE(void) { return f_1_ecx[27]; }
    bool AVX(void) { return f_1_ecx[28]; }
    bool F16C(void) { return f_1_ecx[29]; }
    bool RDRAND(void) { return f_1_ecx[30]; }

    bool MSR(void) { return f_1_edx[5]; }
    bool CX8(void) { return f_1_edx[8]; }
    bool SEP(void) { return f_1_edx[11]; }
    bool CMOV(void) { return f_1_edx[15]; }
    bool CLFSH(void) { return f_1_edx[19]; }
    bool MMX(void) { return f_1_edx[23]; }
    bool FXSR(void) { return f_1_edx[24]; }
    bool SSE(void) { return f_1_edx[25]; }
    bool SSE2(void) { return f_1_edx[26]; }

    bool FSGSBASE(void) { return f_7_ebx[0]; }
    bool BMI1(void) { return f_7_ebx[3]; }
    bool HLE(void) { return is_intel && f_7_ebx[4]; }
    bool AVX2(void) { return f_7_ebx[5]; }
    bool BMI2(void) { return f_7_ebx[8]; }
    bool ERMS(void) { return f_7_ebx[9]; }
    bool INVPCID(void) { return f_7_ebx[10]; }
    bool RTM(void) { return is_intel && f_7_ebx[11]; }
    bool AVX512F(void) { return f_7_ebx[16]; }
    bool AVX512DQ(void) { return f_7_ebx[17]; }
    bool RDSEED(void) { return f_7_ebx[18]; }
    bool ADX(void) { return f_7_ebx[19]; }
    bool AVX512PF(void) { return f_7_ebx[26]; }
    bool AVX512ER(void) { return f_7_ebx[27]; }
    bool AVX512CD(void) { return f_7_ebx[28]; }
    bool AVX512BW(void) { return f_7_ebx[30]; }
    bool AVX512VL(void) { return f_7_ebx[31]; }

    bool SHA(void) { return f_7_ebx[29]; }

    bool PREFETCHWT1(void) { return f_7_ecx[0]; }

    bool LAHF(void) { return f_81_ecx[0]; }
    bool LZCNT(void) { return is_intel && f_81_ecx[5]; }
    bool ABM(void) { return is_amd && f_81_ecx[5]; }
    bool SSE4a(void) { return is_amd && f_81_ecx[6]; }
    bool XOP(void) { return is_amd && f_81_ecx[11]; }
    bool TBM(void) { return is_amd && f_81_ecx[21]; }

    bool SYSCALL(void) { return is_intel && f_81_edx[11]; }
    bool MMXEXT(void) { return is_amd && f_81_edx[22]; }
    bool RDTSCP(void) { return is_intel && f_81_edx[27]; }
    bool _3DNOWEXT(void) { return is_amd && f_81_edx[30]; }
    bool _3DNOW(void) { return is_amd && f_81_edx[31]; }

    bool AVX512_VBMI(void) { return f_7_ecx[1]; }
    bool AVX512_VNNI(void) { return f_7_ecx[11]; }
    bool AVX512_FP16(void) { return f_7_edx[23]; }
    bool AVX512_BF16(void) { return f_7_1_eax[5]; }
    bool AVX_VNNI(void) { return f_7_1_eax[4]; }

    bool AMX_TILE(void) { return f_7_edx[24]; }
    bool AMX_INT8(void) { return f_7_edx[25]; }
    bool AMX_FP16(void) { return f_7_1_eax[21]; }
    bool AMX_BF16(void) { return f_7_edx[22]; }

#ifdef _MSC_VER
    static void cpuid(int cpu_info[4], int eax) {
        __cpuid(cpu_info, eax);
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __cpuidex(cpu_info, eax, ecx);
    }
#else
    static void cpuid(int cpu_info[4], int eax) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(0));
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(ecx));
    }
#endif

    cpuid_x86() {
        std::array<int, 4> cpui;
        std::vector<std::array<int, 4>> data;

        // calling __cpuid with 0x0 as the function_id argument
        // gets the number of the highest valid function ID.
        cpuid(cpui.data(), 0);
        int n_ids = cpui[0];

        for (int i = 0; i <= n_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            data.push_back(cpui);
        }

        // capture vendor string
        char vendor[0x20] = {};
        *reinterpret_cast<int *>(vendor) = data[0][1];
        *reinterpret_cast<int *>(vendor + 4) = data[0][3];
        *reinterpret_cast<int *>(vendor + 8) = data[0][2];
        this->vendor = vendor;
        if (this->vendor == "GenuineIntel") {
            is_intel = true;
        } else if (this->vendor == "AuthenticAMD") {
            is_amd = true;
        }

        // load bitset with flags for function 0x00000001
        if (n_ids >= 1) {
            f_1_ecx = data[1][2];
            f_1_edx = data[1][3];
        }

        // load bitset with flags for function 0x00000007
        if (n_ids >= 7) {
            f_7_ebx = data[7][1];
            f_7_ecx = data[7][2];
            f_7_edx = data[7][3];
            cpuidex(cpui.data(), 7, 1);
            f_7_1_eax = cpui[0];
        }

        // calling __cpuid with 0x80000000 as the function_id argument
        // gets the number of the highest valid extended ID.
        cpuid(cpui.data(), 0x80000000);
        unsigned int n_ex_ids = cpui[0];

        std::vector<std::array<int, 4>> ext_data;
        for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            ext_data.push_back(cpui);
        }

        // load bitset with flags for function 0x80000001
        if (n_ex_ids >= 0x80000001) {
            f_81_ecx = ext_data[1][2];
            f_81_edx = ext_data[1][3];
        }

        // interpret CPU brand string if reported
        char brand[0x40] = {};
        if (n_ex_ids >= 0x80000004) {
            std::memcpy(brand, ext_data[2].data(), sizeof(cpui));
            std::memcpy(brand + 16, ext_data[3].data(), sizeof(cpui));
            std::memcpy(brand + 32, ext_data[4].data(), sizeof(cpui));
            this->brand = brand;
        }
    }

    bool is_intel = false;
    bool is_amd = false;
    std::string vendor;
    std::string brand;
    std::bitset<32> f_1_ecx;
    std::bitset<32> f_1_edx;
    std::bitset<32> f_7_ebx;
    std::bitset<32> f_7_ecx;
    std::bitset<32> f_7_edx;
    std::bitset<32> f_7_1_eax;
    std::bitset<32> f_81_ecx;
    std::bitset<32> f_81_edx;
};

#if 0
void test_x86_is() {
    cpuid_x86 is;
    printf("CPU Vendor: %s\n", is.vendor.c_str());
    printf("Brand: %s\n", is.brand.c_str());
    printf("is_intel: %d\n", is.is_intel);
    printf("is_amd: %d\n", is.is_amd);
    printf("sse3: %d\n", is.SSE3());
    printf("pclmulqdq: %d\n", is.PCLMULQDQ());
    printf("ssse3: %d\n", is.SSSE3());
    printf("fma: %d\n", is.FMA());
    printf("cmpxchg16b: %d\n", is.CMPXCHG16B());
    printf("sse41: %d\n", is.SSE41());
    printf("sse42: %d\n", is.SSE42());
    printf("movbe: %d\n", is.MOVBE());
    printf("popcnt: %d\n", is.POPCNT());
    printf("aes: %d\n", is.AES());
    printf("xsave: %d\n", is.XSAVE());
    printf("osxsave: %d\n", is.OSXSAVE());
    printf("avx: %d\n", is.AVX());
    printf("f16c: %d\n", is.F16C());
    printf("rdrand: %d\n", is.RDRAND());
    printf("msr: %d\n", is.MSR());
    printf("cx8: %d\n", is.CX8());
    printf("sep: %d\n", is.SEP());
    printf("cmov: %d\n", is.CMOV());
    printf("clflush: %d\n", is.CLFSH());
    printf("mmx: %d\n", is.MMX());
    printf("fxsr: %d\n", is.FXSR());
    printf("sse: %d\n", is.SSE());
    printf("sse2: %d\n", is.SSE2());
    printf("fsgsbase: %d\n", is.FSGSBASE());
    printf("bmi1: %d\n", is.BMI1());
    printf("hle: %d\n", is.HLE());
    printf("avx2: %d\n", is.AVX2());
    printf("bmi2: %d\n", is.BMI2());
    printf("erms: %d\n", is.ERMS());
    printf("invpcid: %d\n", is.INVPCID());
    printf("rtm: %d\n", is.RTM());
    printf("avx512f: %d\n", is.AVX512F());
    printf("rdseed: %d\n", is.RDSEED());
    printf("adx: %d\n", is.ADX());
    printf("avx512pf: %d\n", is.AVX512PF());
    printf("avx512er: %d\n", is.AVX512ER());
    printf("avx512cd: %d\n", is.AVX512CD());
    printf("sha: %d\n", is.SHA());
    printf("prefetchwt1: %d\n", is.PREFETCHWT1());
    printf("lahf: %d\n", is.LAHF());
    printf("lzcnt: %d\n", is.LZCNT());
    printf("abm: %d\n", is.ABM());
    printf("sse4a: %d\n", is.SSE4a());
    printf("xop: %d\n", is.XOP());
    printf("tbm: %d\n", is.TBM());
    printf("syscall: %d\n", is.SYSCALL());
    printf("mmxext: %d\n", is.MMXEXT());
    printf("rdtscp: %d\n", is.RDTSCP());
    printf("3dnowext: %d\n", is._3DNOWEXT());
    printf("3dnow: %d\n", is._3DNOW());
    printf("avx512_vbmi: %d\n", is.AVX512_VBMI());
    printf("avx512_vnni: %d\n", is.AVX512_VNNI());
    printf("avx512_fp16: %d\n", is.AVX512_FP16());
    printf("avx512_bf16: %d\n", is.AVX512_BF16());
    printf("amx_tile: %d\n", is.AMX_TILE());
    printf("amx_int8: %d\n", is.AMX_INT8());
    printf("amx_fp16: %d\n", is.AMX_FP16());
    printf("amx_bf16: %d\n", is.AMX_BF16());
}
#endif

static int ggml_backend_cpu_x86_score() {
    // FIXME: this does not check for OS support

    int score = 0;
    cpuid_x86 is;

#ifdef GGML_FMA
    if (!is.FMA()) { return 0; }
    score += 1;
#endif
#ifdef GGML_F16C
    if (!is.F16C()) { return 0; }
    score += 1<<1;
#endif
#ifdef GGML_SSE42
    if (!is.SSE42()) { return 0; }
    score += 1<<2;
#endif
#ifdef GGML_AVX
    if (!is.AVX()) { return 0; }
    score += 1<<4;
#endif
#ifdef GGML_AVX2
    if (!is.AVX2()) { return 0; }
    score += 1<<5;
#endif
#ifdef GGML_AVX_VNNI
    if (!is.AVX_VNNI()) { return 0; }
    score += 1<<6;
#endif
#ifdef GGML_AVX512
    if (!is.AVX512F()) { return 0; }
    if (!is.AVX512CD()) { return 0; }
    if (!is.AVX512VL()) { return 0; }
    if (!is.AVX512DQ()) { return 0; }
    if (!is.AVX512BW()) { return 0; }
    score += 1<<7;
#endif
#ifdef GGML_AVX512_VBMI
    if (!is.AVX512_VBMI()) { return 0; }
    score += 1<<8;
#endif
#ifdef GGML_AVX512_BF16
    if (!is.AVX512_BF16()) { return 0; }
    score += 1<<9;
#endif
#ifdef GGML_AVX512_VNNI
    if (!is.AVX512_VNNI()) { return 0; }
    score += 1<<10;
#endif
#ifdef GGML_AMX_INT8
    if (!is.AMX_INT8()) { return 0; }
    score += 1<<11;
#endif

    return score;
}

GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score)

#endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
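Illustrative only, not part of the vendored diff: the score above is a bitmask, so when several dynamically loaded CPU backend variants are present, the one built for the most features that the host CPU actually supports wins, and a variant whose build-time requirements the CPU cannot meet scores 0 and is skipped.

// sketch: the score a variant compiled with GGML_FMA, GGML_F16C, GGML_SSE42,
// GGML_AVX and GGML_AVX2 would report on a CPU that supports all of them
#include <cstdio>

int main() {
    int score = 1 + (1 << 1) + (1 << 2) + (1 << 4) + (1 << 5);
    std::printf("avx2 variant score: %d\n", score); // 55
    return 0;
}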
4247 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp vendored Normal file
File diff suppressed because it is too large
8 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h vendored Normal file
@@ -0,0 +1,8 @@
#pragma once

#include "ggml-cpu-traits.h"
#include "ggml.h"

// GGML internal header

ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
55 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp vendored Normal file
@@ -0,0 +1,55 @@
#ifdef GGML_USE_CPU_HBM

#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-cpu.h"
#include "ggml-impl.h"

#include "ggml-cpu-hbm.h"

// buffer type HBM

#include <hbwmalloc.h>

static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    return "CPU_HBM";

    GGML_UNUSED(buft);
}

static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    hbw_free(buffer->context);
}

static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
                                                                            size_t size) {
    void * ptr;
    int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
    if (result != 0) {
        GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
        return NULL;
    }

    ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
    buffer->buft = buft;
    buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer;

    return buffer;
}

ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
        /* .iface = */ {
            /* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
            /* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
            /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
            /* .get_max_size = */ nullptr, // defaults to SIZE_MAX
            /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
            /* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
        },
        /* .context = */ nullptr,
    };

    return &ggml_backend_cpu_buffer_type_hbm;
}
#endif
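Illustrative only, not part of the vendored diff: the buffer type above allocates from high-bandwidth memory through the memkind/hbwmalloc API. A standalone restatement of that allocate/free pattern (requires libmemkind; the 64-byte alignment and 1024-byte size here are example values only):

#include <hbwmalloc.h>
#include <cstdio>

int main() {
    void * ptr = nullptr;
    // same call the buffer type uses, with example alignment and size
    if (hbw_posix_memalign(&ptr, 64, 1024) != 0) {
        std::fprintf(stderr, "failed to allocate HBM buffer\n");
        return 1;
    }
    hbw_free(ptr);
    return 0;
}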
8 packages/app-mobile/android/vendor/whisper.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h vendored Normal file
@@ -0,0 +1,8 @@
#pragma once

#include "ggml-backend.h"
#include "ggml.h"

// GGML CPU internal header

ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
Some files were not shown because too many files have changed in this diff.