Many new PCI IDs with Maxwell and Volta support

This commit is contained in:
Krutav Shah 2021-04-11 09:14:11 -07:00 committed by GitHub
parent cce7b5d824
commit 2fe296a81e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 1460 additions and 1387 deletions

View file

@ -1,220 +1,261 @@
#!/bin/python3 #!/bin/python3
# #
# vGPU unlock script for consumer GPUs. # vGPU unlock script for consumer GPUs.
# #
# Copyright 2021 Jonathan Johansson # Copyright 2021 Jonathan Johansson
# This file is part of the "vgpu_unlock" project, and is distributed under the MIT License. # This file is part of the "vgpu_unlock" project, and is distributed under the MIT License.
# See the LICENSE file for more details. # See the LICENSE file for more details.
# #
# Contributions from the vGPU Unlocking community included :)
#
import errno
import frida import errno
import os import frida
import queue import os
import subprocess import queue
import sys import subprocess
import time import sys
import time
script_source = r"""
// Value of the "request" argument used by nvidia-vgpud and nvidia-vgpu-mgr script_source = r"""
// when calling ioctl to read the PCI device ID and type (and possibly // Value of the "request" argument used by nvidia-vgpud and nvidia-vgpu-mgr
// other things) from the GPU. // when calling ioctl to read the PCI device ID and type (and possibly
var REQ_QUERY_GPU = ptr("0xC020462A"); // other things) from the GPU.
var REQ_QUERY_GPU = ptr("0xC020462A");
// When issuing ioctl with REQ_QUERY_GPU then the "argp" argument is a
// pointer to a structure something like this: // When issuing ioctl with REQ_QUERY_GPU then the "argp" argument is a
// // pointer to a structure something like this:
// struct arg { //
// uint32_t unknown_1; // Initialized prior to call. // struct arg {
// uint32_t unknown_2; // Initialized prior to call. // uint32_t unknown_1; // Initialized prior to call.
// uint32_t op_type; // Operation type, see comment below. // uint32_t unknown_2; // Initialized prior to call.
// uint32_t padding_1; // Always set to 0 prior to call. // uint32_t op_type; // Operation type, see comment below.
// void* result; // Pointer initialized prior to call. // uint32_t padding_1; // Always set to 0 prior to call.
// // Pointee initialized to 0 prior to call. // void* result; // Pointer initialized prior to call.
// // Pointee is written by ioctl call. // // Pointee initialized to 0 prior to call.
// uint32_t unknown_4; // Set to 0x10 for READ_PCI_ID and set to 4 for // // Pointee is written by ioctl call.
// READ_DEV_TYPE prior to call. // uint32_t unknown_4; // Set to 0x10 for READ_PCI_ID and set to 4 for
// uint32_t status; // Written by ioctl call. See comment below. // READ_DEV_TYPE prior to call.
// } // uint32_t status; // Written by ioctl call. See comment below.
// }
// These are the observed values for the op_type member.
var OP_READ_DEV_TYPE = 0x800289; // *result type is uint64_t. // These are the observed values for the op_type member.
var OP_READ_PCI_ID = 0x20801801; // *result type is uint16_t[4], the second var OP_READ_DEV_TYPE = 0x800289; // *result type is uint64_t.
// element (index 1) is the device ID, the var OP_READ_PCI_ID = 0x20801801; // *result type is uint16_t[4], the second
// fourth element (index 3) is the subsystem // element (index 1) is the device ID, the
// ID. // fourth element (index 3) is the subsystem
// ID.
// nvidia-vgpu-mgr expects this value for a vGPU capable GPU.
var DEV_TYPE_VGPU_CAPABLE = uint64(3); // nvidia-vgpu-mgr expects this value for a vGPU capable GPU.
var DEV_TYPE_VGPU_CAPABLE = uint64(3);
// When ioctl returns success (retval >= 0) but sets the status value of
// the arg structure to 3 then nvidia-vgpud will sleep for a bit (first // When ioctl returns success (retval >= 0) but sets the status value of
// 0.1s then 1s then 10s) then issue the same ioctl call again until the // the arg structure to 3 then nvidia-vgpud will sleep for a bit (first
// status differs from 3. It will attempt this for up to 24h before giving // 0.1s then 1s then 10s) then issue the same ioctl call again until the
// up. // status differs from 3. It will attempt this for up to 24h before giving
var STATUS_TRY_AGAIN = 3; // up.
var STATUS_TRY_AGAIN = 3;
Interceptor.attach(Module.getExportByName(null, "ioctl"), {
onEnter(args) { Interceptor.attach(Module.getExportByName(null, "ioctl"), {
this.request = args[1]; onEnter(args) {
this.argp = args[2]; this.request = args[1];
}, this.argp = args[2];
onLeave(retVal) { },
if(!this.request.equals(REQ_QUERY_GPU)) { onLeave(retVal) {
// Not a call we care about. if(!this.request.equals(REQ_QUERY_GPU)) {
return; // Not a call we care about.
} return;
}
if(retVal.toInt32() < 0) {
// Call failed. if(retVal.toInt32() < 0) {
return; // Call failed.
} return;
}
// Lookup status value according to struct above.
var status = this.argp.add(0x1C).readU32(); // Lookup status value according to struct above.
var status = this.argp.add(0x1C).readU32();
if(status == STATUS_TRY_AGAIN) {
// Driver will try again. if(status == STATUS_TRY_AGAIN) {
return; // Driver will try again.
} return;
}
var op_type = this.argp.add(8).readU32();
var op_type = this.argp.add(8).readU32();
if(op_type == OP_READ_PCI_ID) {
// Lookup address of the device and subsystem IDs. if(op_type == OP_READ_PCI_ID) {
var devid_ptr = this.argp.add(0x10).readPointer().add(2); // Lookup address of the device and subsystem IDs.
var subsysid_ptr = this.argp.add(0x10).readPointer().add(6); var devid_ptr = this.argp.add(0x10).readPointer().add(2);
var subsysid_ptr = this.argp.add(0x10).readPointer().add(6);
// Now we replace the device ID with a spoofed value that needs to
// be determined such that the spoofed value represents a GPU with // Now we replace the device ID with a spoofed value that needs to
// vGPU support that uses the same GPU chip as our actual GPU. // be determined such that the spoofed value represents a GPU with
var actual_devid = devid_ptr.readU16(); // vGPU support that uses the same GPU chip as our actual GPU.
var spoofed_devid = actual_devid; var actual_devid = devid_ptr.readU16();
var actual_subsysid = subsysid_ptr.readU16(); var spoofed_devid = actual_devid;
var spoofed_subsysid = actual_subsysid; var actual_subsysid = subsysid_ptr.readU16();
var spoofed_subsysid = actual_subsysid;
// GP102
if(actual_devid == 0x1b00 || // TITAN X (Pascal) // GK104 (For experimental testing)
actual_devid == 0x1b02 || // TITAN Xp if(actual_devid == 0x1183 || // GTX 660 Ti
actual_devid == 0x1b06 || // GTX 1080 Ti actual_devid == 0x1189 || // GTX 670
actual_devid == 0x1b30) { // Quadro P6000 actual_devid == 0x1180 || // GTX 680
spoofed_devid = 0x1b38; // Tesla P40 actual_devid == 0x1188 || // GTX 690
} actual_devid == 0x1187 || // GTX 760
actual_devid == 0x11ba) { // Quadro K5000
// GP104 spoofed_devid = 0x13f2; // Tesla M60 for testing purposes
if(actual_devid == 0x1b80 || // GTX 1080 }
actual_devid == 0x1b81 || // GTX 1070
actual_devid == 0x1b82 || // GTX 1070 Ti // GM204
actual_devid == 0x1b83 || // GTX 1060 6GB if(actual_devid == 0x13c3 || // GTX 960 OEM
actual_devid == 0x1b84 || // GTX 1060 3GB actual_devid == 0x13d9 || // GTX 965M
actual_devid == 0x1bb0) { // Quadro P5000 actual_devid == 0x13d8 || // GTX 970M
spoofed_devid = 0x1bb3; // Tesla P4 actual_devid == 0x13c2 || // GTX 970
} actual_devid == 0x13d7 || // GTX 980M
actual_devid == 0x13c0 || // GTX 980
// TU102 actual_devid == 0x13f1 || // Quadro M4000
if(actual_devid == 0x1e02 || // TITAN RTX actual_devid == 0x13f0) { // Quadro M5000
actual_devid == 0x1e04 || // RTX 2080 Ti spoofed_devid = 0x13f2; // Tesla M60
actual_devid == 0x1e07) { // RTX 2080 Ti Rev. A }
spoofed_devid = 0x1e30; // Quadro RTX 6000
spoofed_subsysid = 0x12ba; // GP102
} if(actual_devid == 0x1b00 || // TITAN X (Pascal)
actual_devid == 0x1b02 || // TITAN Xp
// TU104 actual_devid == 0x1b06 || // GTX 1080 Ti
if(actual_devid == 0x1e81 || // RTX 2080 Super actual_devid == 0x1b30) { // Quadro P6000
actual_devid == 0x1e82 || // RTX 2080 spoofed_devid = 0x1b38; // Tesla P40
actual_devid == 0x1e84 || // RTX 2070 Super }
actual_devid == 0x1e87 || // RTX 2080 Rev. A
actual_devid == 0x1e89 || // RTX 2060 // GP104
actual_devid == 0x1eb0 || // Quadro RTX 5000 if(actual_devid == 0x1b80 || // GTX 1080
actual_devid == 0x1eb1) { // Quadro RTX 4000 actual_devid == 0x1b81 || // GTX 1070
spoofed_devid = 0x1eb8; // Tesla T4 actual_devid == 0x1b82 || // GTX 1070 Ti
} actual_devid == 0x1b83 || // GTX 1060 6GB GP104 Refresh
actual_devid == 0x1b84 || // GTX 1060 3GB GP104 Refresh
// GA102 actual_devid == 0x1bb0) { // Quadro P5000
if(actual_devid == 0x2204 || // RTX 3090 spoofed_devid = 0x1bb3; // Tesla P4
actual_devid == 0x2205 || // RTX 3080 Ti }
actual_devid == 0x2206) { // RTX 3080
spoofed_devid = 0x2235; // RTX A40 // GP106 (Somehow works with Tesla P4 ID)
} if(actual_devid == 0x1c03 || // GTX 1060 6GB
actual_devid == 0x1c04 || // GTX 1060 5GB
devid_ptr.writeU16(spoofed_devid); actual_devid == 0x1c02 || // GTX 1060 3GB
subsysid_ptr.writeU16(spoofed_subsysid); actual_devid == 0x1c07 || // P106-100 6GB
} actual_devid == 0x1c09 || // P106-90 3GB
actual_devid == 0x1c30 || // Quadro P2000
if(op_type == OP_READ_DEV_TYPE) { actual_devid == 0x1c31) { // Quadro P2200
// Set device type to vGPU capable. spoofed_devid = 0x1bb3; // Tesla P4
var dev_type_ptr = this.argp.add(0x10).readPointer(); }
dev_type_ptr.writeU64(DEV_TYPE_VGPU_CAPABLE);
} // GV100 (For the one person who owns a Titan Volta)
} if(actual_devid == 0x1d81 || // TITAN V
}); actual_devid == 0x1db6 || // TITAN V CEO Edition 32GB
""" actual_devid == 0x1dbA) { // Quadro GV100
spoofed_devid = 0x1db4; // Tesla V100
device = frida.get_local_device() }
child_processes = queue.Queue()
// TU102
def instrument(pid): if(actual_devid == 0x1e02 || // TITAN RTX
"""Instrument and resume process. actual_devid == 0x1e04 || // RTX 2080 Ti
actual_devid == 0x1e07) { // RTX 2080 Ti Rev. A
:param pid: Process identifier spoofed_devid = 0x1e30; // Quadro RTX 6000
""" spoofed_subsysid = 0x12ba;
}
session = device.attach(pid)
# We need to also instrument the children since nvidia-vgpud forks itself // TU104
# when initially launched. if(actual_devid == 0x1e81 || // RTX 2080 Super
session.enable_child_gating() actual_devid == 0x1e82 || // RTX 2080
script = session.create_script(script_source) actual_devid == 0x1e84 || // RTX 2070 Super
script.load() actual_devid == 0x1e87 || // RTX 2080 Rev. A
device.resume(pid) actual_devid == 0x1e89 || // RTX 2060
actual_devid == 0x1eb0 || // Quadro RTX 5000
actual_devid == 0x1eb1) { // Quadro RTX 4000
def on_child_added(child): spoofed_devid = 0x1eb8; // Tesla T4
"""Callback for when a new child process has been created. }
:param child: The newly created child process. // GA102
""" if(actual_devid == 0x2204 || // RTX 3090
actual_devid == 0x2205 || // RTX 3080 Ti
child_processes.put(child.pid) actual_devid == 0x2206) { // RTX 3080
instrument(child.pid) spoofed_devid = 0x2235; // RTX A40
}
def wait_exit(pid): devid_ptr.writeU16(spoofed_devid);
"""Wait for a process to terminate. subsysid_ptr.writeU16(spoofed_subsysid);
}
:param pid: Process ID of the target process.
""" if(op_type == OP_READ_DEV_TYPE) {
// Set device type to vGPU capable.
while 1: var dev_type_ptr = this.argp.add(0x10).readPointer();
time.sleep(.1) dev_type_ptr.writeU64(DEV_TYPE_VGPU_CAPABLE);
}
try: }
os.kill(pid, 0) });
"""
except OSError as e:
if e.errno == errno.ESRCH: device = frida.get_local_device()
break child_processes = queue.Queue()
def instrument(pid):
def main(): """Instrument and resume process.
"""Entrypoint."""
:param pid: Process identifier
# Behave at least a little bit like a forking service. """
if sys.argv[1] != "-f":
subprocess.Popen([sys.argv[0], "-f"] + sys.argv[1:]) session = device.attach(pid)
exit() # We need to also instrument the children since nvidia-vgpud forks itself
# when initially launched.
device.on("child-added", on_child_added) session.enable_child_gating()
pid = device.spawn(sys.argv[2]) script = session.create_script(script_source)
instrument(pid) script.load()
device.resume(pid)
# Wait for everything to terminate before exiting.
wait_exit(pid)
def on_child_added(child):
while not child_processes.empty(): """Callback for when a new child process has been created.
wait_exit(child_processes.get_nowait())
:param child: The newly created child process.
"""
if __name__ == "__main__":
main() child_processes.put(child.pid)
instrument(child.pid)
def wait_exit(pid, poll_interval=.1):
    """Wait for a process to terminate.

    The process is probed by sending it signal 0 via ``os.kill``; the call
    returns once that probe reports that no such process exists.

    :param pid: Process ID of the target process.
    :param poll_interval: Seconds to sleep before each probe. Defaults to 0.1,
                          the interval that was previously hard-coded.
    """

    while True:
        # Sleep first, then probe, to keep the original polling cadence.
        time.sleep(poll_interval)

        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            # ESRCH: the PID no longer refers to a process, so we are done.
            return
        except OSError:
            # Any other error does not show that the process is gone, so keep
            # polling (same outcome as the former errno != ESRCH branch).
            continue
def main():
    """Entrypoint.

    Command line: ``<script> [-f] <program>``.

    Without ``-f`` the script re-launches itself with ``-f`` prepended to its
    arguments and exits immediately. With ``-f`` it spawns ``<program>`` through
    Frida, instruments it, and waits for it and for every child process that
    was queued by ``on_child_added`` to terminate.

    Exits with status 2 and a usage message when the required arguments are
    missing (previously this surfaced as an ``IndexError`` traceback).
    """

    args = sys.argv[1:]
    foreground = bool(args) and args[0] == "-f"

    # Validate up front: we need at least one argument, and "-f" must be
    # followed by the program to spawn.
    if not args or (foreground and len(args) < 2):
        sys.stderr.write("usage: %s [-f] <program>\n" % sys.argv[0])
        sys.exit(2)

    # Behave at least a little bit like a forking service.
    if not foreground:
        subprocess.Popen([sys.argv[0], "-f"] + args)
        # sys.exit() instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. when run with "python -S").
        sys.exit()

    device.on("child-added", on_child_added)
    pid = device.spawn(args[1])
    instrument(pid)

    # Wait for everything to terminate before exiting.
    wait_exit(pid)

    while not child_processes.empty():
        wait_exit(child_processes.get_nowait())
# Run the entrypoint only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load diff