diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-11-30 15:38:21 +0100 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-11-30 15:38:21 +0100 |
| commit | cd5f82dbbad04418afca092e8c7c6250e6815554 (patch) | |
| tree | 58e09eb0ae0899232ecde2c7b90c3febc15c1e07 /include | |
| parent | 3473065927a877e0a15c4d4b64f245ccaba24e3f (diff) | |
| parent | f44afb5b5a5d04448da843b2fe872e01669bc317 (diff) | |
Merge tag 'misc-habanalabs-next-2020-11-30' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next
This tag contains habanalabs driver changes for v5.11-rc1:
- Add support for ability to perform collective stream sync. This is basically
a synchronization between compute and network streams.
- Add initialization of NIC QMANs and security configuration. This is a
pre-requisite for upstreaming the NIC ETH and RDMA code.
- Add option to scrub all internal memory (SRAM and DRAM) when the user
closes the file-descriptor
- Support new firmware that provides enhanced device security. This includes
many changes that basically amount to moving certain configurations to
the firmware, so that the driver stops reading registers directly and
instead receives the information from the firmware. For example:
- Retrieve HBM ECC error information
- Retrieve PLL configuration
- Configuration of internal credits and rate limitation
- Support new firmware that performs the GAUDI device reset instead of the
driver. The driver now asks the firmware to do it.
- Some changes were done as Pre-requisite for future ASICs support:
- Add option to put the device's PCI MMU page tables on the host memory.
- Support loading multiple types of firmware.
- Add option for the user to query the usage counter of a command buffer.
- Support taking timestamp of Command Submission when it completes and
providing it to the user.
- Change aggregate cs counters to atomic and fix the cs counters structure
to support addition of new counters in the future
- Update email address and git repo of the driver in MAINTAINERS
- Many small bug fixes and improvements, such as:
- Refactoring in MMU code to move code from ASIC-dependent files to
common code
- Minimize driver prints when no errors occur
- Using enums, defines instead of hard-coded values
- Refactoring of Command Submission flow to make it more readable now that
we have multiple types of Command Submissions.
* tag 'misc-habanalabs-next-2020-11-30' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (76 commits)
habanalabs: Add CB IOCTL opcode to retrieve CB information
habanalabs: Modify the cs_cnt of a CB to be atomic
habanalabs: Add mask for CS type bits in CS flags
habanalabs: change messages to debug level
habanalabs: free host huge va_range if not used
habanalabs/gaudi: handle reset when f/w is in preboot
habanalabs: add missing counter update
habanalabs: add ull to PLL masks
habanalabs: add support for cs with timestamp
habanalabs: indicate to user that a cs is gone
habanalabs/gaudi: print ECC type field
habanalabs: update firmware files
habanalabs: gaudi_ctx_fini() can be static
habanalabs: goya_reset_sob_group() can be static
habanalabs: fetch pll frequency from firmware
habanalabs: mmu map wrapper for sizes larger than a page
habanalabs: print CS type when it is stuck
habanalabs/gaudi: align to new FW reset scheme
habanalabs: firmware returns 64bit argument
habanalabs: fix MMU debugfs operations
...
Diffstat (limited to 'include')
| -rw-r--r-- | include/uapi/misc/habanalabs.h | 126 |
1 files changed, 97 insertions, 29 deletions
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 9705b8adb60c..8c15a7d336a0 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -18,8 +18,18 @@ #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ #define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ -#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 48 -#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 24 +/* + * 128 SOBs reserved for collective wait + * 16 SOBs reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 144 + +/* + * 64 monitors reserved for collective wait + * 8 monitors reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 + /* * Goya queue Numbering * @@ -76,10 +86,10 @@ enum gaudi_queue_id { GAUDI_QUEUE_ID_DMA_4_1 = 18, /* internal */ GAUDI_QUEUE_ID_DMA_4_2 = 19, /* internal */ GAUDI_QUEUE_ID_DMA_4_3 = 20, /* internal */ - GAUDI_QUEUE_ID_DMA_5_0 = 21, /* external */ - GAUDI_QUEUE_ID_DMA_5_1 = 22, /* external */ - GAUDI_QUEUE_ID_DMA_5_2 = 23, /* external */ - GAUDI_QUEUE_ID_DMA_5_3 = 24, /* external */ + GAUDI_QUEUE_ID_DMA_5_0 = 21, /* internal */ + GAUDI_QUEUE_ID_DMA_5_1 = 22, /* internal */ + GAUDI_QUEUE_ID_DMA_5_2 = 23, /* internal */ + GAUDI_QUEUE_ID_DMA_5_3 = 24, /* internal */ GAUDI_QUEUE_ID_DMA_6_0 = 25, /* internal */ GAUDI_QUEUE_ID_DMA_6_1 = 26, /* internal */ GAUDI_QUEUE_ID_DMA_6_2 = 27, /* internal */ @@ -232,7 +242,8 @@ enum gaudi_engine_id { enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, - HL_DEVICE_STATUS_MALFUNCTION + HL_DEVICE_STATUS_MALFUNCTION, + HL_DEVICE_STATUS_NEEDS_RESET }; /* Opcode for management ioctl @@ -284,6 +295,7 @@ enum hl_device_status { #define HL_INFO_CLK_THROTTLE_REASON 13 #define HL_INFO_SYNC_MANAGER 14 #define HL_INFO_TOTAL_ENERGY 15 +#define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -385,6 +397,12 @@ struct hl_info_energy { __u64 
total_energy_consumption; }; +#define HL_PLL_NUM_OUTPUTS 4 + +struct hl_pll_frequency_info { + __u16 output[HL_PLL_NUM_OUTPUTS]; +}; + /** * struct hl_info_sync_manager - sync manager information * @first_available_sync_object: first available sob @@ -397,23 +415,28 @@ struct hl_info_sync_manager { /** * struct hl_info_cs_counters - command submission counters - * @out_of_mem_drop_cnt: dropped due to memory allocation issue - * @parsing_drop_cnt: dropped due to error in packet parsing - * @queue_full_drop_cnt: dropped due to queue full - * @device_in_reset_drop_cnt: dropped due to device in reset - * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight + * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue + * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue + * @total_parsing_drop_cnt: total dropped due to error in packet parsing + * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing + * @total_queue_full_drop_cnt: total dropped due to queue full + * @ctx_queue_full_drop_cnt: context dropped due to queue full + * @total_device_in_reset_drop_cnt: total dropped due to device in reset + * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset + * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight + * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight */ -struct hl_cs_counters { - __u64 out_of_mem_drop_cnt; - __u64 parsing_drop_cnt; - __u64 queue_full_drop_cnt; - __u64 device_in_reset_drop_cnt; - __u64 max_cs_in_flight_drop_cnt; -}; - struct hl_info_cs_counters { - struct hl_cs_counters cs_counters; - struct hl_cs_counters ctx_cs_counters; + __u64 total_out_of_mem_drop_cnt; + __u64 ctx_out_of_mem_drop_cnt; + __u64 total_parsing_drop_cnt; + __u64 ctx_parsing_drop_cnt; + __u64 total_queue_full_drop_cnt; + __u64 ctx_queue_full_drop_cnt; + __u64 total_device_in_reset_drop_cnt; + __u64 ctx_device_in_reset_drop_cnt; + __u64 
total_max_cs_in_flight_drop_cnt; + __u64 ctx_max_cs_in_flight_drop_cnt; }; enum gaudi_dcores { @@ -449,6 +472,8 @@ struct hl_info_args { * resolution. */ __u32 period_ms; + /* PLL frequency retrieval */ + __u32 pll_index; }; __u32 pad; @@ -458,6 +483,8 @@ struct hl_info_args { #define HL_CB_OP_CREATE 0 /* Opcode to destroy previously created command buffer */ #define HL_CB_OP_DESTROY 1 +/* Opcode to retrieve information about a command buffer */ +#define HL_CB_OP_INFO 2 /* 2MB minus 32 bytes for 2xMSG_PROT */ #define HL_MAX_CB_SIZE (0x200000 - 32) @@ -481,8 +508,17 @@ struct hl_cb_in { }; struct hl_cb_out { - /* Handle of CB */ - __u64 cb_handle; + union { + /* Handle of CB */ + __u64 cb_handle; + + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + }; }; union hl_cb_args { @@ -490,6 +526,22 @@ union hl_cb_args { struct hl_cb_out out; }; +/* HL_CS_CHUNK_FLAGS_ values + * + * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: + * Indicates if the CB was allocated and mapped by userspace. + * User allocated CB is a command buffer allocated by the user, via malloc + * (or similar). After allocating the CB, the user invokes “memory ioctl” + * to map the user memory into a device virtual address. The user provides + * this address via the cb_handle field. The interface provides the + * ability to create a large CBs, Which aren’t limited to + * “HL_MAX_CB_SIZE”. Therefore, it increases the PCI-DMA queues + * throughput. This CB allocation method also reduces the use of Linux + * DMA-able memory pool. Which are limited and used by other Linux + * sub-systems. + */ +#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 + /* * This structure size must always be fixed to 64-bytes for backward * compatibility @@ -507,7 +559,8 @@ struct hl_cs_chunk { */ __u64 cb_handle; - /* Relevant only when HL_CS_FLAGS_WAIT is set. + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set. 
* This holds address of array of u64 values that contain * signal CS sequence numbers. The wait described by this job * will listen on all those signals (wait event per signal) @@ -525,7 +578,8 @@ struct hl_cs_chunk { */ __u32 cb_size; - /* Relevant only when HL_CS_FLAGS_WAIT is set. + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set. * Number of entries in signal_seq_arr */ __u32 num_signal_seq_arr; @@ -534,14 +588,22 @@ struct hl_cs_chunk { /* HL_CS_CHUNK_FLAGS_* */ __u32 cs_chunk_flags; + /* Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * This holds the collective engine ID. The wait described by this job + * will sync with this engine and with all NICs before completion. + */ + __u32 collective_engine_id; + /* Align structure to 64 bytes */ - __u32 pad[11]; + __u32 pad[10]; }; -/* SIGNAL and WAIT flags are mutually exclusive */ +/* SIGNAL and WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ #define HL_CS_FLAGS_FORCE_RESTORE 0x1 #define HL_CS_FLAGS_SIGNAL 0x2 #define HL_CS_FLAGS_WAIT 0x4 +#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 +#define HL_CS_FLAGS_TIMESTAMP 0x20 #define HL_CS_STATUS_SUCCESS 0 @@ -612,10 +674,16 @@ struct hl_wait_cs_in { #define HL_WAIT_CS_STATUS_ABORTED 3 #define HL_WAIT_CS_STATUS_INTERRUPTED 4 +#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 +#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 + struct hl_wait_cs_out { /* HL_WAIT_CS_STATUS_* */ __u32 status; - __u32 pad; + /* HL_WAIT_CS_STATUS_FLAG* */ + __u32 flags; + /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */ + __s64 timestamp_nsec; }; union hl_wait_cs_args { |
