Skip to content
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
caf5a61
fix compilation issues under Windows
elikosan May 20, 2016
8fe85b5
Update torch-scm-1.rockspec
elikosan May 20, 2016
c7fbe8f
m.lib not found/necessary under Windows
elikosan May 20, 2016
8efe159
remove m.lib from list of required MKL blas libs for Windows
elikosan May 27, 2016
4d6ea12
FindMKL.cmake typo
elikosan May 27, 2016
62e51be
restore torch in rockspec
elikosan May 31, 2016
76cb36c
define ssize_t on Windows
elikosan Jun 1, 2016
3bce1c8
define ssize_t on Windows
elikosan Jun 1, 2016
be729d6
define ssize_t on Windows
elikosan Jun 1, 2016
7d4a2e8
Merge pull request #1 from torch/master
elikosan Jun 2, 2016
89ab8e4
support for MKL ilp64
elikosan Dec 6, 2016
933ff33
support for MKL ilp64
elikosan Dec 6, 2016
2118356
blas decl compat with ilp64 model
elikosan Dec 7, 2016
c8951bd
make ilp64 patch work on Windows
elikosan Dec 7, 2016
e4a8557
define MKL_ILP64
elikosan Dec 7, 2016
ba78c0f
avoid crashing on Windows with ilp64
elikosan Dec 12, 2016
8ed047a
use MKL_LP64 compiler flag to trigger 32bit ints on 64bits platforms
elikosan Dec 12, 2016
980eb85
Merge pull request #2 from torch/master
elikosan Dec 13, 2016
a87827f
Merge remote-tracking branch 'refs/remotes/torch/master'
elikosan Mar 2, 2017
3777373
uninitialized variable
elikosan Mar 3, 2017
8c7f188
fix broken link, rephrase ugly sentence.
elikosan Mar 9, 2017
211a321
Update THStorage.c
elikosan Mar 17, 2017
281699f
Update THStorage.c
elikosan Mar 17, 2017
d255c20
Update THStorage.c
elikosan Mar 17, 2017
3ea6595
Update THStorage.c
elikosan Mar 17, 2017
e6999a4
Update THStorage.c
elikosan Mar 17, 2017
f896da1
Update THStorage.c
elikosan Mar 17, 2017
b72d4d1
Merge pull request #3 from torch/master
elikosan Mar 17, 2017
a05a3d9
missing dllexport for Windows!
elikosan Mar 17, 2017
1b94d52
missing dllexport for Windows!
elikosan Mar 17, 2017
97c39e5
missing include
elikosan Mar 20, 2017
ca226c4
missing include
elikosan Mar 20, 2017
42ec69f
Update AVX.c
elikosan Mar 20, 2017
db90eaa
remove ifdef __AVX__
elikosan Mar 20, 2017
259b222
remove ifdef __AVX2__
elikosan Mar 20, 2017
48da138
revert
elikosan Apr 3, 2017
0293729
revert
elikosan Apr 3, 2017
3fb5afc
update from master
elikosan Apr 3, 2017
d3999bc
revert
elikosan Apr 3, 2017
2777b04
revert
elikosan Apr 3, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions doc/tensor.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
The `Tensor` class is probably the most important class in
`Torch`. Almost every package depends on this class. It is *__the__*
class for handling numeric data. As with pretty much anything in
[Torch7](./../index.md), tensors are
[Torch7](./index.md), tensors are
[serializable](file.md#torch.File.serialization).

__Multi-dimensional matrix__

A `Tensor` is a potentially multi-dimensional matrix. The number of
dimensions is unlimited that can be created using
[LongStorage](storage.md) with more dimensions.
A `Tensor` is a multi-dimensional matrix. The number of
dimensions is unlimited (up to what can be created using
[LongStorage](storage.md)).

Example:
```lua
Expand Down
22 changes: 17 additions & 5 deletions lib/TH/cmake/FindBLAS.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -242,32 +242,44 @@ endif()
# Determine if blas was compiled with the f2c conventions
IF (BLAS_LIBRARIES)
SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})

CHECK_C_SOURCE_RUNS("
#include <stdlib.h>
#include <stdio.h>
float x[4] = { 1, 2, 3, 4 };
float y[4] = { .1, .01, .001, .0001 };
int four = 4;
int one = 1;
#ifdef WIN32
typedef __int64 BLINT;
#else
typedef long BLINT;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure this typedef is correct? It used to give sdot_ an int* on non-WIN32 platforms; now it uses long*, but sizeof(long) != sizeof(int) on most x86_64 systems. The same problem seems to be present in other parts of this PR too.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#endif
BLINT four = 4;
BLINT one = 1;
extern double sdot_();
int main() {
int i;
double r = sdot_(&four, x, &one, y, &one);
exit((float)r != (float).1234);
}" BLAS_F2C_DOUBLE_WORKS )

CHECK_C_SOURCE_RUNS("
#include <stdlib.h>
#include <stdio.h>
float x[4] = { 1, 2, 3, 4 };
float y[4] = { .1, .01, .001, .0001 };
int four = 4;
int one = 1;
#ifdef WIN32
typedef __int64 BLINT;
#else
typedef long BLINT;
#endif
BLINT four = 4;
BLINT one = 1;
extern float sdot_();
int main() {
int i;
double r = sdot_(&four, x, &one, y, &one);
exit((float)r != (float).1234);
}" BLAS_F2C_FLOAT_WORKS )

IF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
MESSAGE(STATUS "This BLAS uses the F2C return conventions")
SET(BLAS_F2C TRUE)
Expand Down
2 changes: 1 addition & 1 deletion lib/TH/cmake/FindMKL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ CHECK_TYPE_SIZE("void*" SIZE_OF_VOIDP)
IF ("${SIZE_OF_VOIDP}" EQUAL 8)
SET(mklvers "em64t")
SET(iccvers "intel64")
SET(mkl64s "_lp64")
SET(mkl64s "_ilp64")
ELSE ("${SIZE_OF_VOIDP}" EQUAL 8)
SET(mklvers "32")
SET(iccvers "ia32")
Expand Down
2 changes: 1 addition & 1 deletion lib/TH/cmake/FindSSE.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ SET(AVX2_CODE "

int main()
{
__m256i a;
__m256i a = {0};
a = _mm256_abs_epi16(a);
return 0;
}
Expand Down
105 changes: 59 additions & 46 deletions lib/TH/generic/THBlas.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,37 @@
# define ffloat float
#endif

TH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy);
TH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy);
TH_EXTERNC void dscal_(int *n, double *a, double *x, int *incx);
TH_EXTERNC void sscal_(int *n, float *a, float *x, int *incx);
TH_EXTERNC void dcopy_(int *n, double *x, int *incx, double *y, int *incy);
TH_EXTERNC void scopy_(int *n, float *x, int *incx, float *y, int *incy);
TH_EXTERNC void daxpy_(int *n, double *a, double *x, int *incx, double *y, int *incy);
TH_EXTERNC void saxpy_(int *n, float *a, float *x, int *incx, float *y, int *incy);
TH_EXTERNC double ddot_(int *n, double *x, int *incx, double *y, int *incy);
TH_EXTERNC ffloat sdot_(int *n, float *x, int *incx, float *y, int *incy);
TH_EXTERNC void dgemv_(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
TH_EXTERNC void sgemv_(char *trans, int *m, int *n, float *alpha, float *a, int *lda, float *x, int *incx, float *beta, float *y, int *incy);
TH_EXTERNC void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *a, int *lda);
TH_EXTERNC void sger_(int *m, int *n, float *alpha, float *x, int *incx, float *y, int *incy, float *a, int *lda);
TH_EXTERNC void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc);
TH_EXTERNC void sgemm_(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc);
// Select BLAS_INT, the integer type used for every integer argument in the
// BLAS prototypes and calls in this file.
// Define MKL_LP64 to get 32-bit ints on 64-bit platforms.
#ifndef MKL_LP64
// MKL_LP64 is not defined: use 64-bit ints.
#ifdef WIN32
#define BLAS_INT __int64
#else
// NOTE(review): `long` is only 64 bits wide on LP64 targets, and the linked
// BLAS must actually expect 64-bit integers -- confirm for each platform.
#define BLAS_INT long
#endif
#else
// MKL_LP64 is defined: use 32-bit ints.
#define BLAS_INT int
#endif


TH_EXTERNC void dswap_(BLAS_INT *n, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
TH_EXTERNC void sswap_(BLAS_INT *n, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
TH_EXTERNC void dscal_(BLAS_INT *n, double *a, double *x, BLAS_INT *incx);
TH_EXTERNC void sscal_(BLAS_INT *n, float *a, float *x, BLAS_INT *incx);
TH_EXTERNC void dcopy_(BLAS_INT *n, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
TH_EXTERNC void scopy_(BLAS_INT *n, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
TH_EXTERNC void daxpy_(BLAS_INT *n, double *a, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
TH_EXTERNC void saxpy_(BLAS_INT *n, float *a, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
TH_EXTERNC double ddot_(BLAS_INT *n, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy);
TH_EXTERNC ffloat sdot_(BLAS_INT *n, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy);
TH_EXTERNC void dgemv_(char *trans, BLAS_INT *m, BLAS_INT *n, double *alpha, double *a, BLAS_INT *lda, double *x, BLAS_INT *incx, double *beta, double *y, BLAS_INT *incy);
TH_EXTERNC void sgemv_(char *trans, BLAS_INT *m, BLAS_INT *n, float *alpha, float *a, BLAS_INT *lda, float *x, BLAS_INT *incx, float *beta, float *y, BLAS_INT *incy);
TH_EXTERNC void dger_(BLAS_INT *m, BLAS_INT *n, double *alpha, double *x, BLAS_INT *incx, double *y, BLAS_INT *incy, double *a, BLAS_INT *lda);
TH_EXTERNC void sger_(BLAS_INT *m, BLAS_INT *n, float *alpha, float *x, BLAS_INT *incx, float *y, BLAS_INT *incy, float *a, BLAS_INT *lda);
TH_EXTERNC void dgemm_(char *transa, char *transb, BLAS_INT *m, BLAS_INT *n, BLAS_INT *k, double *alpha, double *a, BLAS_INT *lda, double *b, BLAS_INT *ldb, double *beta, double *c, BLAS_INT *ldc);
TH_EXTERNC void sgemm_(char *transa, char *transb, BLAS_INT *m, BLAS_INT *n, BLAS_INT *k, float *alpha, float *a, BLAS_INT *lda, float *b, BLAS_INT *ldb, float *beta, float *c, BLAS_INT *ldc);


void THBlas_(swap)(long n, real *x, long incx, real *y, long incy)
{
Expand All @@ -39,9 +52,9 @@ void THBlas_(swap)(long n, real *x, long incx, real *y, long incy)
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
int i_incy = (int)incy;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_incx = (BLAS_INT)incx;
BLAS_INT i_incy = (BLAS_INT)incy;

#if defined(TH_REAL_IS_DOUBLE)
dswap_(&i_n, x, &i_incx, y, &i_incy);
Expand Down Expand Up @@ -70,8 +83,8 @@ void THBlas_(scal)(long n, real a, real *x, long incx)
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (n <= INT_MAX) && (incx <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_incx = (BLAS_INT)incx;

#if defined(TH_REAL_IS_DOUBLE)
dscal_(&i_n, &a, x, &i_incx);
Expand Down Expand Up @@ -99,9 +112,9 @@ void THBlas_(copy)(long n, real *x, long incx, real *y, long incy)
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
int i_incy = (int)incy;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_incx = (BLAS_INT)incx;
BLAS_INT i_incy = (BLAS_INT)incy;

#if defined(TH_REAL_IS_DOUBLE)
dcopy_(&i_n, x, &i_incx, y, &i_incy);
Expand Down Expand Up @@ -129,9 +142,9 @@ void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy)
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
int i_incy = (int)incy;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_incx = (BLAS_INT)incx;
BLAS_INT i_incy = (BLAS_INT)incy;

#if defined(TH_REAL_IS_DOUBLE)
daxpy_(&i_n, &a, x, &i_incx, y, &i_incy);
Expand Down Expand Up @@ -159,9 +172,9 @@ real THBlas_(dot)(long n, real *x, long incx, real *y, long incy)
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_n = (int)n;
int i_incx = (int)incx;
int i_incy = (int)incy;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_incx = (BLAS_INT)incx;
BLAS_INT i_incy = (BLAS_INT)incy;

#if defined(TH_REAL_IS_DOUBLE)
return (real) ddot_(&i_n, x, &i_incx, y, &i_incy);
Expand Down Expand Up @@ -190,11 +203,11 @@ void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, re
(incx > 0) && (incx <= INT_MAX) &&
(incy > 0) && (incy <= INT_MAX) )
{
int i_m = (int)m;
int i_n = (int)n;
int i_lda = (int)lda;
int i_incx = (int)incx;
int i_incy = (int)incy;
BLAS_INT i_m = (BLAS_INT)m;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_lda = (BLAS_INT)lda;
BLAS_INT i_incx = (BLAS_INT)incx;
BLAS_INT i_incy = (BLAS_INT)incy;

#if defined(TH_REAL_IS_DOUBLE)
dgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy);
Expand Down Expand Up @@ -245,11 +258,11 @@ void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
{
int i_m = (int)m;
int i_n = (int)n;
int i_lda = (int)lda;
int i_incx = (int)incx;
int i_incy = (int)incy;
BLAS_INT i_m = (BLAS_INT)m;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_lda = (BLAS_INT)lda;
BLAS_INT i_incx = (BLAS_INT)incx;
BLAS_INT i_incy = (BLAS_INT)incy;

#if defined(TH_REAL_IS_DOUBLE)
dger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);
Expand Down Expand Up @@ -304,12 +317,12 @@ void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha,
#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) && (lda <= INT_MAX) && (ldb <= INT_MAX) && (ldc <= INT_MAX) )
{
int i_m = (int)m;
int i_n = (int)n;
int i_k = (int)k;
int i_lda = (int)lda;
int i_ldb = (int)ldb;
int i_ldc = (int)ldc;
BLAS_INT i_m = (BLAS_INT)m;
BLAS_INT i_n = (BLAS_INT)n;
BLAS_INT i_k = (BLAS_INT)k;
BLAS_INT i_lda = (BLAS_INT)lda;
BLAS_INT i_ldb = (BLAS_INT)ldb;
BLAS_INT i_ldc = (BLAS_INT)ldc;

#if defined(TH_REAL_IS_DOUBLE)
dgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);
Expand Down
2 changes: 0 additions & 2 deletions lib/TH/vector/AVX.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#if defined(__AVX__)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although it is harmless to remove the macro check for MSVC, I think it should be kept because I am not sure about the behavior on other platforms. Besides, this macro is what I found to be the cause of the broken compilation. #991

#ifndef _MSC_VER
#include <x86intrin.h>
#else
Expand Down Expand Up @@ -271,4 +270,3 @@ void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdi
}
}

#endif // defined(__AVX__)
33 changes: 17 additions & 16 deletions lib/TH/vector/AVX.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,23 @@
#define TH_AVX_H

#include <stddef.h>
#include "THGeneral.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AVX.h is included after THGeneral.h in file ./lib/TH/THVector.c. So there is no need to include THGeneral.h here.


void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n);
void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n);
void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n);
void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n);
void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
TH_API void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These APIs are not supposed to be exported; they are used internally as the implementations for specific instruction sets. Please refer to lib/TH/generic/THVectorDispatch.c.

TH_API void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n);
TH_API void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
TH_API void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
TH_API void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
TH_API void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
TH_API void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
TH_API void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
TH_API void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n);
TH_API void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n);
TH_API void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
TH_API void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
TH_API void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
TH_API void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
TH_API void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
TH_API void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n);

#endif
3 changes: 0 additions & 3 deletions lib/TH/vector/AVX2.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#if defined(__AVX2__)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as AVX.c

#ifndef _MSC_VER
#include <x86intrin.h>
#else
Expand Down Expand Up @@ -43,5 +42,3 @@ void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const flo
z[i] = x[i] + y[i] * c;
}
}

#endif // defined(__AVX2__)
5 changes: 3 additions & 2 deletions lib/TH/vector/AVX2.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
#define TH_AVX2_H

#include <stddef.h>
#include "THGeneral.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as AVX.h


void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
TH_API void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
TH_API void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);

#endif