Python源码分析（二）对象概述

https://github.com/python/cpython　1. Print　1.1 修改print函数　在Python3中print是函数　Python --> bltinmodule.c --> builtin_print() line:1850　了解编译python源代码：在PCbuild同目录层次下会生成Lib文件夹，在PCbuild目录下会生成win32文件夹，里面根据你编译的是……

平凡的久月

1166人浏览 · 2022-01-07 14:34:40

平凡的久月 · 2022-01-07 14:34:40 发布

bilibili视频讲解：https://space.bilibili.com/431392724
用户名：平凡的久月

1. 从Print函数出发

1.1 修改print函数

在Python3中print是函数

Python --> bltinmodule.c --> builtin_print() line:1850

了解编译python源代码：在PCbuild同目录层次下会生成Lib文件夹，在PCbuild目录下会生成win32文件夹，里面根据你编译的是debug还是release版本，会生成python_d.exe或者python.exe，打开它，就能运行了。

2. Object

在Python中，对象就是为C中的结构体在堆上申请一块内存，一般来说是不能被静态初始化的，并且也不能在栈空间上生存。但是Python中的类型对象是例外（相当于类），都是被静态初始化的。

在Python中一个对象一旦被创建，在内存中的大小就是不变的。所以维护容纳一个可变长度数据的对象（如List类型）就必须在对象内维护一个指向一块可变大小的内存区域的指针。

2.1 PyObject

PyObject 是整个 Python 对象机制的核心。

在 Python 的世界一切皆对象，不论是数字，还是字符串，甚至连数据类型、函数等都是一种对象。

// Macro definitions come in two forms: with parameters (function-like) and
// without parameters (object-like). Both macros below take no parameters.
#define PI 3.14
// STU is replaced textually by "struct Student" wherever it appears.
#define STU struct Student

// typedef introduces INT1 as another name for the type int.
typedef int INT1;

// Include/object.h
// Common header carried by every Python object: a reference count plus a
// pointer to the object's type object.
typedef struct _object {
    _PyObject_HEAD_EXTRA            /* macro defined elsewhere; not shown in this excerpt */
    Py_ssize_t ob_refcnt;           /* reference count used by the memory manager */
    struct _typeobject *ob_type;    /* points to the type object describing this object */
} PyObject;

在PyObject的定义中，变量ob_refcnt与Python的内存管理机制有关，实现了基于引用计数的垃圾收集机制。对象A的引用计数减少至0时，A就可以从堆上被删除以释放内存。

前提：了解类型对象（如List对象）与实例对象的区别

ob_type 是一个指向 struct _typeobject 的指针，它指向该对象所属类型的类型对象

// Include/object.h
/* Type object layout.  When Py_LIMITED_API is defined the struct is only
 * forward-declared (opaque); otherwise the full slot table below is visible.
 * The struct starts with PyObject_VAR_HEAD, followed by the type name, the
 * allocation sizes, and groups of function-pointer slots.
 */
#ifdef Py_LIMITED_API
typedef struct _typeobject PyTypeObject; /* opaque */
#else
typedef struct _typeobject {
    PyObject_VAR_HEAD
    const char *tp_name; /* For printing, in format "<module>.<name>" */
    Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */

    /* Methods to implement standard operations */

    destructor tp_dealloc;
    printfunc tp_print;
    getattrfunc tp_getattr;
    setattrfunc tp_setattr;
    PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
                                    or tp_reserved (Python 3) */
    reprfunc tp_repr;

    /* Method suites for standard classes */

    PyNumberMethods *tp_as_number;
    PySequenceMethods *tp_as_sequence;
    PyMappingMethods *tp_as_mapping;

    /* More standard operations (here for binary compatibility) */

    hashfunc tp_hash;
    ternaryfunc tp_call;
    reprfunc tp_str;
    getattrofunc tp_getattro;
    setattrofunc tp_setattro;

    /* Functions to access object as input/output buffer */
    PyBufferProcs *tp_as_buffer;

    /* Flags to define presence of optional/expanded features */
    unsigned long tp_flags;

    const char *tp_doc; /* Documentation string */

    /* Assigned meaning in release 2.0 */
    /* call function for all accessible objects */
    traverseproc tp_traverse;

    /* delete references to contained objects */
    inquiry tp_clear;

    /* Assigned meaning in release 2.1 */
    /* rich comparisons */
    richcmpfunc tp_richcompare;

    /* weak reference enabler */
    Py_ssize_t tp_weaklistoffset;

    /* Iterators */
    getiterfunc tp_iter;
    iternextfunc tp_iternext;

    /* Attribute descriptor and subclassing stuff */
    struct PyMethodDef *tp_methods;
    struct PyMemberDef *tp_members;
    struct PyGetSetDef *tp_getset;
    struct _typeobject *tp_base;
    PyObject *tp_dict;
    descrgetfunc tp_descr_get;
    descrsetfunc tp_descr_set;
    Py_ssize_t tp_dictoffset;
    initproc tp_init;
    allocfunc tp_alloc;
    newfunc tp_new;
    freefunc tp_free; /* Low-level free-memory routine */
    inquiry tp_is_gc; /* For PyObject_IS_GC */
    PyObject *tp_bases;
    PyObject *tp_mro; /* method resolution order */
    PyObject *tp_cache;
    PyObject *tp_subclasses;
    PyObject *tp_weaklist;
    destructor tp_del;

    /* Type attribute cache version tag. Added in version 2.6 */
    unsigned int tp_version_tag;

    destructor tp_finalize;

#ifdef COUNT_ALLOCS
    /* these must be last and never explicitly initialized */
    Py_ssize_t tp_allocs;
    Py_ssize_t tp_frees;
    Py_ssize_t tp_maxalloc;
    struct _typeobject *tp_prev;
    struct _typeobject *tp_next;
#endif
} PyTypeObject;
#endif

2.2 定长对象与变长对象

// Include/object.h
// Header for variable-length objects: the common PyObject header followed by
// an item count.
typedef struct {
    PyObject ob_base;   /* common object header (reference count and type pointer) */
    Py_ssize_t ob_size; /* Number of items in variable part */
} PyVarObject;

各种对象都拥有相同的对象头部，因此只需要一个PyObject *指针就可以引用任意一个对象，而不论该对象实际是一个什么对象。
思考以下两个问题，下一章节将详细解析。

怎么理解整型（LongObject）是变长对象？
整型在内存中的字节数是多少？

2.3 可变对象与不可变对象

总结：不可变数据类型更改后地址发生改变，可变数据类型更改地址不发生改变

不可变数据：Number, String, Tuple, None

（Number 数据类型用于存储数值。数据类型是不允许改变的,这就意味着如果改变 Number 数据类型的值，将重新分配内存空间。）

可变数据：List, Dictionary, Set

①Number：bool, long, float, complex.

②String：单引号与双引号使用完全相同；使用三引号可以指定一个多行字符串；转义符‘\’，但使用r可以让反斜杠不发生转义；字符串可以用+运算符连接在一起，用*运算符重复。

**注：**word[0] = 'm' 会导致错误（字符串是不可变对象，不支持按下标赋值）。

③Tuple: 元组中元素类型可以不相同；虽然tuple元素不可改变，但它可以包含可变的对象，比如list列表。

注：构造包含0个或1个元素的元组（()，(20,)），元组中只包含一个元素时，需要在元素后面添加逗号，否则括号会被当作运算符使用。（注意 tuple(20,) 会抛出 TypeError，因为 tuple() 的参数必须是可迭代对象。）

④List：列表中的元素类型可以不相同，可以嵌套列表。

注: 列表元素可以改变，但不可越界赋值。例如：List = [] # 合法；List[0] = 2 # 错误，抛出 IndexError

⑤Set: 可以用来删除重复元素。

⑥Dictionary：字典是一种映射类型，是一个无序的键（key）：值（value）对集合，并且键必须是惟一的；字典的关键字必须为不可变类型，且不能重复。

**注意：**为什么要设计str、None这样的不变对象呢？因为不变对象一旦创建，对象内部的数据就不能修改，这样就减少了由于修改数据导致的错误。此外，由于对象不变，多任务环境下同时读取对象不需要加锁，同时读一点问题都没有。

2.4 PyLongObject

// Include/longobject.h
// PyLongObject is only forward-declared here; the struct body follows below.
typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */

/* Long integer representation.
   The absolute value of a number is equal to
        SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
   Negative numbers are represented with ob_size < 0;
   zero is represented by ob_size == 0.
   In a normalized number, ob_digit[abs(ob_size)-1] (the most significant
   digit) is never zero.  Also, in all cases, for all valid i,
        0 <= ob_digit[i] <= MASK.
   The allocation function takes care of allocating extra memory
   so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available.

   CAUTION:  Generic code manipulating subtypes of PyVarObject has to
   be aware that ints abuse ob_size's sign bit.
*/


// Include/longintrepr.h
// Variable-length header followed by the digit array described above.
struct _longobject {
    PyObject_VAR_HEAD  
    digit ob_digit[1];  /* declared with one element; the allocator reserves abs(ob_size) digits */
};

// Include/object.h
/* PyObject_VAR_HEAD defines the initial segment of all variable-size
 * container objects.  These end with a declaration of an array with 1
 * element, but enough space is malloc'ed so that the array actually
 * has room for ob_size elements.  Note that ob_size is an element count,
 * not necessarily a byte count.
 */
// -------------- This part is very important -----------------
// PyObject_VAR_HEAD expands to a single PyVarObject member named ob_base
// (the expansion already ends with a semicolon).
#define PyObject_VAR_HEAD  PyVarObject ob_base;
// Py_INVALID_SIZE is the value -1 cast to Py_ssize_t.
#define Py_INVALID_SIZE (Py_ssize_t)-1

// Conceptually equivalent to the flattened layout below.
// NOTE(review): the PyObject and PyVarObject definitions quoted from
// Include/object.h declare ob_refcnt and ob_size as Py_ssize_t; plain int is
// used here only as a simplification.
typedef struct { 
    int ob_refcnt;       // reference count 
    struct _typeobject *ob_type;   // type of the object 
    int ob_size;        // number of digits held in ob_digit (an element count, not a bit count); a negative value marks a negative integer 
    digit ob_digit[1];  // array of digit, declared with length 1; the actual length depends on the value stored 
} PyLongObject; 

// Seen this way, PyLongObject is a variable-length object because it carries the variable-length object header.

思考：1. 如何实现整数相加而内存不溢出？2. 不可变对象指什么？与变长对象有联系吗？

// Reference code
// Include/longintrepr.h
// Selects the digit types and constants from PYLONG_BITS_IN_DIGIT:
// 30-bit digits are stored in uint32_t, 15-bit digits in unsigned short.
// Any other value stops compilation with #error.
#if PYLONG_BITS_IN_DIGIT == 30
typedef uint32_t digit;
typedef int32_t sdigit; /* signed variant of digit */
typedef uint64_t twodigits;
typedef int64_t stwodigits; /* signed variant of twodigits */
#define PyLong_SHIFT    30
#define _PyLong_DECIMAL_SHIFT   9 /* max(e such that 10**e fits in a digit) */
#define _PyLong_DECIMAL_BASE    ((digit)1000000000) /* 10 ** DECIMAL_SHIFT */
#elif PYLONG_BITS_IN_DIGIT == 15
typedef unsigned short digit;
typedef short sdigit; /* signed variant of digit */
typedef unsigned long twodigits;
typedef long stwodigits; /* signed variant of twodigits */
#define PyLong_SHIFT    15
#define _PyLong_DECIMAL_SHIFT   4 /* max(e such that 10**e fits in a digit) */
#define _PyLong_DECIMAL_BASE    ((digit)10000) /* 10 ** DECIMAL_SHIFT */
#else
#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"
#endif
/* PyLong_BASE is 2**PyLong_SHIFT; PyLong_MASK has the low PyLong_SHIFT bits set. */
#define PyLong_BASE     ((digit)1 << PyLong_SHIFT)
#define PyLong_MASK     ((digit)(PyLong_BASE - 1))

/* Compile-time guard: both supported shifts (15 and 30) are multiples of 5. */
#if PyLong_SHIFT % 5 != 0
#error "longobject.c requires that PyLong_SHIFT be divisible by 5"
#endif

2.5 PyTypeObject

类型对象存储对象的四种元信息

类型名：tp_name，主要是Python内部以及调试时使用。
占用内存空间大小：创建该对象时分配，即Py_ssize_t tp_basicsize, tp_itemsize。
与该类型对象相关联的操作信息（类似tp_print这样的函数指针）。

类型（其实是一个对象）的类型信息。

struct _typeobject *ob_type;  // PyTypeObject

// Objects/typeobject.c line:3553
// Static definition of the type object whose tp_name is "type". Its header is
// initialised with &PyType_Type, i.e. with its own address.
PyTypeObject PyType_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "type",                                     /* tp_name */
    sizeof(PyHeapTypeObject),                   /* tp_basicsize */
    sizeof(PyMemberDef),                        /* tp_itemsize */
    (destructor)type_dealloc,                   /* tp_dealloc */
    0,                                          /* tp_print */
    // ....... (remaining slots omitted in this excerpt)
}

所以PyType_Type 在Python中被称为 metaclass(元类)。

2.5.1 对象的创建

创建对象的方法：

通过Python C API来创建
- C API分为两类：AOL（Abstract Object Layer，泛型的API，用于任何python对象）、COL（Concrete Object Layer，与类型相关的API，用于某一种类型的对象）
- 直接分配内存
- PyObject* longObj = PyObject_New(PyObject, &PyLong_Type)
通过设计的类型对象创建实例对象

class A(object) --> A()（Python 中没有 new 关键字，直接调用类型对象即可创建实例）

创建对象的流程

tp_new对应到 C++ 中，可以视为new操作符；在Python中则对应__new__方法。
tp_init则是 Python 中的__init__ （相当于类的构造函数 , 即对创建的新对象进行初始化）。

__new__ does object creation and __init__ does object initialization.

分别对应Python源码中的tp_new与tp_init

// tp_new
// typeobject.c line:3669
// Takes the type to instantiate plus positional and keyword arguments and
// returns a PyObject pointer. The body is omitted in this excerpt.
static PyObject *
object_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
// ... (body omitted in this excerpt)
}

// tp_init
/* Initialiser slot: validates that no surplus arguments were passed.
 *
 * Returns 0 on success. Returns -1 with TypeError set when excess_args()
 * reports extra arguments and either
 *   - the type's tp_init is not object_init, or
 *   - the type's tp_new is object_new.
 */
static int
object_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    /* Without surplus arguments there is nothing to reject. */
    if (!excess_args(args, kwds)) {
        return 0;
    }

    PyTypeObject *cls = Py_TYPE(self);

    /* tp_init differs from object_init: report the generic message. */
    if (cls->tp_init != object_init) {
        PyErr_SetString(PyExc_TypeError, "object.__init__() takes no arguments");
        return -1;
    }

    /* Only a type whose tp_new is still object_new is rejected here. */
    if (cls->tp_new != object_new) {
        return 0;
    }

    PyErr_Format(PyExc_TypeError, "%.200s().__init__() takes no arguments",
                 cls->tp_name);
    return -1;
}

2.5.2 对象的行为

在PyTypeObject中定义了大量的函数指针，这些函数指针最终都会指向某个函数或者null。这些函数指针可以视为类型对象中所定义的操作，而这些操作直接决定一个对象在运行时所表现出的行为。

// object.h
// tp_hash is the PyTypeObject slot that holds a hashfunc pointer.
hashfunc tp_hash;

// hashfunc: pointer to a function taking a PyObject * and returning Py_hash_t.
typedef Py_hash_t (*hashfunc)(PyObject *);

也正是这些不同的操作信息，使得对象之间存在区别。非常重要的操作族：

tp_as_number （指向PyNumberMethods）
tp_as_sequence（指向PySequenceMethods）

tp_as_mapping（指向PyMappingMethods）

/* Method suites for standard classes */

PyNumberMethods *tp_as_number;
PySequenceMethods *tp_as_sequence;
PyMappingMethods *tp_as_mapping;


/* Method-suite tables referenced from PyTypeObject (tp_as_number,
 * tp_as_sequence, tp_as_mapping, tp_as_async, tp_as_buffer).  They are only
 * visible when Py_LIMITED_API is not defined.
 */
#ifndef Py_LIMITED_API
/* Numeric operation slots (the target of tp_as_number). */
typedef struct {
    /* Number implementations must check *both*
       arguments for proper type and implement the necessary conversions
       in the slot functions themselves. */

    binaryfunc nb_add;
    binaryfunc nb_subtract;
    binaryfunc nb_multiply;
    binaryfunc nb_remainder;
    binaryfunc nb_divmod;
    ternaryfunc nb_power;
    unaryfunc nb_negative;
    unaryfunc nb_positive;
    unaryfunc nb_absolute;
    inquiry nb_bool;
    unaryfunc nb_invert;
    binaryfunc nb_lshift;
    binaryfunc nb_rshift;
    binaryfunc nb_and;
    binaryfunc nb_xor;
    binaryfunc nb_or;
    unaryfunc nb_int;
    void *nb_reserved;  /* the slot formerly known as nb_long */
    unaryfunc nb_float;

    binaryfunc nb_inplace_add;
    binaryfunc nb_inplace_subtract;
    binaryfunc nb_inplace_multiply;
    binaryfunc nb_inplace_remainder;
    ternaryfunc nb_inplace_power;
    binaryfunc nb_inplace_lshift;
    binaryfunc nb_inplace_rshift;
    binaryfunc nb_inplace_and;
    binaryfunc nb_inplace_xor;
    binaryfunc nb_inplace_or;

    binaryfunc nb_floor_divide;
    binaryfunc nb_true_divide;
    binaryfunc nb_inplace_floor_divide;
    binaryfunc nb_inplace_true_divide;

    unaryfunc nb_index;

    binaryfunc nb_matrix_multiply;
    binaryfunc nb_inplace_matrix_multiply;
} PyNumberMethods;

/* Sequence operation slots (the target of tp_as_sequence).  The two void *
   members named was_* are placeholders kept in the layout. */
typedef struct {
    lenfunc sq_length;
    binaryfunc sq_concat;
    ssizeargfunc sq_repeat;
    ssizeargfunc sq_item;
    void *was_sq_slice;
    ssizeobjargproc sq_ass_item;
    void *was_sq_ass_slice;
    objobjproc sq_contains;

    binaryfunc sq_inplace_concat;
    ssizeargfunc sq_inplace_repeat;
} PySequenceMethods;

/* Mapping operation slots (the target of tp_as_mapping). */
typedef struct {
    lenfunc mp_length;
    binaryfunc mp_subscript;
    objobjargproc mp_ass_subscript;
} PyMappingMethods;

/* Async operation slots (the target of tp_as_async). */
typedef struct {
    unaryfunc am_await;
    unaryfunc am_aiter;
    unaryfunc am_anext;
} PyAsyncMethods;

/* Buffer access slots (the target of tp_as_buffer). */
typedef struct {
     getbufferproc bf_getbuffer;
     releasebufferproc bf_releasebuffer;
} PyBufferProcs;
#endif /* Py_LIMITED_API */

比如对于一个整数对象，自然对应到数值对象方法（即PyNumberMethods），所以可以通过tp_as_number->nb_add指定对该对象进行加法操作时的具体行为。（其实就是类似通过类决定函数调用的过程）

思考：设计一种类型同时支持int|list|dict三种特性？

// Hint
/* Look up `key` in the dict `op` and return the value written by the lookup.
 *
 * Returns NULL when `op` fails PyDict_Check, when hashing the key fails, or
 * when the lookup returns a negative index.  Errors raised by hashing or by
 * the lookup are cleared instead of being propagated, and an exception that
 * was already pending on entry is saved and restored around the lookup.
 */
PyObject *
PyDict_GetItem(PyObject *op, PyObject *key)
{
    Py_hash_t hash;
    Py_ssize_t ix;
    PyDictObject *mp = (PyDictObject *)op;
    PyThreadState *tstate;
    PyObject *value;

    if (!PyDict_Check(op))
        return NULL;
    /* For exact unicode keys, reuse the hash stored in the object; fall back
       to PyObject_Hash() for other keys or when the stored hash is -1. */
    if (!PyUnicode_CheckExact(key) ||
        (hash = ((PyASCIIObject *) key)->hash) == -1)
    {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            /* hashing failed: swallow the error and report NULL */
            PyErr_Clear();
            return NULL;
        }
    }

    /* We can arrive here with a NULL tstate during initialization: try
       running "python -Wi" for an example related to string interning.
       Let's just hope that no exception occurs then...  This must be
       _PyThreadState_Current and not PyThreadState_GET() because in debug
       mode, the latter complains if tstate is NULL. */
    /* NOTE(review): the comment above says PyThreadState_GET() must not be
       used here, yet the next line calls it -- confirm against upstream. */
    tstate = PyThreadState_GET();
    if (tstate != NULL && tstate->curexc_type != NULL) {
        /* preserve the existing exception */
        PyObject *err_type, *err_value, *err_tb;
        PyErr_Fetch(&err_type, &err_value, &err_tb);
        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
        /* ignore errors */
        PyErr_Restore(err_type, err_value, err_tb);
        if (ix < 0)
            return NULL;
    }
    else {
        /* no pending exception: just clear whatever the lookup may raise */
        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
        if (ix < 0) {
            PyErr_Clear();
            return NULL;
        }
    }
    return value;
}

2.5.3 类型的类型

每个对象对应着一种类型，类型的类型就是指类型对象的类型。

所有用户自定义class所对应的PyTypeObject对象都是通过PyType_Type这个对象创建的。

PyType_Type与MetaClass后续章节会详细阐述。

PyLongObject如何与PyType_Type建立关系？

每一个对象将自己的引用计数、类型信息保存在开始部分中，方便对这部分内存的初始化。

/* Static type object whose tp_name is "int".  Its header is initialised with
 * &PyType_Type.  tp_basicsize is the offset of ob_digit inside PyLongObject
 * and tp_itemsize is the size of one digit.
 */
PyTypeObject PyLong_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "int",                                      /* tp_name */
    offsetof(PyLongObject, ob_digit),           /* tp_basicsize */
    sizeof(digit),                              /* tp_itemsize */
    long_dealloc,                               /* tp_dealloc */
    0,                                          /* tp_print */
   // ...... (intermediate slots omitted in this excerpt)
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    long_new,                                   /* tp_new */
    PyObject_Del,                               /* tp_free */
};

2.6 多态性

多态（函数重写与重载）

函数之间传递的都是一种泛型指针-PyObject *，这个指针所指对象的类型从对象的ob_type域动态进行判断（从而实现多态机制）。

同一个函数在不同情况下表现出不同的行为，比如print()函数会根据传进去的参数的不同调用对应类型对象中的输出操作。

2.7 引用计数

Python垃圾收集机制，代替程序进行繁重的内存管理工作。

Python通过对一个对象的引用计数的管理来维护对象在内存中的存在与否。（ob_refcnt）

// Include/object.h
/* _Py_NewReference: after the optional allocation/ref-total counter macros,
   sets the reference count of op to 1. */
#define _Py_NewReference(op) (                          \
    _Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA         \
    _Py_INC_REFTOTAL  _Py_REF_DEBUG_COMMA               \
    Py_REFCNT(op) = 1)

/* _Py_ForgetReference: expands only to the per-type free counter macro. */
#define _Py_ForgetReference(op) _Py_INC_TPFREES(op)

#ifdef Py_LIMITED_API
PyAPI_FUNC(void) _Py_Dealloc(PyObject *);
#else
// Similar to a C++ destructor: destroys the object by calling the function
// pointer stored in the tp_dealloc slot of its type object.
#define _Py_Dealloc(op) (                               \
    _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA          \
    (*Py_TYPE(op)->tp_dealloc)((PyObject *)(op)))
#endif
/* The #if matching the next #endif lies outside this excerpt. */
#endif /* !Py_TRACE_REFS */

/* Py_INCREF: increments ob_refcnt of op. */
#define Py_INCREF(op) (                         \
    _Py_INC_REFTOTAL  _Py_REF_DEBUG_COMMA       \
    ((PyObject *)(op))->ob_refcnt++)

/* Py_DECREF: evaluates op once into a temporary, decrements its ob_refcnt,
   and calls _Py_Dealloc when the count reaches zero; otherwise it expands
   _Py_CHECK_REFCNT on the object.  The do { } while (0) wrapper makes the
   macro usable as a single statement. */
#define Py_DECREF(op)                                   \
    do {                                                \
        PyObject *_py_decref_tmp = (PyObject *)(op);    \
        if (_Py_DEC_REFTOTAL  _Py_REF_DEBUG_COMMA       \
        --(_py_decref_tmp)->ob_refcnt != 0)             \
            _Py_CHECK_REFCNT(_py_decref_tmp)            \
        else                                            \
            _Py_Dealloc(_py_decref_tmp);                \
    } while (0)