Introduction to Pandas and other libraries¶

(Many thanks to Evimaria Terzi and Mark Crovella for their code and examples)

Pandas¶

Pandas is the Python Data Analysis Library.

Pandas is an extremely versatile tool for manipulating datasets, mostly tabular data. You can think of Pandas as an evolution of Excel spreadsheets, with more capabilities for coding, and with SQL-style operations such as joins and group-by.

It also produces high quality plots with matplotlib, and integrates nicely with other libraries that expect NumPy arrays.

You can find more details at the pandas website: https://pandas.pydata.org/

Storing data tables¶

Most data can be viewed as tables or matrices (in the case where all entries are numeric). The rows correspond to objects and the columns correspond to the attributes or features.

There are different ways we can store such data tables in Python

Two-dimensional lists

In [ ]:
D = [[0.3, 10, 1000],[0.5,2,509],[0.4, 8, 789]]
print(D)
[[0.3, 10, 1000], [0.5, 2, 509], [0.4, 8, 789]]
In [ ]:
D = [[30000, 'Married', 1],[20000,'Single', 0],[45000, 'Married', 0]]
print(D)
[[30000, 'Married', 1], [20000, 'Single', 0], [45000, 'Married', 0]]

Numpy Arrays

NumPy is the main Python library for numerical computations and matrix manipulations. It has a lot of the functionality of Matlab, but also allows for data analysis operations (similar to Pandas). Read more about NumPy here: http://www.numpy.org/

The array (ndarray) is the main data structure of NumPy. It stores multidimensional numeric tables.

We can create numpy arrays from lists

In [2]:
import numpy as np

#1-dimensional array
x = np.array([2,5,18,14,4])
print ("\n Deterministic 1-dimensional array \n")
print (x)

#2-dimensional array
x = np.array([[2,5,18,14,4], [12,15,1,2,8]])
print ("\n Deterministic 2-dimensional array \n")
print (x)
 Deterministic 1-dimensional array 

[ 2  5 18 14  4]

 Deterministic 2-dimensional array 

[[ 2  5 18 14  4]
 [12 15  1  2  8]]

There are also numpy operations that create arrays of different types

In [ ]:
x = np.random.rand(5,5)
print ("\n Random 5x5 2-dimensional array \n")
print (x)

x = np.ones((4,4))
print ("\n 4x4 array with ones \n")
print (x)

x = np.diag([1,2,3])
print ("\n Diagonal matrix\n")
print(x)
 Random 5x5 2-dimensional array 

[[0.03573091 0.48721998 0.27345982 0.74677877 0.73333542]
 [0.35490938 0.67864398 0.16007005 0.50396204 0.70817655]
 [0.47903301 0.99878053 0.77743757 0.84333336 0.89785166]
 [0.09531723 0.51871558 0.91669484 0.74306461 0.75330863]
 [0.24522124 0.49625669 0.37934991 0.5472943  0.76281352]]

 4x4 array with ones 

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]

 Diagonal matrix

[[1 0 0]
 [0 2 0]
 [0 0 3]]

Why do we need NumPy arrays? Because we can perform various linear algebra operations directly on numeric arrays

For example:

In [ ]:
x = np.random.randint(10,size=(2,3))
print("\n Random 2x3 array with integers")
print(x)

#Matrix transpose
print ("\n Transpose of the matrix \n")
print (x.T)

#multiplication and addition with scalar value
print("\n Matrix 2x+1 \n")
print(2*x+1)
 Random 2x3 array with integers
[[6 4 4]
 [9 0 7]]

 Transpose of the matrix 

[[6 9]
 [4 0]
 [4 7]]

 Matrix 2x+1 

[[13  9  9]
 [19  1 15]]
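Beyond the transpose and scalar arithmetic, we can also multiply matrices. Here is a quick sketch using the @ operator (equivalent to np.dot):

In [ ]:
#Matrix product: x is 2x3, so x @ x.T is a 2x2 matrix
print("\n Matrix product x @ x.T \n")
print(x @ x.T)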

Transform back to list of lists

In [ ]:
lx = [list(y) for y in x]
lx
Out[ ]:
[[6, 4, 4], [9, 0, 7]]
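NumPy arrays also provide the tolist() method, which performs the same conversion (recursively for nested arrays):

In [ ]:
x.tolist()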

Pandas data frames

A data frame is a table in which each row and column is given a label. Very similar to a spreadsheet or a SQL table.

Pandas DataFrames are documented at: http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html

Pandas dataframes enable different data analysis operations

Creating Data Frames¶

A dataframe has names for the columns and the rows of the table. The column names are stored in the attribute columns, while the row names are stored in the attribute index. When these are not specified, they are just indexed by default with the numbers 0, 1, ...

There are multiple ways we can create a data frame. Here we list just a few.

In [1]:
import pandas as pd #The pandas library
from pandas import Series, DataFrame #Main pandas data structures
In [3]:
#Creating a data frame from a list of lists

df = pd.DataFrame([[1,2,3],[9,10,12]])
print(df)

# Each list becomes a row
# Names of columns default to 0,1,2
# Rows are indexed by position numbers 0,1
   0   1   2
0  1   2   3
1  9  10  12
In [ ]:
#Creating a data frame from a numpy array

df = pd.DataFrame(np.array([[1,2,3],[9,10,12]]))
print(df)
In [4]:
# Specifying column names
df = pd.DataFrame(np.array([[1,2,3],[9,10,12]]), columns=['A','B','C'])
print(df)
   A   B   C
0  1   2   3
1  9  10  12
In [43]:
#Creating a data frame from a dictionary
# Keys are column names, values are lists with column values

dfe = pd.DataFrame({'A':[1,2,3], 'B':['a','b','c']})
print(dfe)
   A  B
0  1  a
1  2  b
2  3  c
In [5]:
# Reading from a csv file:
df = pd.read_csv('example.csv')
print(df)

# The first row of the file is used for the column names
# The property columns gives us the column names
print(df.columns)
print(list(df.columns))
   NUMBER CHAR
0       1    a
1       2    b
2       3    c
Index(['NUMBER', 'CHAR'], dtype='object')
['NUMBER', 'CHAR']
In [7]:
# Reading from a csv file without header:
df = pd.read_csv('no-header.csv',header = None)
print(df)
   0  1
0  1  a
1  2  b
2  3  c
In [8]:
# Reading from an Excel file:
df = pd.read_excel('example.xlsx')
print(df)
   NUMBER CHAR
0       1    a
1       2    b
2       3    c
In [9]:
#Writing to a csv file:
df.to_csv('example2.csv')
for x in open('example2.csv').readlines():
    print(x.strip())
,NUMBER,CHAR
0,1,a
1,2,b
2,3,c
In [10]:
# By default the row index is written out as an extra column; we can omit it by setting index=False
df.to_csv('example2.csv',index = False)
for x in open('example2.csv').readlines():
    print(x.strip())
NUMBER,CHAR
1,a
2,b
3,c

Fetching data

For demonstration purposes, we will use data from Tiingo on stock quotes. We will see two ways of fetching data from Tiingo, one using the Tiingo client and one using the Data Reader library of Pandas.

More information on what types of data you can fetch is at: https://pandas-datareader.readthedocs.io/en/latest/remote_data.html

To fetch quotes from Tiingo you need to first create an account and obtain an API key. The key is then passed to the TiingoClient (alternatively, you can set the TIINGO_API_KEY environment variable to the value of the key), as shown below

In [11]:
from datetime import datetime #For handling dates
In [12]:
!pip install tiingo
import os
Collecting tiingo
  Downloading tiingo-0.15.6-py2.py3-none-any.whl.metadata (15 kB)
Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from tiingo) (2.32.3)
Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from tiingo) (1.8.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->tiingo) (3.4.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->tiingo) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->tiingo) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->tiingo) (2024.8.30)
Downloading tiingo-0.15.6-py2.py3-none-any.whl (15 kB)
Installing collected packages: tiingo
Successfully installed tiingo-0.15.6
In [13]:
from tiingo import TiingoClient

import pandas_datareader.data as web # For accessing web data


client = TiingoClient({'api_key':'614c1590a592cc6696f6082f83b2666cd83882ef'})
start = datetime(2018,1,1)
end = datetime(2018,12,31)
stocks_data = client.get_dataframe('META',frequency='daily',startDate=start,endDate=end)
stocks_data = stocks_data[['open','close','low','high','volume']]
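The second way uses the pandas-datareader library that we imported above as web. Below is a minimal sketch, assuming your installed version of pandas-datareader supports the 'tiingo' source; the API key shown is a placeholder:

In [ ]:
# Alternative (sketch): fetch the same data through pandas-datareader.
# 'YOUR_TIINGO_API_KEY' is a placeholder; substitute your own key,
# or set the TIINGO_API_KEY environment variable instead.
stocks_data_dr = web.DataReader('META', 'tiingo', start, end,
                                api_key='YOUR_TIINGO_API_KEY')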
In [14]:
# the method info() outputs basic information for our data frame
stocks_data.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2018-01-02 00:00:00+00:00 to 2018-12-31 00:00:00+00:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    251 non-null    float64
 1   close   251 non-null    float64
 2   low     251 non-null    float64
 3   high    251 non-null    float64
 4   volume  251 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 11.8 KB

The number of rows in the DataFrame:

In [15]:
len(stocks_data)
Out[15]:
251
In [16]:
#the method head() outputs the first rows of the data frame
stocks_data.head()
Out[16]:
open close low high volume
date
2018-01-02 00:00:00+00:00 177.68 181.42 177.55 181.58 17694891
2018-01-03 00:00:00+00:00 181.88 184.67 181.33 184.78 16595495
2018-01-04 00:00:00+00:00 184.90 184.33 184.10 186.21 13554357
2018-01-05 00:00:00+00:00 185.59 186.85 184.93 186.90 13042388
2018-01-08 00:00:00+00:00 187.20 188.28 186.33 188.90 14719216
In [ ]:
#the method tail() outputs the last rows of the data frame
stocks_data.tail()
Out[ ]:
open close low high volume
date
2018-12-24 00:00:00+00:00 123.10 124.06 123.02 129.74 22066002
2018-12-26 00:00:00+00:00 126.00 134.18 125.89 134.24 39723370
2018-12-27 00:00:00+00:00 132.44 134.52 129.67 134.99 31202509
2018-12-28 00:00:00+00:00 135.34 133.20 132.20 135.92 22627569
2018-12-31 00:00:00+00:00 134.45 131.09 129.95 134.64 24625308

Note that the date attribute is the index of the rows, not a regular column. The index gives a name to each row. The default index is the numbers 0, ..., len(df)-1; here the rows are indexed by the date.

In [ ]:
stocks_data.close
Out[ ]:
close
date
2018-01-02 00:00:00+00:00 181.42
2018-01-03 00:00:00+00:00 184.67
2018-01-04 00:00:00+00:00 184.33
2018-01-05 00:00:00+00:00 186.85
2018-01-08 00:00:00+00:00 188.28
... ...
2018-12-24 00:00:00+00:00 124.06
2018-12-26 00:00:00+00:00 134.18
2018-12-27 00:00:00+00:00 134.52
2018-12-28 00:00:00+00:00 133.20
2018-12-31 00:00:00+00:00 131.09

251 rows × 1 columns


In [17]:
#trying to access the date column will give an error

stocks_data.date
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-d893f040ef09> in <cell line: 3>()
      1 #trying to access the date column will give an error
      2 
----> 3 stocks_data.date

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in __getattr__(self, name)
   6297         ):
   6298             return self[name]
-> 6299         return object.__getattribute__(self, name)
   6300 
   6301     @final

AttributeError: 'DataFrame' object has no attribute 'date'
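To access the dates, use the index itself; alternatively, reset_index() turns the index back into a regular column. A quick sketch:

In [ ]:
stocks_data.index[:5]        # the dates live in the index
#stocks_data.reset_index()   # or turn the index into a regular column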
In [18]:
stocks_data.to_csv('stocks_data.csv')
for x in open('stocks_data.csv').readlines()[0:10]: # Read and print First 10 lines
    print(x.strip())
df = pd.read_csv('stocks_data.csv')
date,open,close,low,high,volume
2018-01-02 00:00:00+00:00,177.68,181.42,177.55,181.58,17694891
2018-01-03 00:00:00+00:00,181.88,184.67,181.33,184.78,16595495
2018-01-04 00:00:00+00:00,184.9,184.33,184.1,186.21,13554357
2018-01-05 00:00:00+00:00,185.59,186.85,184.93,186.9,13042388
2018-01-08 00:00:00+00:00,187.2,188.28,186.33,188.9,14719216
2018-01-09 00:00:00+00:00,188.7,187.87,187.1,188.8,12342722
2018-01-10 00:00:00+00:00,186.94,187.84,185.63,187.89,10464528
2018-01-11 00:00:00+00:00,188.4,187.77,187.38,188.4,8855144
2018-01-12 00:00:00+00:00,178.06,179.37,177.4,181.48,76645626
In [ ]:
len(df)
Out[ ]:
251
In [19]:
#the method head() outputs the first rows of the data frame
df.head()
Out[19]:
date open close low high volume
0 2018-01-02 00:00:00+00:00 177.68 181.42 177.55 181.58 17694891
1 2018-01-03 00:00:00+00:00 181.88 184.67 181.33 184.78 16595495
2 2018-01-04 00:00:00+00:00 184.90 184.33 184.10 186.21 13554357
3 2018-01-05 00:00:00+00:00 185.59 186.85 184.93 186.90 13042388
4 2018-01-08 00:00:00+00:00 187.20 188.28 186.33 188.90 14719216
In [20]:
df.date
Out[20]:
date
0 2018-01-02 00:00:00+00:00
1 2018-01-03 00:00:00+00:00
2 2018-01-04 00:00:00+00:00
3 2018-01-05 00:00:00+00:00
4 2018-01-08 00:00:00+00:00
... ...
246 2018-12-24 00:00:00+00:00
247 2018-12-26 00:00:00+00:00
248 2018-12-27 00:00:00+00:00
249 2018-12-28 00:00:00+00:00
250 2018-12-31 00:00:00+00:00

251 rows × 1 columns


Note that in the new dataframe, there is now a date column, while the index values are numbers 0,1,...

Working with data columns¶

The columns are the "features" in your data

In [21]:
#an object that refers to the names of the columns
df.columns
Out[21]:
Index(['date', 'open', 'close', 'low', 'high', 'volume'], dtype='object')

We can also assign a list to the columns property in order to change the attribute names. Note that this relabels the columns purely by position, so the list must follow the current column order. (The list assigned below does not: after the rename the order is date, open, close, low, high, V, so the labels close and high end up swapped with respect to the underlying data, a common pitfall to watch out for.)

Alternatively, you can change the name of an attribute using rename:

In [22]:
df = df.rename(columns = {'volume':'V'})
print(list(df.columns))
df.columns = ['date', 'open', 'high', 'low', 'close', 'vol']
df.head()
['date', 'open', 'close', 'low', 'high', 'V']
Out[22]:
date open high low close vol
0 2018-01-02 00:00:00+00:00 177.68 181.42 177.55 181.58 17694891
1 2018-01-03 00:00:00+00:00 181.88 184.67 181.33 184.78 16595495
2 2018-01-04 00:00:00+00:00 184.90 184.33 184.10 186.21 13554357
3 2018-01-05 00:00:00+00:00 185.59 186.85 184.93 186.90 13042388
4 2018-01-08 00:00:00+00:00 187.20 188.28 186.33 188.90 14719216

Selecting a single column from your data.

It is important to keep in mind that selection returns a new object and does not modify the original data frame. Selecting a single column returns a pandas Series; selecting a list of columns, as shown further below, returns a new data frame.

In [23]:
df['open'].head()
Out[23]:
open
0 177.68
1 181.88
2 184.90
3 185.59
4 187.20

Another way of selecting a single column from your data, using attribute (dot) notation

In [ ]:
df.open.head()
Out[ ]:
open
0 177.68
1 181.88
2 184.90
3 185.59
4 187.20

Selecting multiple columns

In [ ]:
df[['open','close']].head()
Out[ ]:
open close
0 177.68 181.58
1 181.88 184.78
2 184.90 186.21
3 185.59 186.90
4 187.20 188.90

We can use the values attribute to obtain the values of one or more attributes. It returns a numpy array. You can transform it into a list by applying the list() operator.

In [ ]:
df.open.values[:10]
Out[ ]:
array([177.68, 181.88, 184.9 , 185.59, 187.2 , 188.7 , 186.94, 188.4 ,
       178.06, 181.5 ])
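For example, a quick sketch of the list conversion:

In [ ]:
list(df.open.values[:10])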
In [24]:
type(df.open.values[:10])
Out[24]:
numpy.ndarray
In [ ]:
type(df[['open','close']])
Out[ ]:
pandas.core.frame.DataFrame
In [ ]:
df[['open','close']].values[:10]
Out[ ]:
array([[177.68, 181.58],
       [181.88, 184.78],
       [184.9 , 186.21],
       [185.59, 186.9 ],
       [187.2 , 188.9 ],
       [188.7 , 188.8 ],
       [186.94, 187.89],
       [188.4 , 188.4 ],
       [178.06, 181.48],
       [181.5 , 181.75]])

Data Frame methods¶

A DataFrame object has many useful methods.

In [25]:
df.mean() #produces the mean of the columns/features
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-25-bfb9cc9bfb14> in <cell line: 1>()
----> 1 df.mean() #produces the mean of the columns/features

/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in mean(self, axis, skipna, numeric_only, **kwargs)
  11691         **kwargs,
  11692     ):
> 11693         result = super().mean(axis, skipna, numeric_only, **kwargs)
  11694         if isinstance(result, Series):
  11695             result = result.__finalize__(self, method="mean")

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in mean(self, axis, skipna, numeric_only, **kwargs)
  12418         **kwargs,
  12419     ) -> Series | float:
> 12420         return self._stat_function(
  12421             "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs
  12422         )

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in _stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
  12375         validate_bool_kwarg(skipna, "skipna", none_allowed=False)
  12376 
> 12377         return self._reduce(
  12378             func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
  12379         )

/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in _reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
  11560         # After possibly _get_data and transposing, we are now in the
  11561         #  simple case where we can use BlockManager.reduce
> 11562         res = df._mgr.reduce(blk_func)
  11563         out = df._constructor_from_mgr(res, axes=res.axes).iloc[0]
  11564         if out_dtype is not None and out.dtype != "boolean":

/usr/local/lib/python3.10/dist-packages/pandas/core/internals/managers.py in reduce(self, func)
   1498         res_blocks: list[Block] = []
   1499         for blk in self.blocks:
-> 1500             nbs = blk.reduce(func)
   1501             res_blocks.extend(nbs)
   1502 

/usr/local/lib/python3.10/dist-packages/pandas/core/internals/blocks.py in reduce(self, func)
    402         assert self.ndim == 2
    403 
--> 404         result = func(self.values)
    405 
    406         if self.values.ndim == 1:

/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in blk_func(values, axis)
  11479                     return np.array([result])
  11480             else:
> 11481                 return op(values, axis=axis, skipna=skipna, **kwds)
  11482 
  11483         def _get_data() -> DataFrame:

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in f(values, axis, skipna, **kwds)
    145                     result = alt(values, axis=axis, skipna=skipna, **kwds)
    146             else:
--> 147                 result = alt(values, axis=axis, skipna=skipna, **kwds)
    148 
    149             return result

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in new_func(values, axis, skipna, mask, **kwargs)
    402             mask = isna(values)
    403 
--> 404         result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
    405 
    406         if datetimelike:

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in nanmean(values, axis, skipna, mask)
    718     count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
    719     the_sum = values.sum(axis, dtype=dtype_sum)
--> 720     the_sum = _ensure_numeric(the_sum)
    721 
    722     if axis is not None and getattr(the_sum, "ndim", False):

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in _ensure_numeric(x)
   1684             if inferred in ["string", "mixed"]:
   1685                 # GH#44008, GH#36703 avoid casting e.g. strings to numeric
-> 1686                 raise TypeError(f"Could not convert {x} to numeric")
   1687             try:
   1688                 x = x.astype(np.complex128)

TypeError: Could not convert ['2018-01-02 00:00:00+00:002018-01-03 00:00:00+00:002018-01-04 00:00:00+00:002018-01-05 00:00:00+00:002018-01-08 00:00:00+00:002018-01-09 00:00:00+00:002018-01-10 00:00:00+00:002018-01-11 00:00:00+00:002018-01-12 00:00:00+00:002018-01-16 00:00:00+00:002018-01-17 00:00:00+00:002018-01-18 00:00:00+00:002018-01-19 00:00:00+00:002018-01-22 00:00:00+00:002018-01-23 00:00:00+00:002018-01-24 00:00:00+00:002018-01-25 00:00:00+00:002018-01-26 00:00:00+00:002018-01-29 00:00:00+00:002018-01-30 00:00:00+00:002018-01-31 00:00:00+00:002018-02-01 00:00:00+00:002018-02-02 00:00:00+00:002018-02-05 00:00:00+00:002018-02-06 00:00:00+00:002018-02-07 00:00:00+00:002018-02-08 00:00:00+00:002018-02-09 00:00:00+00:002018-02-12 00:00:00+00:002018-02-13 00:00:00+00:002018-02-14 00:00:00+00:002018-02-15 00:00:00+00:002018-02-16 00:00:00+00:002018-02-20 00:00:00+00:002018-02-21 00:00:00+00:002018-02-22 00:00:00+00:002018-02-23 00:00:00+00:002018-02-26 00:00:00+00:002018-02-27 00:00:00+00:002018-02-28 00:00:00+00:002018-03-01 00:00:00+00:002018-03-02 00:00:00+00:002018-03-05 00:00:00+00:002018-03-06 00:00:00+00:002018-03-07 00:00:00+00:002018-03-08 00:00:00+00:002018-03-09 00:00:00+00:002018-03-12 00:00:00+00:002018-03-13 00:00:00+00:002018-03-14 00:00:00+00:002018-03-15 00:00:00+00:002018-03-16 00:00:00+00:002018-03-19 00:00:00+00:002018-03-20 00:00:00+00:002018-03-21 00:00:00+00:002018-03-22 00:00:00+00:002018-03-23 00:00:00+00:002018-03-26 00:00:00+00:002018-...

For the date column we cannot calculate the mean, because it stores strings.

Recent versions of the pandas library raise an error here (older versions only warned and skipped such columns). We should preselect the attributes we want to compute the mean for

In [26]:
df[['open','high','low','close']].mean()
Out[26]:
0
open 171.472948
high 171.510956
low 169.303128
close 173.613347

In [ ]:
df[['open','high','low','close']].std() #produces the standard deviation of the columns/features
Out[ ]:
0
open 19.696493
high 19.977452
low 20.074408
close 19.424564

In [ ]:
df[['open','high','low','close']].sem() #produces the standard error of the mean of the columns/features
Out[ ]:
0
open 1.243232
high 1.260966
low 1.267085
close 1.226068

Here is a manual way to compute a confidence interval for the mean. What you need is the sample size and the desired confidence level.

In [ ]:
#confidence interval
import scipy.stats as stats
conf = 0.95
t = stats.t.ppf((1+conf)/2.0, len(df)-1)
low = df[['open','high','low','close']].mean() - t*df[['open','high','low','close']].sem()
high = df[['open','high','low','close']].mean() + t*df[['open','high','low','close']].sem()
pd.DataFrame({'CI lower end':low, 'CI higher end':high})
Out[ ]:
CI lower end CI higher end
open 169.024405 173.921491
high 169.027487 173.994426
low 166.807605 171.798651
close 171.198608 176.028085
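scipy can also compute this interval directly with stats.t.interval; a minimal sketch using the same sample mean and standard error as above:

In [ ]:
m = df[['open','high','low','close']].mean()
se = df[['open','high','low','close']].sem()
stats.t.interval(conf, len(df)-1, loc=m, scale=se)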
In [ ]:
df[['open','high','low','close']].median() #produces the median of the columns/features
Out[ ]:
0
open 174.89
high 174.70
low 172.83
close 176.98

In [ ]:
df.open.mean()
Out[ ]:
171.47294820717133

Use describe to get the summary statistics for all the (numeric) columns of the data

In [27]:
stocks_data.describe()
Out[27]:
open close low high volume
count 251.000000 251.000000 251.000000 251.000000 2.510000e+02
mean 171.472948 171.510956 169.303128 173.613347 2.743828e+07
std 19.696493 19.977452 20.074408 19.424564 1.910143e+07
min 123.100000 124.060000 123.020000 129.740000 8.855144e+06
25% 157.815000 157.915000 155.525000 160.745000 1.750763e+07
50% 174.890000 174.700000 172.830000 176.980000 2.186093e+07
75% 184.915000 185.270000 183.420000 186.450000 2.984990e+07
max 215.720000 217.500000 214.270000 218.620000 1.698037e+08

We can obtain the sum of the column entries using the sum operation

In [28]:
stocks_data.sum()
Out[28]:
0
open 4.303971e+04
close 4.304925e+04
low 4.249509e+04
high 4.357695e+04
volume 6.887007e+09

The functions we have seen work on columns. We can apply them to rows as well by specifying the axis of the data.

axis = 0 refers to the index, i.e. the rows, and is the default behavior.

axis = 1 refers to the columns.

It can be confusing, but the axis refers to the axis along which we perform the operation. For example, when we specify the axis for the sum method, it means that we sum over that axis. So if we sum over axis 0 we collapse the rows and obtain the sum of each column, while summing over axis 1 gives the sum of each row.

In [29]:
df.sum(axis=1)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-29-f6cc363c055e> in <cell line: 1>()
----> 1 df.sum(axis=1)

/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in sum(self, axis, skipna, numeric_only, min_count, **kwargs)
  11668         **kwargs,
  11669     ):
> 11670         result = super().sum(axis, skipna, numeric_only, min_count, **kwargs)
  11671         return result.__finalize__(self, method="sum")
  11672 

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in sum(self, axis, skipna, numeric_only, min_count, **kwargs)
  12504         **kwargs,
  12505     ):
> 12506         return self._min_count_stat_function(
  12507             "sum", nanops.nansum, axis, skipna, numeric_only, min_count, **kwargs
  12508         )

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in _min_count_stat_function(self, name, func, axis, skipna, numeric_only, min_count, **kwargs)
  12487             axis = 0
  12488 
> 12489         return self._reduce(
  12490             func,
  12491             name=name,

/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in _reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
  11560         # After possibly _get_data and transposing, we are now in the
  11561         #  simple case where we can use BlockManager.reduce
> 11562         res = df._mgr.reduce(blk_func)
  11563         out = df._constructor_from_mgr(res, axes=res.axes).iloc[0]
  11564         if out_dtype is not None and out.dtype != "boolean":

/usr/local/lib/python3.10/dist-packages/pandas/core/internals/managers.py in reduce(self, func)
   1498         res_blocks: list[Block] = []
   1499         for blk in self.blocks:
-> 1500             nbs = blk.reduce(func)
   1501             res_blocks.extend(nbs)
   1502 

/usr/local/lib/python3.10/dist-packages/pandas/core/internals/blocks.py in reduce(self, func)
    402         assert self.ndim == 2
    403 
--> 404         result = func(self.values)
    405 
    406         if self.values.ndim == 1:

/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in blk_func(values, axis)
  11479                     return np.array([result])
  11480             else:
> 11481                 return op(values, axis=axis, skipna=skipna, **kwds)
  11482 
  11483         def _get_data() -> DataFrame:

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in _f(*args, **kwargs)
     83                 )
     84             try:
---> 85                 return f(*args, **kwargs)
     86             except ValueError as e:
     87                 # we want to transform an object array

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in new_func(values, axis, skipna, mask, **kwargs)
    402             mask = isna(values)
    403 
--> 404         result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
    405 
    406         if datetimelike:

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in newfunc(values, axis, **kwargs)
    475             return np.array(results)
    476 
--> 477         return func(values, axis=axis, **kwargs)
    478 
    479     return cast(F, newfunc)

/usr/local/lib/python3.10/dist-packages/pandas/core/nanops.py in nansum(values, axis, skipna, min_count, mask)
    644         dtype_sum = np.dtype(np.float64)
    645 
--> 646     the_sum = values.sum(axis, dtype=dtype_sum)
    647     the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
    648 

/usr/local/lib/python3.10/dist-packages/numpy/core/_methods.py in _sum(a, axis, dtype, out, keepdims, initial, where)
     47 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
     48          initial=_NoValue, where=True):
---> 49     return umr_sum(a, axis, dtype, out, keepdims, initial, where)
     50 
     51 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,

TypeError: can only concatenate str (not "float") to str
In [73]:
df[['open', 'close', 'low', 'high', 'vol']].sum(axis=1)
Out[73]:
0
date
2018-01-02 17695609.23
2018-01-03 16596227.66
2018-01-04 13555096.54
2018-01-05 13043132.27
2018-01-08 14719966.71
... ...
2018-12-24 22066501.92
2018-12-26 39723890.31
2018-12-27 31203040.62
2018-12-28 22628105.66
2018-12-31 24625838.13

251 rows × 1 columns


Sorting: You can sort by a specific column, ascending (default) or descending. You can also sort inplace.

In [31]:
#when inplace is False (the default) it returns a new dataframe that is sorted.
#when it is True it does not return anything, just changes the dataframe.
stocks_data.sort_values(by = 'open', ascending =False, inplace=False).head()
Out[31]:
open close low high volume
date
2018-07-25 00:00:00+00:00 215.72 217.50 214.27 218.62 64592585
2018-07-24 00:00:00+00:00 215.11 214.67 212.60 216.20 28468681
2018-07-23 00:00:00+00:00 210.58 210.91 208.80 211.62 16731969
2018-07-18 00:00:00+00:00 209.82 209.36 208.44 210.99 15334907
2018-07-20 00:00:00+00:00 208.85 209.94 208.50 211.50 16241508

Bulk Operations¶

Methods like sum( ) and std( ) work on entire columns.

We can run our own functions across all values in a column (or row) using apply( ).

In [32]:
df.date.head()
Out[32]:
date
0 2018-01-02 00:00:00+00:00
1 2018-01-03 00:00:00+00:00
2 2018-01-04 00:00:00+00:00
3 2018-01-05 00:00:00+00:00
4 2018-01-08 00:00:00+00:00

The values property of the column returns a numpy array of values for the column. Inspecting the first value reveals that these are strings with a particular format.

In [33]:
first_date = df.date.values[0]
first_date
#returns a string
Out[33]:
'2018-01-02 00:00:00+00:00'

The datetime module handles dates. The method strptime parses a string into a datetime object, according to a format string given as a parameter.

In [34]:
datetime.strptime(first_date, "%Y-%m-%d %H:%M:%S%z")
Out[34]:
datetime.datetime(2018, 1, 2, 0, 0, tzinfo=datetime.timezone.utc)

We will now make use of two operations:

The apply method applies a function, given as input, to all the entries of the data frame (or, as in the case below, to a single column).

A lambda expression lets us define an anonymous function that takes some parameters (d) and uses them to compute an expression.

Using a lambda function with apply, we can apply the function to all the entries of the data frame (in this case the column values)

In [35]:
df.date = df.date.apply(lambda d: datetime.strptime(d, "%Y-%m-%d %H:%M:%S%z").strftime("%Y-%m-%d"))
date_series = df.date # We want to keep the dates
df.date.head()

#Another way to do the same thing, by applying the function to every row (axis = 1)
#df.date = df.apply(lambda row: datetime.strptime(row.date, "%Y-%m-%d"), axis=1)
Out[35]:
date
0 2018-01-02
1 2018-01-03
2 2018-01-04
3 2018-01-05
4 2018-01-08

In [36]:
date_series.head()
Out[36]:
date
0 2018-01-02
1 2018-01-03
2 2018-01-04
3 2018-01-05
4 2018-01-08

For example, we can obtain the integer part of the open value

In [37]:
#dftest = df[['open','close']]
#dftest.apply(lambda x: int(x))
df.apply(lambda r: int(r.open), axis=1)
Out[37]:
0
0 177
1 181
2 184
3 185
4 187
... ...
246 123
247 126
248 132
249 135
250 134

251 rows × 1 columns


In [38]:
dftest = df['open']
dftest.apply(lambda x: int(x))
Out[38]:
open
0 177
1 181
2 184
3 185
4 187
... ...
246 123
247 126
248 132
249 135
250 134

251 rows × 1 columns


Each row in a DataFrame is associated with an index, which is a label that uniquely identifies a row.

The row indices so far have been auto-generated by pandas, and are simply integers starting from 0.

From now on we will use dates instead of integers for indices -- the benefits of this will show later.

Overwriting the index is as easy as assigning to the index property of the DataFrame.

In [40]:
list(df.index)[0:10]
Out[40]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
In [41]:
df.index = df.date
df.head()
Out[41]:
date open high low close vol
date
2018-01-02 2018-01-02 177.68 181.42 177.55 181.58 17694891
2018-01-03 2018-01-03 181.88 184.67 181.33 184.78 16595495
2018-01-04 2018-01-04 184.90 184.33 184.10 186.21 13554357
2018-01-05 2018-01-05 185.59 186.85 184.93 186.90 13042388
2018-01-08 2018-01-08 187.20 188.28 186.33 188.90 14719216

Another example using the simple example.csv data we loaded

In [44]:
dfe
Out[44]:
A B
0 1 a
1 2 b
2 3 c
In [45]:
dfe.index = dfe.B
In [46]:
dfe
Out[46]:
A B
B
a 1 a
b 2 b
c 3 c

Now that we have made an index based on date, we can drop the original date column. We kept a copy of it in date_series above, so we can still use the dates later on.

In [47]:
df = df.drop(columns = ['date']) #Equivalent to df = df.drop(['date'], axis=1)
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 251 entries, 2018-01-02 to 2018-12-31
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    251 non-null    float64
 1   high    251 non-null    float64
 2   low     251 non-null    float64
 3   close   251 non-null    float64
 4   vol     251 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 11.8+ KB
In [48]:
#axis = 0 refers to dropping labels from the rows (or you can use index = labels as a parameter);
#essentially we are dropping a set of rows.
#axis = 1 refers to dropping labels from the columns.
dfe.drop(index='b')
Out[48]:
A B
B
a 1 a
c 3 c

Accessing rows of the DataFrame¶

So far we've seen how to access a column of the DataFrame. To access a row we use a different notation.

To access a row by its index value, use the .loc[] indexer.

In [49]:
df.index = pd.to_datetime(df.index)
print(df.index)
DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
               '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
               '2018-01-12', '2018-01-16',
               ...
               '2018-12-17', '2018-12-18', '2018-12-19', '2018-12-20',
               '2018-12-21', '2018-12-24', '2018-12-26', '2018-12-27',
               '2018-12-28', '2018-12-31'],
              dtype='datetime64[ns]', name='date', length=251, freq=None)
In [50]:
df.loc[datetime(2018,5,7)]
Out[50]:
2018-05-07
open 177.35
high 177.97
low 177.17
close 179.50
vol 18697195.00

To access a row by its sequence number (i.e., like an array index), use .iloc[] ('integer location')

In [51]:
df.iloc[10:20] #dataframe with rows 10 up to (but not including) 20
Out[51]:
open high low close vol
date
2018-01-17 179.26 177.60 175.80 179.32 27356988
2018-01-18 178.13 179.80 177.08 180.98 22783759
2018-01-19 180.85 181.29 180.17 182.37 26266081
2018-01-22 180.80 185.37 180.41 185.39 20567285
2018-01-23 186.05 189.35 185.55 189.55 24956444
2018-01-24 189.89 186.55 186.52 190.66 22992031
2018-01-25 187.95 187.48 186.60 188.62 16698537
2018-01-26 187.75 190.00 186.81 190.00 16652197
2018-01-29 188.75 185.98 185.63 188.84 20022931
2018-01-30 187.62 187.12 181.84 188.18 20382841
In [52]:
df.iloc[0:2,[1,3]] #dataframe with rows 0:2, and the second and fourth columns
Out[52]:
high close
date
2018-01-02 181.42 181.58
2018-01-03 184.67 184.78
In [53]:
#select rows and columns
df[['high','close']].iloc[0:2]
Out[53]:
high close
date
2018-01-02 181.42 181.58
2018-01-03 184.67 184.78

To iterate over the rows, use .iterrows()¶

In [54]:
print(df.dtypes)
open     float64
high     float64
low      float64
close    float64
vol        int64
dtype: object
In [55]:
num_positive_days = 0
for idx, row in df.iterrows(): #returns the index name and the row
    if row.close > row.open:
        num_positive_days += 1

print("The total number of positive-gain days is {}.".format(num_positive_days))
The total number of positive-gain days is 244.

You can also do it this way:

In [56]:
num_positive_days = 0
for i in range(len(df)):
    row = df.iloc[i]
    if row.close > row.open:
        num_positive_days += 1

print("The total number of positive-gain days is {}.".format(num_positive_days))
The total number of positive-gain days is 244.

Or this way:

In [57]:
pos_days = [idx for (idx,row) in df.iterrows() if row.close > row.open]
print("The total number of positive-gain days is "+str(len(pos_days)))
The total number of positive-gain days is 244
In [58]:
pos_days[0:10]
Out[58]:
[Timestamp('2018-01-02 00:00:00'),
 Timestamp('2018-01-03 00:00:00'),
 Timestamp('2018-01-04 00:00:00'),
 Timestamp('2018-01-05 00:00:00'),
 Timestamp('2018-01-08 00:00:00'),
 Timestamp('2018-01-09 00:00:00'),
 Timestamp('2018-01-10 00:00:00'),
 Timestamp('2018-01-12 00:00:00'),
 Timestamp('2018-01-16 00:00:00'),
 Timestamp('2018-01-17 00:00:00')]
In [59]:
df.loc[pos_days]
Out[59]:
open high low close vol
date
2018-01-02 177.68 181.42 177.55 181.58 17694891
2018-01-03 181.88 184.67 181.33 184.78 16595495
2018-01-04 184.90 184.33 184.10 186.21 13554357
2018-01-05 185.59 186.85 184.93 186.90 13042388
2018-01-08 187.20 188.28 186.33 188.90 14719216
... ... ... ... ... ...
2018-12-24 123.10 124.06 123.02 129.74 22066002
2018-12-26 126.00 134.18 125.89 134.24 39723370
2018-12-27 132.44 134.52 129.67 134.99 31202509
2018-12-28 135.34 133.20 132.20 135.92 22627569
2018-12-31 134.45 131.09 129.95 134.64 24625308

244 rows × 5 columns

In [60]:
#Iterating over a dataframe iterates over its column names:
for x in df:
    print(x)
open
high
low
close
vol

Filtering¶

It is very easy to select interesting rows from the data.

All these operations below return a new DataFrame, which itself can be treated the same way as all DataFrames we have seen so far.

We can perform boolean operations on the columns. The result is a boolean Series with one True/False value per row

In [66]:
tmp_high = df.high > 170
tmp_high.tail()
Out[66]:
high
date
2018-12-24 False
2018-12-26 False
2018-12-27 False
2018-12-28 False
2018-12-31 False

Summing a Boolean array is the same as counting the number of True values.

In [64]:
sum(tmp_high)
Out[64]:
144

We can use the boolean Series to select rows with True values. The operation below returns only the rows of df where tmp_high is True

In [67]:
df[tmp_high].tail()
Out[67]:
open high low close vol
date
2018-08-28 178.10 176.26 175.83 178.24 15910675
2018-08-29 176.30 175.90 174.75 176.79 18678301
2018-08-30 175.90 177.64 175.70 179.79 24216532
2018-08-31 177.15 175.73 174.98 177.62 18065159
2018-09-04 173.50 171.16 168.80 173.89 29808971

Putting it all together, we have the following commonly-used patterns:

In [68]:
positive_days = df[df.close > df.open]
positive_days.head()
Out[68]:
open high low close vol
date
2018-01-02 177.68 181.42 177.55 181.58 17694891
2018-01-03 181.88 184.67 181.33 184.78 16595495
2018-01-04 184.90 184.33 184.10 186.21 13554357
2018-01-05 185.59 186.85 184.93 186.90 13042388
2018-01-08 187.20 188.28 186.33 188.90 14719216
In [74]:
very_positive_days = df[df.close-df.open > 5]
very_positive_days.head()
Out[74]:
open high low close vol
date
2018-02-01 188.22 193.09 187.89 195.32 53608910
2018-02-06 178.57 185.31 177.74 185.77 36829710
2018-02-14 173.45 179.52 173.21 179.81 27963758
2018-03-07 178.74 183.71 178.07 183.82 19097293
2018-03-21 164.80 169.39 163.30 173.40 105350867

We can have more complex boolean expressions. The and is written &, the or is |, the not is ~, and each condition needs to be wrapped in parentheses for the expression to work

In [75]:
df[(df.high<170)&(df.low>80)]
Out[75]:
open high low close vol
date
2018-03-20 167.47 168.15 161.95 170.20 128925534
2018-03-21 164.80 169.39 163.30 173.40 105350867
2018-03-22 166.13 164.89 163.72 170.27 73389988
2018-03-23 165.44 159.39 159.02 166.60 52306891
2018-03-26 160.82 160.06 149.02 161.10 125438294
... ... ... ... ... ...
2018-12-24 123.10 124.06 123.02 129.74 22066002
2018-12-26 126.00 134.18 125.89 134.24 39723370
2018-12-27 132.44 134.52 129.67 134.99 31202509
2018-12-28 135.34 133.20 132.20 135.92 22627569
2018-12-31 134.45 131.09 129.95 134.64 24625308

107 rows × 5 columns
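The same filter can also be expressed more readably with the query method; a quick sketch:

In [ ]:
df.query('high < 170 and low > 80')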

In [76]:
temp_df = df[df.high<170]
temp_df = temp_df[temp_df.low > 80] #chained filtering, equivalent to the combined condition above
temp_df
Out[76]:
open high low close vol
date
2018-03-20 167.47 168.15 161.95 170.20 128925534
2018-03-21 164.80 169.39 163.30 173.40 105350867
2018-03-22 166.13 164.89 163.72 170.27 73389988
2018-03-23 165.44 159.39 159.02 166.60 52306891
2018-03-26 160.82 160.06 149.02 161.10 125438294
... ... ... ... ... ...
2018-12-24 123.10 124.06 123.02 129.74 22066002
2018-12-26 126.00 134.18 125.89 134.24 39723370
2018-12-27 132.44 134.52 129.67 134.99 31202509
2018-12-28 135.34 133.20 132.20 135.92 22627569
2018-12-31 134.45 131.09 129.95 134.64 24625308

107 rows × 5 columns

Creating new columns¶

To create a new column, simply assign values to it. Think of the columns as a dictionary:

In [77]:
df['profit'] = (df.close - df.open)
df.head()
Out[77]:
open high low close vol profit
date
2018-01-02 177.68 181.42 177.55 181.58 17694891 3.90
2018-01-03 181.88 184.67 181.33 184.78 16595495 2.90
2018-01-04 184.90 184.33 184.10 186.21 13554357 1.31
2018-01-05 185.59 186.85 184.93 186.90 13042388 1.31
2018-01-08 187.20 188.28 186.33 188.90 14719216 1.70
In [78]:
df.profit[df.profit>0].describe()
Out[78]:
profit
count 244.000000
mean 2.201803
std 1.834447
min 0.010000
25% 0.785000
50% 1.585000
75% 3.427500
max 8.600000

In [79]:
for idx, row in df.iterrows():
    if row.close < row.open:
        df.loc[idx,'gain']='negative'
    elif (row.close - row.open) < 1:
        df.loc[idx,'gain']='small_gain'
    elif (row.close - row.open) < 3:
        df.loc[idx,'gain']='medium_gain'
    else:
        df.loc[idx,'gain']='large_gain'
df.head()
Out[79]:
open high low close vol profit gain
date
2018-01-02 177.68 181.42 177.55 181.58 17694891 3.90 large_gain
2018-01-03 181.88 184.67 181.33 184.78 16595495 2.90 medium_gain
2018-01-04 184.90 184.33 184.10 186.21 13554357 1.31 medium_gain
2018-01-05 185.59 186.85 184.93 186.90 13042388 1.31 medium_gain
2018-01-08 187.20 188.28 186.33 188.90 14719216 1.70 medium_gain

Here is another, more "functional", way to accomplish the same thing.

Define a function that classifies rows, and apply it to each row.

In [80]:
def gainrow(row):
    if row.close < row.open:
        return 'negative'
    elif (row.close - row.open) < 1:
        return 'small_gain'
    elif (row.close - row.open) < 3:
        return 'medium_gain'
    else:
        return 'large_gain'

df['test_column'] = df.apply(gainrow, axis = 1)
#axis = 1 applies the function to each row; axis = 0 (the default) applies it to each column
In [81]:
df.head()
Out[81]:
open high low close vol profit gain test_column
date
2018-01-02 177.68 181.42 177.55 181.58 17694891 3.90 large_gain large_gain
2018-01-03 181.88 184.67 181.33 184.78 16595495 2.90 medium_gain medium_gain
2018-01-04 184.90 184.33 184.10 186.21 13554357 1.31 medium_gain medium_gain
2018-01-05 185.59 186.85 184.93 186.90 13042388 1.31 medium_gain medium_gain
2018-01-08 187.20 188.28 186.33 188.90 14719216 1.70 medium_gain medium_gain

OK, point made, let's get rid of that extraneous test_column:

In [82]:
df = df.drop('test_column', axis = 1)
df.head()
Out[82]:
open high low close vol profit gain
date
2018-01-02 177.68 181.42 177.55 181.58 17694891 3.90 large_gain
2018-01-03 181.88 184.67 181.33 184.78 16595495 2.90 medium_gain
2018-01-04 184.90 184.33 184.10 186.21 13554357 1.31 medium_gain
2018-01-05 185.59 186.85 184.93 186.90 13042388 1.31 medium_gain
2018-01-08 187.20 188.28 186.33 188.90 14719216 1.70 medium_gain

Missing values¶

Data often has missing values. In Pandas these are denoted as NaN values. They may be part of our data (e.g. empty cells in an Excel sheet), or they may appear as the result of a join. There are special methods for handling these values.

In [83]:
mdf = pd.read_csv('example-missing.csv')
mdf
Out[83]:
A B C
0 1.0 a x
1 5.0 b NaN
2 3.0 c y
3 9.0 NaN z
4 NaN a x
In [84]:
mdf.A.mean()
Out[84]:
4.5

Note that mean() skipped the missing value (skipna=True is the default). We can fill in the missing values using the fillna method

In [85]:
mdf.fillna(0)
Out[85]:
A B C
0 1.0 a x
1 5.0 b 0
2 3.0 c y
3 9.0 0 z
4 0.0 a x
In [86]:
mdf.A = mdf.A.fillna(mdf.A.mean())
mdf = mdf.fillna('')
mdf
Out[86]:
A B C
0 1.0 a x
1 5.0 b
2 3.0 c y
3 9.0 z
4 4.5 a x

We can drop the rows with missing values

In [87]:
mdf = pd.read_csv('example-missing.csv')
mdf.dropna()
Out[87]:
A B C
0 1.0 a x
2 3.0 c y

We can also find the rows that have a missing value in a given column

In [88]:
mdf[mdf.B.isnull()]
Out[88]:
A B C
3 9.0 NaN z
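To count the missing values in each column, we can combine isnull() with sum(); a quick sketch:

In [ ]:
mdf.isnull().sum()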

Grouping¶

An extremely powerful DataFrame method is groupby().

This is entirely analogous to GROUP BY in SQL.

It will group the rows of a DataFrame by the values in one (or more) columns, and let you iterate through each group.

Here we will look at the average gain among the categories of gains (negative, small, medium and large) we defined above and stored in column gain.

In [89]:
gain_groups = df.groupby('gain')
In [90]:
type(gain_groups) #DataFrameGroupBy Object
Out[90]:
pandas.core.groupby.generic.DataFrameGroupBy

Essentially, gain_groups behaves like a dictionary

  • The keys are the unique values found in the gain column, and
  • The values are DataFrames that contain only the rows having the corresponding unique values.
In [91]:
for gain, gain_data in gain_groups:
    print(gain)
    print(gain_data.head())
    print('=============================')
large_gain
              open    high     low   close       vol  profit        gain
date                                                                    
2018-01-02  177.68  181.42  177.55  181.58  17694891    3.90  large_gain
2018-01-12  178.06  179.37  177.40  181.48  76645626    3.42  large_gain
2018-01-22  180.80  185.37  180.41  185.39  20567285    4.59  large_gain
2018-01-23  186.05  189.35  185.55  189.55  24956444    3.50  large_gain
2018-02-01  188.22  193.09  187.89  195.32  53608910    7.10  large_gain
=============================
medium_gain
              open    high     low   close       vol  profit         gain
date                                                                     
2018-01-03  181.88  184.67  181.33  184.78  16595495    2.90  medium_gain
2018-01-04  184.90  184.33  184.10  186.21  13554357    1.31  medium_gain
2018-01-05  185.59  186.85  184.93  186.90  13042388    1.31  medium_gain
2018-01-08  187.20  188.28  186.33  188.90  14719216    1.70  medium_gain
2018-01-18  178.13  179.80  177.08  180.98  22783759    2.85  medium_gain
=============================
small_gain
              open    high     low   close       vol  profit        gain
date                                                                    
2018-01-09  188.70  187.87  187.10  188.80  12342722    0.10  small_gain
2018-01-10  186.94  187.84  185.63  187.89  10464528    0.95  small_gain
2018-01-11  188.40  187.77  187.38  188.40   8855144    0.00  small_gain
2018-01-16  181.50  178.39  178.04  181.75  35027166    0.25  small_gain
2018-01-17  179.26  177.60  175.80  179.32  27356988    0.06  small_gain
=============================

We can obtain the dataframe that corresponds to a specific group by using the get_group method of the groupby object

In [92]:
sm = gain_groups.get_group('small_gain')
sm.head()
Out[92]:
open high low close vol profit gain
date
2018-01-09 188.70 187.87 187.10 188.80 12342722 0.10 small_gain
2018-01-10 186.94 187.84 185.63 187.89 10464528 0.95 small_gain
2018-01-11 188.40 187.77 187.38 188.40 8855144 0.00 small_gain
2018-01-16 181.50 178.39 178.04 181.75 35027166 0.25 small_gain
2018-01-17 179.26 177.60 175.80 179.32 27356988 0.06 small_gain
In [93]:
for gain, gain_data in df.groupby("gain"):
    print('The average closing value for the {} group is {}'.format(gain,
                                                    gain_data.close.mean()))
The average closing value for the large_gain group is 171.15434782608693
The average closing value for the medium_gain group is 175.6158333333333
The average closing value for the small_gain group is 173.3509302325581

The operation above can be done with a typical SQL-like group by, where we group by one or more attributes and aggregate the values of (some of) the other attributes.

For example, group by "gain" and take the average of the values for open, high, low, close, volume.

Example SQL code: SELECT gain, AVG(close) AS avg_close FROM stock_data GROUP BY gain;

You can also use other aggregators such as count, sum, median, max, min.

Pandas returns a new dataframe indexed by the values of the group-by attribute(s), with the other attributes as columns

In [94]:
gdf= df[['open','low','high','close','vol','gain']].groupby('gain').mean()
type(gdf)
Out[94]:
pandas.core.frame.DataFrame
In [96]:
gdf
Out[96]:
                   open         low        high       close           vol
gain
large_gain   166.503623  165.626087  169.394638  171.154348  3.481156e+07
medium_gain  173.778021  171.991458  173.742292  175.615833  2.371978e+07
small_gain   172.886860  169.252385  170.718140  173.350930  2.567338e+07

We can also apply a customized aggregation function using the agg method together with a lambda function. This is also useful for processing string attributes. For example, if we want the aggregation function to compute the max-min range, we can do the following:
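A minimal sketch of what that cell would look like (this code is ours, assuming df is the stock dataframe from earlier in the notebook, with the gain column already computed):

In [ ]:
#custom aggregation with agg and a lambda: the max-min range per group
#(assumes df has the numeric columns open, low, high, close, vol and the gain column)
df.groupby('gain')[['open','low','high','close','vol']].agg(lambda x: x.max() - x.min())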

If you want to remove the (hierarchical) index and make the group-by attribute(s) part of the table, you can use the reset_index method. This method turns the index attribute into one more column of the dataframe and reverts to the default integer index.

In [97]:
#This can be used to remove the hierarchical index, if necessary
gdf = gdf.reset_index()
gdf
Out[97]:
          gain        open         low        high       close           vol
0   large_gain  166.503623  165.626087  169.394638  171.154348  3.481156e+07
1  medium_gain  173.778021  171.991458  173.742292  175.615833  2.371978e+07
2   small_gain  172.886860  169.252385  170.718140  173.350930  2.567338e+07
In [98]:
gdf.set_index('gain')
Out[98]:
                   open         low        high       close           vol
gain
large_gain   166.503623  165.626087  169.394638  171.154348  3.481156e+07
medium_gain  173.778021  171.991458  173.742292  175.615833  2.371978e+07
small_gain   172.886860  169.252385  170.718140  173.350930  2.567338e+07

Another example:

In [99]:
test = pd.DataFrame({'A':[1,2,3,4],'B':['a','b','b','a'],'C':['a','a','b','a']})
test
Out[99]:
   A  B  C
0  1  a  a
1  2  b  a
2  3  b  b
3  4  a  a
In [100]:
gtest = test.groupby(['B','C']).mean()
gtest
#note that in this case we get a hierarchical index
Out[100]:
       A
B C
a a  2.5
b a  2.0
  b  3.0
In [101]:
gtest = gtest.reset_index()
gtest
#the hierarchical index is flattened out
Out[101]:
   B  C    A
0  a  a  2.5
1  b  a  2.0
2  b  b  3.0
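A shortcut worth knowing (not shown in the original output above): passing as_index=False to groupby produces the flat table directly, without a separate reset_index step.

In [ ]:
#equivalent one-step version: group without building a hierarchical index
test.groupby(['B','C'], as_index=False).mean()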

Joins¶

We can join data frames in a similar way to how we do joins in SQL.

In [102]:
client = TiingoClient({'api_key':'614c1590a592cc6696f6082f83b2666cd83882ef'})
start = datetime(2018,1,1)
end = datetime(2018,12,31)

dfb = client.get_dataframe('META',frequency='daily',startDate=start,endDate=end)
dfb = dfb[['open','close','low','high','volume']]

dgoog = client.get_dataframe('GOOGL',frequency='daily',startDate=start,endDate=end)
dgoog = dgoog[['open','close','low','high','volume']]

print(dfb.head())
print(dgoog.head())
                             open   close     low    high    volume
date                                                               
2018-01-02 00:00:00+00:00  177.68  181.42  177.55  181.58  17694891
2018-01-03 00:00:00+00:00  181.88  184.67  181.33  184.78  16595495
2018-01-04 00:00:00+00:00  184.90  184.33  184.10  186.21  13554357
2018-01-05 00:00:00+00:00  185.59  186.85  184.93  186.90  13042388
2018-01-08 00:00:00+00:00  187.20  188.28  186.33  188.90  14719216
                              open    close      low     high   volume
date                                                                  
2018-01-02 00:00:00+00:00  1053.02  1073.21  1053.02  1075.98  1555809
2018-01-03 00:00:00+00:00  1073.93  1091.52  1073.43  1096.10  1550593
2018-01-04 00:00:00+00:00  1097.09  1095.76  1094.26  1104.08  1289293
2018-01-05 00:00:00+00:00  1103.45  1110.29  1101.80  1113.58  1493389
2018-01-08 00:00:00+00:00  1111.00  1114.21  1110.00  1119.16  1148958

Perform a join on the date (the index value). Note the _x and _y suffixes in the column names, which differentiate columns with the same name coming from the left (x) and the right (y) dataframes.

In [106]:
common_dates = pd.merge(dfb,dgoog,on='date')
common_dates.head()
Out[106]:
                           open_x  close_x   low_x  high_x  volume_x   open_y  close_y    low_y   high_y  volume_y
date
2018-01-02 00:00:00+00:00  177.68   181.42  177.55  181.58  17694891  1053.02  1073.21  1053.02  1075.98   1555809
2018-01-03 00:00:00+00:00  181.88   184.67  181.33  184.78  16595495  1073.93  1091.52  1073.43  1096.10   1550593
2018-01-04 00:00:00+00:00  184.90   184.33  184.10  186.21  13554357  1097.09  1095.76  1094.26  1104.08   1289293
2018-01-05 00:00:00+00:00  185.59   186.85  184.93  186.90  13042388  1103.45  1110.29  1101.80  1113.58   1493389
2018-01-08 00:00:00+00:00  187.20   188.28  186.33  188.90  14719216  1111.00  1114.21  1110.00  1119.16   1148958
In [107]:
len(common_dates)
Out[107]:
251

We can specify the suffixes for the left and right tables.

In [108]:
common_dates = pd.merge(dfb,dgoog,on='date',suffixes=('_fb', '_goog'))
common_dates.head()
Out[108]:
                           open_fb  close_fb  low_fb  high_fb  volume_fb  open_goog  close_goog  low_goog  high_goog  volume_goog
date
2018-01-02 00:00:00+00:00   177.68    181.42  177.55   181.58   17694891    1053.02     1073.21   1053.02    1075.98      1555809
2018-01-03 00:00:00+00:00   181.88    184.67  181.33   184.78   16595495    1073.93     1091.52   1073.43    1096.10      1550593
2018-01-04 00:00:00+00:00   184.90    184.33  184.10   186.21   13554357    1097.09     1095.76   1094.26    1104.08      1289293
2018-01-05 00:00:00+00:00   185.59    186.85  184.93   186.90   13042388    1103.45     1110.29   1101.80    1113.58      1493389
2018-01-08 00:00:00+00:00   187.20    188.28  186.33   188.90   14719216    1111.00     1114.21   1110.00    1119.16      1148958

Compute gain and perform join on the date AND gain.

In [109]:
dfb['gain'] = dfb.apply(gainrow, axis = 1)
dgoog['gain'] = dgoog.apply(gainrow, axis = 1)
dfb['profit'] = dfb.close-dfb.open
dgoog['profit'] = dgoog.close-dgoog.open
In [110]:
common_gain_dates = pd.merge(dfb, dgoog, on=['date','gain'],suffixes=('_fb', '_goog'))
common_gain_dates.head()
Out[110]:
                           open_fb  close_fb  low_fb  high_fb  volume_fb        gain  profit_fb  open_goog  close_goog  low_goog  high_goog  volume_goog  profit_goog
date
2018-01-02 00:00:00+00:00   177.68    181.42  177.55   181.58   17694891  large_gain       3.74    1053.02     1073.21   1053.02    1075.98      1555809        20.19
2018-01-04 00:00:00+00:00   184.90    184.33  184.10   186.21   13554357    negative      -0.57    1097.09     1095.76   1094.26    1104.08      1289293        -1.33
2018-01-09 00:00:00+00:00   188.70    187.87  187.10   188.80   12342722    negative      -0.83    1118.44     1112.79   1108.20    1118.44      1335995        -5.65
2018-01-11 00:00:00+00:00   188.40    187.77  187.38   188.40    8855144    negative      -0.63    1112.31     1112.05   1106.48    1114.85      1102461        -0.26
2018-01-16 00:00:00+00:00   181.50    178.39  178.04   181.75   35027166    negative      -3.11    1140.31     1130.70   1126.66    1148.88      1783881        -9.61
In [111]:
len(common_gain_dates)
Out[111]:
128

Only 128 of the 251 common dates survive because merge performs an inner join by default: a date is kept only if both stocks fall into the same gain category on that date.

More join examples, including a left outer join:

In [116]:
left = pd.DataFrame({'key': ['foo', 'foo', 'boo'], 'lval': [1, 2,3]})
print(left)
print('\n')
right = pd.DataFrame({'key': ['foo', 'hoo'], 'rval': [4, 5]})
print(right)
print('\n')
   key  lval
0  foo     1
1  foo     2
2  boo     3


   key  rval
0  foo     4
1  hoo     5


In [117]:
dfm = pd.merge(left, right, on='key')
print(dfm)
   key  lval  rval
0  foo     1     4
1  foo     2     4

Left outer join

In [118]:
dfm = pd.merge(left, right, on='key', how='left') #keeps all the keys from the left and puts NaN for missing values
print(dfm)
print('\n')
dfm = dfm.fillna(0) #fills the NaN values with the specified value
dfm
   key  lval  rval
0  foo     1   4.0
1  foo     2   4.0
2  boo     3   NaN


Out[118]:
   key  lval  rval
0  foo     1   4.0
1  foo     2   4.0
2  boo     3   0.0
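fillna also accepts a dict mapping column names to fill values, which is handy when different columns need different defaults. A small sketch of ours, using the same left and right frames (re-running the merge so the NaN is still present):

In [ ]:
#per-column fill values: only the rval column gets filled, other columns are untouched
pd.merge(left, right, on='key', how='left').fillna({'rval': 0})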

You can also use the join method to perform joins between dataframes.

It works very similarly to merge, but it joins on the index (rather than on arbitrary columns) and performs a left outer join by default.

In [119]:
dfb.join(dgoog,lsuffix='_FB',rsuffix='_GOOG')
Out[119]:
                           open_FB  close_FB  low_FB  high_FB  volume_FB      gain_FB  profit_FB  open_GOOG  close_GOOG  low_GOOG  high_GOOG  volume_GOOG   gain_GOOG  profit_GOOG
date
2018-01-02 00:00:00+00:00   177.68    181.42  177.55   181.58   17694891   large_gain       3.74    1053.02     1073.21   1053.02    1075.98      1555809  large_gain        20.19
2018-01-03 00:00:00+00:00   181.88    184.67  181.33   184.78   16595495  medium_gain       2.79    1073.93     1091.52   1073.43    1096.10      1550593  large_gain        17.59
2018-01-04 00:00:00+00:00   184.90    184.33  184.10   186.21   13554357     negative      -0.57    1097.09     1095.76   1094.26    1104.08      1289293    negative        -1.33
2018-01-05 00:00:00+00:00   185.59    186.85  184.93   186.90   13042388  medium_gain       1.26    1103.45     1110.29   1101.80    1113.58      1493389  large_gain         6.84
2018-01-08 00:00:00+00:00   187.20    188.28  186.33   188.90   14719216  medium_gain       1.08    1111.00     1114.21   1110.00    1119.16      1148958  large_gain         3.21
...                            ...       ...     ...      ...        ...          ...        ...        ...         ...       ...        ...          ...         ...          ...
2018-12-24 00:00:00+00:00   123.10    124.06  123.02   129.74   22066002   small_gain       0.96     984.32      984.67    977.66    1012.12      1817955  small_gain         0.35
2018-12-26 00:00:00+00:00   126.00    134.18  125.89   134.24   39723370   large_gain       8.18     997.99     1047.85    992.65    1048.45      2315862  large_gain        49.86
2018-12-27 00:00:00+00:00   132.44    134.52  129.67   134.99   31202509  medium_gain       2.08    1026.20     1052.90   1007.00    1053.34      2299806  large_gain        26.70
2018-12-28 00:00:00+00:00   135.34    133.20  132.20   135.92   22627569     negative      -2.14    1059.50     1046.68   1042.00    1064.23      1718352    negative       -12.82
2018-12-31 00:00:00+00:00   134.45    131.09  129.95   134.64   24625308     negative      -3.36    1057.83     1044.96   1033.04    1062.99      1655504    negative       -12.87

251 rows × 14 columns

In [120]:
left.index=left.key
left = left.drop('key',axis=1)
right.index =right.key
right = right.drop('key',axis=1)
left.join(right)
Out[120]:
     lval  rval
key
foo     1   4.0
foo     2   4.0
boo     3   NaN
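The default left outer join of the join method can be overridden with the how parameter. For example, an inner join keeps only the keys that appear in both frames:

In [ ]:
#inner join on the index: only keys present in both frames survive
left.join(right, how='inner')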