Python Pandas commands for EDA

These commands are essential whenever you are working with Pandas for exploratory data analysis.

# imports
import random
import time
import os
import pandas as pd
import seaborn as sns
import numpy as np
from scipy import stats as sts
import matplotlib.pyplot as plt
a4_dims = (11.7, 8.27)
plt.rcParams['figure.figsize'] = a4_dims

import warnings
warnings.filterwarnings('ignore')

# to display all rows & columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)

# to remove scientific notation
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# timing your program
import time
start = time.time()
# your code here
end = time.time()
print(end - start)
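For short snippets, time.perf_counter() gives a higher-resolution clock. A minimal sketch (the summed range is just a stand-in for your own code):

start = time.perf_counter()
result = sum(range(1_000_000))        # replace with the code you want to time
end = time.perf_counter()
print(f"elapsed: {end - start:.4f} seconds")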


# working with date time
# convert a col to datetime pandas
df['date'] = pd.to_datetime(df['date'])

#Change working directory
import os
os.getcwd()
os.chdir("directory")

# IPython / Jupyter magics
%pwd
%cd folder

# get a single cell as a plain Python scalar
df['col'].iloc[1].item()

# read excel
pd.read_excel('file.xlsx', sheet_name = 'Sheet1')

# remove the index while exporting
df.to_csv('file.csv', index = False)

# importing multiple files in a directory
import glob
l = [pd.read_csv(filename) for filename in glob.glob("path/*.csv")]

df = pd.concat(l, axis = 0)   # stack row-wise
df = pd.concat(l, axis = 1)   # join column-wise

# df1.append(df2) is deprecated; concat does the same (axis = 0 -> rows, axis = 1 -> columns)
df = pd.concat([df1, df2], axis = 0)
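A tiny self-contained sketch of the two concat directions (the frames and column names are made up):

left = pd.DataFrame({'a': [1, 2]})
right = pd.DataFrame({'b': [3, 4]})
pd.concat([left, right], axis = 0)   # 4 rows; columns a and b, NaN where a frame lacks a column
pd.concat([left, right], axis = 1)   # 2 rows; columns a and b side by side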

# joining dataframes
df = pd.merge(df1, df2, on = 'com_col', how = 'outer')
#index reset
df.reset_index(drop = True, inplace = True)

#change dtype
df.Weight = df.Weight.astype('int64')

# replace blank / whitespace-only strings with NaN (assign the result back)
df = df.replace(r'^\s*$', np.nan, regex=True)
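A quick illustration of what that regex catches (the frame here is made up):

messy = pd.DataFrame({'col': ['x', '', '   ', 'y']})
clean = messy.replace(r'^\s*$', np.nan, regex=True)   # the empty and whitespace-only cells become NaN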

#accepts only 1D , get all unique elements in a column 
pd.unique(df['col1'])
df['col1'].unique()

#to flat 2D into 1D
df[['col1', 'col2']].values.ravel()

#to flat 2D into 1D & get only unique values
pd.unique(df[['col1', 'col2']].values.ravel())

# count of each unique value in a column
df['col1'].value_counts()

#number of unique elements in all columns
df.nunique()

#missing values
sns.heatmap(df.isnull())

# NaNs by col
df.isnull().sum(axis = 0)

#drop a column in df
df.drop(['colname'], axis = 1)

# percentiles
df['col'].quantile(0.1)   # 10th percentile

# top 10 percent cut-off
df['col'].quantile(0.9)   # 90th percentile

#filter columns based on names
col_list = list(df.filter(like = 'Avg_').columns)

#create a sample dataframe
df = pd.DataFrame({'col1': [1,2,3], 'col2': [11,22,33]})
df = pd.DataFrame({'x': [1,2,3], 'y': [11,22,33]}, columns = ['x', 'y'])   # columns selects/orders the dict keys

# create an empty df with n rows & m columns
pd.DataFrame(index=np.arange(1), columns=np.arange(8))

#sorting values by 1 col
df.sort_values(by = ['col1'], ascending = True)

#sorting values by more columns
df.sort_values(by = ['col1', 'col2', 'col3'], ascending = True)

#renaming the columns
df.rename(columns = {'col1':'rnm1', 'col2':'rnm2'}, inplace = True)

# column slicing
df.columns          # all columns
df.columns[:-1]     # all except the last
df.columns[::-1]    # columns in reverse order

# filter function
df.filter(['col1', 'col2', 'col3'])
df.filter(regex = r'\d')   # column names containing a digit

# format to 2 decimal places
"{:.2f}".format(x)

# row slicing
df[:4]   # top 4 rows

# filtering with conditions
df[(cond1) | (cond2) & (cond3)]    # where cond1 = df['col1'] > 2

# iloc & loc
df.iloc[row_index, col_index]        # position based
df.loc[(cond1), ['col1', 'col2']]    # label / boolean based, where cond1 = df['col1'] > 2

# groupby
df.groupby(by = ['col1'])['reqcols'].mean()
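A slightly fuller, self-contained groupby sketch using named aggregation (the frame and names are made up):

demo = pd.DataFrame({'col1': ['a', 'a', 'b'], 'reqcol': [1, 2, 3]})
summary = demo.groupby('col1').agg(
    mean_val = ('reqcol', 'mean'),
    max_val  = ('reqcol', 'max'),
    n_rows   = ('reqcol', 'size'),
).reset_index()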

# replacing values and filling missing values
df['col1'] = df['col1'].replace('whattoreplace', 'replacewith')
df = df.replace('what_to_replace', 'replace_with')
df = df.fillna('')   # fill NaN with an empty string

#drop rows with nan
df.dropna()#converting string to datetime
df['col1'] = pd.to_datetime(df['col1'])

#summary & transpose
df.describe().transpose()

#check for null values in a column
df.isnull().any()
df.isnull().all()
df['col1'].isnull()
df['col2'].notnull()

#null values in each col
df.isna().sum()

# check for non-null values
pd.notnull(df['col1'])

# isin in pandas (takes a list-like, not a string)
df['col1'].isin(some_list)

# dropping duplicates
# drops duplicates, keeping the first occurrence
df.drop_duplicates()

# drops duplicates, keeping the last occurrence
df.drop_duplicates(keep = 'last')

# drops duplicates by column

df.drop_duplicates(['col1'])
df.drop_duplicates(['col1'], keep = 'last')
df.drop_duplicates(subset = 'col1')

# creating a pivot
df.pivot(index = 'A', columns = 'B', values = 'C')   # A -> rows, B -> columns, C -> cell values
pd.pivot_table(df, values = '', index = ['', ''], columns = [''], aggfunc = np.sum)

# flatten a pivoted frame back into a regular DataFrame (to truly unpivot wide -> long, use melt; see the sketch below)
pd.DataFrame(pivoted.to_records())
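A small end-to-end sketch with made-up data: pivot, flatten back to a regular frame, then unpivot with melt:

sales = pd.DataFrame({
    'region':  ['N', 'N', 'S', 'S'],
    'quarter': ['Q1', 'Q2', 'Q1', 'Q2'],
    'revenue': [10, 20, 30, 40],
})
pivoted = pd.pivot_table(sales, values = 'revenue', index = 'region',
                         columns = 'quarter', aggfunc = np.sum)
flat = pd.DataFrame(pivoted.to_records())                                            # back to a flat table
long = flat.melt(id_vars = 'region', var_name = 'quarter', value_name = 'revenue')   # unpivot (wide -> long)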

#replace infinity with nan
df.replace([np.inf, -np.inf], np.nan)

#check for infinite values 
np.isfinite(df).any()

#data types of all columns
df.dtypes
# data type of a single column
df['colname'].dtype
#convert dtypes
df['col'].astype(str).astype(int)

#lambda function
lambda x : x + 10

#applying functions to a dataframe
df.apply(lambda x: x + 3)


# apply a function that references multiple columns
df['Value'] = df.apply(lambda row: my_test(row['a'], row['c']), axis=1)

# if/else inside a lambda
df['flag'] = df['col'].apply(lambda x: 1 if x == 'W' else 0)
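A self-contained version of the two patterns above, with a made-up helper standing in for my_test:

def add_cols(a, c):                    # hypothetical stand-in for my_test
    return a + c

demo = pd.DataFrame({'a': [1, 2], 'c': [10, 20], 'col': ['W', 'L']})
demo['Value'] = demo.apply(lambda row: add_cols(row['a'], row['c']), axis=1)   # 11, 22
demo['flag'] = demo['col'].apply(lambda x: 1 if x == 'W' else 0)               # 1, 0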

#List Comprehensions
ls = [i for i in ls1 if i not in ls2]

# numpy array methods
np.zeros(shape)        # e.g. np.zeros((2, 5)) -> 2x5 array of zeros
np.ones(shape)         # array of ones with the given shape
np.full(5, -1)         # 1D array of five -1s
np.full((2, 5), -1)    # 2x5 array filled with -1

# list methods
ls.sort()
ls.sort(reverse = True)
ls = ['a', 'b', 'c']
"".join(ls)    # 'abc'
"-".join(ls)   # 'a-b-c'

# reverse a list
ls.reverse()

# remove all items from the list
ls.clear()

# remove - removes the first matching element
ls.remove(element)

# pop - removes and returns the last element
ls.pop()
# pop at an index
ls.pop(0)
del ls[0]

# get the index of an element
ls.index('element')

#append - adds an element to the list
ls.append(element)

#extend - adds ls2 to the end of ls1
ls1.extend(ls2)

#insert
ls.insert(position, element)

# delete - deletes the element at that index
del ls[0]
# deletes all elements from the list
del ls[:]

# enumerate
enumerate(iterable, start)
li = ['a', 'b', 'c']
ob = enumerate(li)   # yields (0, 'a'), (1, 'b'), (2, 'c')

# string methods
s.endswith('pattern')   # True if the string s ends with 'pattern'


5 SQL Commands You Need to Know as a Beginner Starting Out

5 basic commands you’ll need to know if you want to be proficient in SQL

SQL is a powerful language that can be used for all sorts of data analysis tasks.

It’s also a perfect fit for people who want to get into coding because it’s so similar to many other programming languages.

In this article, we’ll break down 5 SQL commands you need to know to get started along with examples so that by the end, you’ll know enough about SQL to start using it on your own projects!

1. SELECT

The first command you need to know if you’re starting in SQL is SELECT. It’s the most basic command in SQL and is used to get data from a table.

Some uses of SELECT include:

  • Selecting all of the data from a table
  • Selecting specific columns from a table
  • Selecting data based on certain criteria (using WHERE)

Examples:

SELECT * FROM tablename

This will give you all of the data from the tablename table. You can also select specific columns by specifying their name after SELECT:

SELECT id, name FROM tablename

This will give you the id and name columns from the tablename table.

SELECT DISTINCT

If you want to select only unique values, you can use SELECT DISTINCT. This command removes duplicate values from the results:

SELECT DISTINCT id FROM tablename

This will give you a list of all the unique ids from the tablename table.

SELECT COUNT

The SELECT COUNT command returns the number of rows in a table:

SELECT COUNT(*) FROM tablename

This will return the total number of rows in the tablename table. You can also count a specific column, e.g. COUNT(name), which counts only the rows where that column is not NULL.

2. WHERE

WHERE is another very common command in SQL. It's used to filter the data returned by a SELECT statement.

Some uses of WHERE include:

  • Filtering data by a certain column
  • Filtering data by a certain value
  • Filtering data by date range

Examples:

SELECT * FROM tablename WHERE id = 100

This will return only the rows from the tablename table where id equals 100. Multiple conditions can be specified using AND or OR:

SELECT * FROM tablename WHERE (id = 100) OR (name = 'John')

This would return all of the rows from the tablename table where either id = 100 or name = 'John'.

SELECT * FROM tablename WHERE id BETWEEN 100 AND 200

This would return all of the rows from the tablename table where id is between 100 and 200.

SELECT * FROM tablename WHERE id NOT IN (100,200)

This would return all of the rows from the tablename table where id is not equal to 100 or 200.

3. ORDER BY

ORDER BY is also commonly used in SQL. It's used to sort the results of a SELECT statement, in either ascending or descending order.

Some uses of ORDER BY include:

  • Sort results in ascending order: SELECT * FROM tablename ORDER BY id
  • Sort results in descending order: SELECT * FROM tablename ORDER BY id DESC
  • Sort results alphabetically: SELECT * FROM tablename ORDER BY name
  • Sort results by date: SELECT * FROM tablename ORDER BY created_at

Examples:

SELECT * FROM tablename ORDER BY name

This would return all of the rows from the tablename table, ordered by name. If you want to sort by multiple columns, specify them in a comma-separated list:

SELECT * FROM tablename WHERE id > 100 ORDER BY age DESC, name ASC

This will give us all of the rows where id is greater than 100, ordered by age in descending order and then by name in ascending order.

4. GROUP BY

GROUP BY is a clause in SQL used to group the rows returned by a SELECT statement by one or more columns, usually together with aggregate functions such as SUM, AVG, MIN, and MAX.

Some uses of GROUP BY include:

  • Summarizing data
  • Finding the max or min value for a column
  • Getting the average, median, or standard deviation for a column

Examples:

SELECT id, name, SUM(age) AS "Age" FROM tablename GROUP BY id, name

This will return a table with three columns: id, name, and Age. The Age column will hold the sum of the age values for each id and name group (columns that aren't aggregated must appear in the GROUP BY list).

SELECT MAX(age) AS "Oldest Person" FROM tablename GROUP BY id

This will return a table with one column: Oldest Person. The Oldest Person column will have the max age value from the tablename table grouped by ID.

SELECT AVG(age) AS "Average Age" FROM tablename GROUP BY id

This will return a table with one column: Average Age. The Average Age column will have the average age value for all rows in the tablename table, grouped by ID.

5. LIKE

The LIKE operator is used to match a pattern in a character string. The percent sign (%) is used as a wildcard character, which means that it can represent any number of characters.

Some uses of LIKE include:

  • Matching a pattern in a column
  • Finding specific values in a column

Examples:

SELECT id, name FROM tablename WHERE name LIKE 'A%'

This would return all of the rows where the name column starts with the letter A.

SELECT id, name FROM tablename WHERE name LIKE '%end'

This would return all of the rows where the name column ends with the string "end".

SELECT * FROM tablename WHERE name LIKE 'John%'

This will return all of the rows from the tablename table where the name column contains the string John followed by any number of characters (%). The % can be used at the beginning, end, or anywhere in the string.

Start Mastering SQL

The SQL commands we’ve discussed in this blog post are powerful tools that can help you get the most out of your data.

Use these commands to help you analyze and optimize your data, and you’ll be well on your way to mastering SQL.