Pandas Ecommerce
This section works through a complete e-commerce data analysis case study, applying a range of Pandas features from data preparation to the final summary.
* * *
## Case Overview
We analyze order data from an e-commerce platform along several dimensions: sales trends, product performance, and customer behavior.
### Data Preparation
## Example
import pandas as pd
import numpy as np
# Simulate e-commerce order data.
# The seed is fixed so every run of the tutorial produces the same table.
np.random.seed(42)
n_orders = 1000
orders = pd.DataFrame({
    "Order ID": range(1, n_orders + 1),
    "Customer ID": np.random.randint(100, 200, n_orders),
    "Product ID": np.random.randint(1, 20, n_orders),
    # One order every 30 minutes, starting at 2024-01-01 00:00.
    "Order Date": pd.date_range("2024-01-01", periods=n_orders, freq="30min"),
    "Quantity": np.random.randint(1, 5, n_orders),
    "Unit Price": np.random.uniform(10, 500, n_orders).round(2),
})
# Calculate order amount as a new column (quantity x unit price, rounded to
# cents). The later sections read it as orders["Order Amount"].
orders["Order Amount"] = (orders["Quantity"] * orders["Unit Price"]).round(2)
print("Order data overview:")
print(orders.head(10))
print(f"\nData volume: {len(orders)} records")
### Data Preprocessing
## Example
# Extract date features from the order timestamp.
# Each feature is stored as its own column; the later sections group by
# "Month", "Hour" and "Weekday".
orders["Date"] = orders["Order Date"].dt.date
orders["Hour"] = orders["Order Date"].dt.hour
orders["Weekday"] = orders["Order Date"].dt.day_name()
orders["Month"] = orders["Order Date"].dt.month
print("After adding time features:")
print(orders.head())
print()
# Check for missing values (per-column count of nulls)
print("Missing value check:")
print(orders.isnull().sum())
* * *
## Sales Analysis
### Overall Sales Situation
## Example
# Overall sales metrics
print("=== Overall Sales Situation ===\n")
print(f"Total Orders: {len(orders):,}")
print(f"Total Sales: ¥{orders['Order Amount'].sum():,.2f}")
print(f"Average Order Value: ¥{orders['Order Amount'].mean():,.2f}")
print(f"Median Order Value: ¥{orders['Order Amount'].median():,.2f}")
print()
# Monthly statistics: order count, revenue and distinct customers per month
monthly = orders.groupby("Month").agg({
    "Order ID": "count",
    "Order Amount": "sum",
    "Customer ID": "nunique",
}).rename(columns={
    "Order ID": "Order Count",
    "Order Amount": "Sales",
    "Customer ID": "Customer Count",
})
print("Monthly Sales Trend:")
print(monthly)
### Product Analysis
## Example
# Product sales ranking: per-product order count, units sold and revenue,
# sorted so the highest-revenue product is the first row.
product_sales = orders.groupby("Product ID").agg({
    "Order ID": "count",
    "Quantity": "sum",
    "Order Amount": "sum",
}).rename(columns={
    "Order ID": "Order Count",
    "Quantity": "Sales Volume",
    "Order Amount": "Sales",
}).sort_values("Sales", ascending=False)
print("=== Top 10 Product Sales Ranking ===\n")
print(product_sales.head(10))
print()
# Best-selling product: the first row after the descending sort.
# index[0] is its Product ID; the "Sales" value is read by position.
print(f"Best-selling product: Product {product_sales.index[0]}")
print(f"Sales: ¥{product_sales['Sales'].iloc[0]:,.2f}")
### Customer Analysis
## Example
# Customer spending analysis: order count and total spend per customer
customer_sales = orders.groupby("Customer ID").agg({
    "Order ID": "count",
    "Order Amount": "sum",
}).rename(columns={
    "Order ID": "Order Count",
    "Order Amount": "Total Spending",
})
print("=== Customer Analysis ===\n")
print(f"Active Customers: {len(customer_sales)}")
print(f"Average Orders per Customer: {customer_sales['Order Count'].mean():.1f}")
print(f"Average Customer Spending: ¥{customer_sales['Total Spending'].mean():,.2f}")
print()
# Customer segmentation: bin each customer's total spend into a tier.
# The tier is stored as a new column so customer_sales keeps one row per
# customer (the summary section relies on len(customer_sales)).
customer_sales["Customer Tier"] = pd.cut(
    customer_sales["Total Spending"],
    bins=[0, 1000, 5000, 10000, float("inf")],
    labels=["Regular", "Silver Card", "Gold Card", "VIP"],
)
print("Customer Segmentation Statistics:")
print(customer_sales["Customer Tier"].value_counts())
### Time Analysis
## Example
# Hourly analysis: total order amount per hour of day.
# Selecting "Order Amount" before summing yields a Series, so idxmax()/max()
# return a single hour and a single amount.
hourly = orders.groupby("Hour")["Order Amount"].sum()
print("=== Hourly Sales Analysis ===\n")
print(f"Peak Sales Hour: {hourly.idxmax()} o'clock")
print(f"Sales during peak hour: ¥{hourly.max():,.2f}")
print()
# Weekly analysis: total order amount per weekday, reindexed into calendar
# order (groupby alone would sort the day names alphabetically).
weekday = orders.groupby("Weekday")["Order Amount"].sum().reindex([
    "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
])
print("Weekly Sales:")
for day, amount in weekday.items():
    print(f"{day}: ¥{amount:,.2f}")
* * *
## Analysis Summary
## Example
# Final report: fill the summary template with the headline metrics from the
# earlier sections. Every placeholder receives a scalar: {1} and {2} are
# aggregates of the "Order Amount" column, {3} is the top product's ID, and
# {5} is the hour with the highest total sales.
print("""
=== E-commerce Data Analysis Summary ===
1. Sales Overview
- Total Orders: {0}
- Total Sales: ¥{1:,.2f}
- Average Order Value: ¥{2:.2f}
2. Product Performance
- Best-selling Product: Product {3}
- Sales distribution is uneven among products, with top products contributing most revenue
3. Customer Insights
- Active Customers: {4} people
- Suggestion: Focus on maintaining high-value customers
4. Time Patterns
- Peak Sales Around: {5} o'clock
- Marketing strategies can be adjusted based on peak hours
5. Optimization Suggestions
- 1) Increase inventory and promotion for best-selling products
- 2) Provide personalized services to high-value customers
- 3) Increase customer service staffing during peak hours
"""
.format(
    len(orders),
    orders["Order Amount"].sum(),
    orders["Order Amount"].mean(),
    product_sales.index[0],
    len(customer_sales),
    hourly.idxmax(),
))
YouTip