For example, the file form is:
File name: test
Name: 4p
Sample: 4pcs
Name Location Location 2 Sample 1 2 3
Withdrawal 7334 Aaa 0.1
Younghee 7333 Ag 0.3
Go 9334 Aaa0.1
stain 830 Acca 0.2
Withdrawal 7334 Bcc 0.2
Younghee 7333 Bta 0.1
Go 9334 Back 0.3
Stain 8330 Bcc 0.1
Withdrawal 7334 Cct 0.2
Younghee 7333 Ct 0.1
Go 9334 Cac 0.3
Stain 8330 Cat 0.1
It's like this
Name Location Location 2 ABC
Younghee 7333 actor
Withdrawal 7334 aaccct
Stain 8330 cascat
Go 9334 aa acat
I'd like to change it to this style.
Files are tabbed.
The missing value in AB is - - It says like this, but I want to change it to NN.
python pandas
There is unstack
.
import pandas as pd
from io import StringIO
f = ""File name: test
Name: 4
Sample: 4 pieces
Name Location Location 2 Sample 1 2 3
Withdrawal 7 334 A a a 0.1
Younghee 7333 Ag 0.3
Go 9334 A a a 0.1
Stain 8330 Acca 0.2
Withdrawal 7 334 Bcc 0.2
Younghee 7 333 Bta 0.1
Go 9334 Back 0.3
Stain 8330 Bcc 0.1
Withdrawal 7334 Cct 0.2
Younghee 7333 Ct 0.1
Go 9334 Cac 0.3
"Stainless 8 330 Cat 0.1""
df = pd.read_csv(StringIO(f), skiprows=3, sep="\t")
print(df.to_markdown())
"""
| | Name | Location | Location 2 | Sample | 1 | 2 | 3 |
|---:|:-------|-------:|--------:|:-------|:----|:----|----:|
| 0 | Withdrawal | 7 | 334 | A | a | a | 0.1 |
| 1 | Younghee | 7 | 333 | A | a | g | 0.3 |
| 2 | Go | 9 | 334 | A | a | a | 0.1 |
| 3 | Stain | 8 | 330 | A | c | a | 0.2 |
| 4 | Withdrawal | 7 | 334 | B | c | c | 0.2 |
| 5 | Younghee | 7 | 333 | B | t | a | 0.1 |
| 6 | Go | 9 | 334 | B | a | c | 0.3 |
| 7 | Stain | 8 | 330 | B | c | c | 0.1 |
| 8 | Withdrawal | 7 | 334 | C | c | t | 0.2 |
| 9 | Younghee | 7 | 333 | C | t | t | 0.1 |
| 10 | Go | 9 | 334 | C | a | c | 0.3 |
| 11 | Stain | 8 | 330 | C | a | t | 0.1 |
"""
df["1_2"] = df["1"] + df["2"]
print(df.to_markdown())
"""
| | Name | Location | Location 2 | Sample | 1 | 2 | 3 | 1_2 |
|---:|:-------|-------:|--------:|:-------|:----|:----|----:|:------|
| 0 | Withdrawal | 7 | 334 | A | a | a | 0.1 | aa |
| 1 | Younghee | 7 | 333 | A | a | g | 0.3 | ag |
| 2 | Go | 9 | 334 | A | a | a | 0.1 | a |
| 3 | Stain | 8 | 330 | A | c | a | 0.2 | ca |
| 4 | Withdrawal | 7 | 334 | B | c | c | 0.2 | c | c |
| 5 | Younghee | 7 | 333 | B | t | a | 0.1 | ta |
| 6 | Go | 9 | 334 | B | a | c | 0.3 | ac |
| 7 | Stain | 8 | 330 | B | c | c | 0.1 | c |
| 8 | Withdrawal | 7 | 334 | C | c | t | 0.2 | ct |
| 9 | Younghee | 7 | 333 | C | t | t | 0.1 | t |
| 10 | Go | 9 | 334 | C | a | c | 0.3 | ac |
| 11 | Stain | 8 | 330 | C | a | t | 0.1 | at |
"""
df1 = df[["Name", "Location", "Location2", "Sample", "1_2"]); set_index (["Name", "Location2", "Sample"])
print(df1.to_markdown())
"""
| | 1_2 |
|:----------------------|:------|
| ('Withdrawal', 7,334, 'A') | aa |
| ('Younghee', 7, 333, 'A') | ag |
| ('Go', 9, 334, 'A') | aa |
| ('Spark', 8, 330, 'A') | ca |
| ('Withdrawal', 7,334, 'B') | cc |
| ('Younghee', 7, 333, 'B') | ta |
| ('Go', 9, 334, 'B') | ac |
| ('Spark', 8, 330, 'B') | cc |
| ('Withdrawal', 7, 334, 'C') | ct |
| ('Younghee', 7, 333, 'C') | tt |
| ('Go', 9, 334, 'C') | ac |
| ('Spark', 8, 330, 'C') | at |
"""
df2 = df1.unstack ("sample").droplevel(0, axis=1)
print(df2.to_markdown())
"""
| | | A | B | C |
|:-----------------|:----|:----|:----|
| ('Go', 9, 334) | aa | ac |
| ('Spark', 8, 330) | ca | cc | at |
| ('Younghee', 7, 333) | ag | ta | tt |
| ('Withdrawal', 7, 334) | aa | cc | ct |
"""
df2 = df2.reset_index()
print(df2.to_markdown())
"""
| | Name | Location | Location 2 | A | B | C |
|---:|:-------|-------:|--------:|:----|:----|:----|
| 0 | Go | 9 | 334 | aa | ac |
| 1 | Stain | 8 | 330 | Ca | cc | at |
| 2 | Younghee | 7 | 333 | ag | ta | tt |
| 3 | Withdrawal | 7 | 334 | aa | cc | ct |
"""
df2.to_csv("result.csv", sep="\t", index=None)
"""
Name Location Location 2 AB C
Go 9334 aa ac
Stain 8330
YOUNG HEE 7333 agate
Withdrawal 7334 aacct
"""
© 2025 OneMinuteCode. All rights reserved.