160 lines
6.4 KiB
Python
160 lines
6.4 KiB
Python
import json
|
|
import httpx
|
|
import logging
|
|
|
|
# Configure the root logger to emit WARNING and above. The info/debug
# messages logged by Profile stay hidden unless this level is lowered.
logging.basicConfig(level=logging.WARNING)
|
|
|
|
class Profile:
    """Scrape data about a single Instagram profile.

    Creating an instance immediately performs one HTTP request that fetches
    the profile document. The accessor methods afterwards read from that
    cached document (``user_profile_data``); only ``Get_following`` and
    ``Get_profile_picture(download=True)`` perform further requests.

    The instance owns two ``httpx.Client`` objects. Call ``close()`` or use
    the instance as a context manager (``with Profile(name) as p:``) to
    release them.

    Attributes:
        username: The username passed to the constructor.
        user_client: Client whose headers include the session cookie and
            the CSRF token.
        user_client_no_cookies: Client that sends only the base headers.
        user_profile_data: Decoded JSON of the profile request, or ``{}``
            when the request failed.
        user_id: The ``id`` field of the profile, or ``""`` when unknown.
    """

    # Headers sent by both clients.
    _BASE_HEADERS = {
        "x-ig-app-id": "936619743392459",
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept": "*/*",
    }

    # SECURITY: session credentials committed to source control. They are
    # kept only as backward-compatible defaults; pass ``cookie`` and
    # ``csrf_token`` to the constructor instead and rotate/remove these.
    _DEFAULT_COOKIE = (
        "sessionid=50347012053%3AyyygawcCJZRigQ%3A1%3AAYdTy1-Y086Kr6-SoHIudLxocmEw6SfIrwYpJvF6ww; "
        "csrftoken=HJidkBXRjkGQEJz1vPj84TETkbuc72Gs; "
        "ds_user_id=50347012053"
    )
    _DEFAULT_CSRF_TOKEN = "HJidkBXRjkGQEJz1vPj84TETkbuc72Gs"

    def __init__(self, username: str, *, cookie: str = "", csrf_token: str = "") -> None:
        """
        Initialize a Profile instance to scrape data from Instagram user.

        The constructor builds both HTTP clients and then fetches the
        profile document, so it performs network I/O. A failed fetch is
        logged and leaves ``user_profile_data`` empty instead of raising.

        Args:
            username (str): The username of the Instagram profile to scrape.
            cookie (str): Value for the ``Cookie`` header of the
                authenticated client. Empty means "use the built-in default".
            csrf_token (str): Value for the ``X-CSRFToken`` header of the
                authenticated client. Empty means "use the built-in
                default". Supply it together with ``cookie``.
        """
        self.username = username

        authenticated_headers = dict(self._BASE_HEADERS)
        authenticated_headers["Cookie"] = cookie or self._DEFAULT_COOKIE
        authenticated_headers["X-CSRFToken"] = csrf_token or self._DEFAULT_CSRF_TOKEN

        self.user_client = httpx.Client(headers=authenticated_headers)
        self.user_client_no_cookies = httpx.Client(headers=dict(self._BASE_HEADERS))

        self.user_profile_data = self.scrape_user_profile()
        self.user_id = self.get_user_id()
        logging.info("User identification number is: %s", self.user_id)
        if self.is_user_private():
            logging.info("User is private.")

    def __enter__(self) -> "Profile":
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close both HTTP clients when the ``with`` block ends."""
        self.close()

    def close(self) -> None:
        """Close both HTTP clients owned by this instance."""
        self.user_client.close()
        self.user_client_no_cookies.close()

    def _user(self) -> dict:
        """Return the ``data.user`` mapping of the profile document.

        Returns ``{}`` when the document is empty or when any level is
        missing or not a mapping (for example a JSON ``null``), so callers
        can chain ``.get`` safely.
        """
        profile = self.user_profile_data
        data = profile.get("data") if isinstance(profile, dict) else None
        user = data.get("user") if isinstance(data, dict) else None
        return user if isinstance(user, dict) else {}

    def _user_flag(self, key: str) -> bool:
        """Return the boolean profile field ``key``; missing means ``False``.

        JSON booleans arrive as ``bool``. String values are also accepted,
        in which case only a case-insensitive ``"true"`` counts as true.
        """
        value = self._user().get(key, False)
        if isinstance(value, str):
            return value.strip().lower() == "true"
        return bool(value)

    def _edge_count(self, edge: str) -> int:
        """Return ``count`` from the nested mapping ``edge``, or 0 if absent."""
        container = self._user().get(edge)
        if not isinstance(container, dict):
            return 0
        return container.get("count", 0)

    def scrape_user_profile(self) -> dict:
        """Scrape Instagram user's profile data and return it as a dictionary.

        Uses the cookie-less client. The username is passed as a query
        parameter so that it is URL-encoded.

        Returns:
            dict: The decoded JSON document, or ``{}`` when the request
            fails, the status is an error, or the body is not a JSON object.
        """
        try:
            response = self.user_client_no_cookies.get(
                "https://i.instagram.com/api/v1/users/web_profile_info/",
                params={"username": self.username},
            )
            response.raise_for_status()  # Ensure the request was successful
            payload = response.json()
        except (httpx.HTTPError, httpx.InvalidURL, ValueError) as e:
            # HTTPError covers transport failures and error statuses;
            # ValueError covers a body that is not valid JSON.
            logging.error(f"Failed to retrieve profile data: {e}")
            return {}
        if not isinstance(payload, dict):
            logging.error("Failed to retrieve profile data: unexpected response format")
            return {}
        return payload

    def get_user_id(self) -> str:
        """Extract and return the user ID from the profile data ("" if absent)."""
        return self._user().get("id") or ""

    def is_user_private(self) -> bool:
        """Get information about if user is private or not

        Returns:
            bool: True if the profile's ``is_private`` field is true, False
            if it is false or missing.
        """
        return self._user_flag("is_private")

    def is_user_verified(self) -> bool:
        """Get information about if user is verified or not

        Returns:
            bool: True if the profile's ``is_verified`` field is true, False
            if it is false or missing.
        """
        return self._user_flag("is_verified")

    def get_user_data(self, download=True) -> dict:
        """Return the ``data.user`` mapping and optionally save it as JSON.

        Args:
            download: When truthy, write the mapping to
                ``user_data_<username>_<user_id>.json`` in the current
                working directory.

        Returns:
            dict: The user mapping (``{}`` when no profile data is loaded).

        Raises:
            OSError: If the file cannot be written.
        """
        user_data = self._user()
        if download:
            with open(
                f"user_data_{self.username}_{self.user_id}.json", "w", encoding="utf-8"
            ) as file:
                json.dump(user_data, file)
            logging.info("File saved successfully!")
        return user_data

    def Get_user_bio(self) -> str:
        """Get bio of user; also stored on ``self.user_bio`` ("" if absent)."""
        self.user_bio = self._user().get("biography") or ""
        return self.user_bio

    def Get_followers_count(self) -> int:
        """Retrieve and return the follower count from profile data."""
        return self._edge_count("edge_followed_by")

    def Get_following_count(self) -> int:
        """Retrieve and return the following count from profile data."""
        return self._edge_count("edge_follow")

    def Get_followers(self) -> dict:
        """Placeholder for listing followers; not implemented.

        Returns:
            dict: Always ``{}``, matching the failure value of
            ``Get_following``, and logs a warning.
        """
        logging.warning("Get_followers is not implemented yet.")
        return {}

    def Get_following(self) -> dict:
        """Scrape the list of users that the user is following and return their usernames and full names as a dictionary.

        Uses the authenticated client. Failures are logged and reported as
        an empty dictionary rather than raised.

        Returns:
            dict: ``{username: full_name}`` for every entry of the
            response's ``users`` list; ``{}`` on any failure or when the
            user id is unknown.
        """
        if not self.user_id:
            # Without an id the URL would be malformed, so skip the request.
            logging.error("An error occurred: user id is unknown")
            return {}
        try:
            response = self.user_client.get(
                f"https://www.instagram.com/api/v1/friendships/{self.user_id}/following/"
            )
            response.raise_for_status()
            data = response.json()
        except (httpx.HTTPError, httpx.InvalidURL, ValueError) as e:
            logging.error(f"An error occurred: {e}")
            return {}
        logging.debug(data)

        # NOTE(review): a single request is made; if the endpoint paginates,
        # later pages are not fetched - confirm against the API.
        users = data.get("users") if isinstance(data, dict) else None
        if not isinstance(users, list):
            return {}
        # Create a dictionary with the usernames as keys and full names as values
        return {
            user.get("username"): user.get("full_name")
            for user in users
            if isinstance(user, dict)
        }

    def Get_external_url(self) -> str:
        """Return the profile's ``external_url`` field ("" if absent)."""
        return self._user().get("external_url") or ""

    def Get_profile_picture(self, hd=True, download=True):
        """Return the profile picture URL and optionally download the image.

        Args:
            hd: When truthy read ``profile_pic_url_hd``, otherwise
                ``profile_pic_url``.
            download: When truthy, fetch the image and write it to
                ``profile_<username>_<user_id>.jpg`` in the current working
                directory.

        Returns:
            str: The picture URL (also stored on ``self.profile_img``), or
            ``""`` when the profile data holds no usable URL.

        Raises:
            OSError: If the image file cannot be written.
        """
        tag = "profile_pic_url_hd" if hd else "profile_pic_url"
        url = self._user().get(tag)
        self.profile_img = url if isinstance(url, str) else ""

        if not download:
            return self.profile_img
        if not self.profile_img:
            logging.error("Failed to download image")
            return self.profile_img

        try:
            # The URL comes from the API response and may name another
            # host, so use the cookie-less client to avoid sending the
            # session cookie there.
            image = self.user_client_no_cookies.get(self.profile_img)
        except (httpx.HTTPError, httpx.InvalidURL) as e:
            logging.error(f"Failed to download image: {e}")
            return self.profile_img

        if image.status_code == 200:
            with open(f"profile_{self.username}_{self.user_id}.jpg", "wb") as file:
                file.write(image.content)
            logging.info("Image downloaded successfully!")
        else:
            logging.error("Failed to download image")
        return self.profile_img
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: load one profile, print its external URL, save its data and
    # profile picture to disk, then list the accounts it follows.
    profile = Profile("ittrendy.cz")
    print(profile.Get_external_url())
    profile.get_user_data()
    profile.Get_profile_picture()

    # Keys are usernames and values are full names (see Get_following).
    following = profile.Get_following()
    for position, (handle, full_name) in enumerate(following.items(), start=1):
        print(f"{position}. User: \"{handle}\" \t\t Fullname: {full_name}")

    # Other accessors that can be tried here: Get_user_bio(),
    # Get_followers_count() and Get_following_count().
|